Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Methods for mixed CDFT calculations
10 : !> \par History
11 : !> Separated CDFT routines from mixed_environment_utils
12 : !> \author Nico Holmberg [01.2017]
13 : ! **************************************************************************************************
14 : MODULE mixed_cdft_methods
15 : USE ao_util, ONLY: exp_radius_very_extended
16 : USE atomic_kind_types, ONLY: atomic_kind_type,&
17 : get_atomic_kind
18 : USE cell_types, ONLY: cell_type,&
19 : pbc
20 : USE cp_array_utils, ONLY: cp_1d_i_p_type,&
21 : cp_1d_r_p_type,&
22 : cp_2d_r_p_type
23 : USE cp_control_types, ONLY: dft_control_type
24 : USE cp_dbcsr_api, ONLY: &
25 : dbcsr_add, dbcsr_copy, dbcsr_create, dbcsr_init_p, dbcsr_p_type, dbcsr_release, &
26 : dbcsr_release_p, dbcsr_scale, dbcsr_type
27 : USE cp_dbcsr_diag, ONLY: cp_dbcsr_syevd
28 : USE cp_dbcsr_operations, ONLY: cp_dbcsr_sm_fm_multiply
29 : USE cp_fm_basic_linalg, ONLY: cp_fm_column_scale,&
30 : cp_fm_invert,&
31 : cp_fm_transpose
32 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
33 : cp_fm_struct_release,&
34 : cp_fm_struct_type
35 : USE cp_fm_types, ONLY: cp_fm_create,&
36 : cp_fm_get_info,&
37 : cp_fm_release,&
38 : cp_fm_set_all,&
39 : cp_fm_to_fm,&
40 : cp_fm_type,&
41 : cp_fm_write_formatted
42 : USE cp_log_handling, ONLY: cp_get_default_logger,&
43 : cp_logger_type,&
44 : cp_to_string
45 : USE cp_output_handling, ONLY: cp_print_key_finished_output,&
46 : cp_print_key_unit_nr
47 : USE cp_realspace_grid_cube, ONLY: cp_pw_to_cube
48 : USE cp_subsys_types, ONLY: cp_subsys_get,&
49 : cp_subsys_type
50 : USE cp_units, ONLY: cp_unit_from_cp2k
51 : USE force_env_types, ONLY: force_env_get,&
52 : force_env_type,&
53 : use_qmmm,&
54 : use_qmmmx,&
55 : use_qs_force
56 : USE grid_api, ONLY: GRID_FUNC_AB,&
57 : collocate_pgf_product
58 : USE hirshfeld_methods, ONLY: create_shape_function
59 : USE hirshfeld_types, ONLY: hirshfeld_type
60 : USE input_constants, ONLY: &
61 : becke_cutoff_element, becke_cutoff_global, cdft_alpha_constraint, cdft_beta_constraint, &
62 : cdft_charge_constraint, cdft_magnetization_constraint, mix_cdft, mixed_cdft_parallel, &
63 : mixed_cdft_parallel_nobuild, mixed_cdft_serial, outer_scf_becke_constraint
64 : USE input_section_types, ONLY: section_get_lval,&
65 : section_vals_get,&
66 : section_vals_get_subs_vals,&
67 : section_vals_type,&
68 : section_vals_val_get
69 : USE kinds, ONLY: default_path_length,&
70 : dp
71 : USE machine, ONLY: m_walltime
72 : USE mathlib, ONLY: diamat_all
73 : USE memory_utilities, ONLY: reallocate
74 : USE message_passing, ONLY: mp_request_type,&
75 : mp_testall,&
76 : mp_waitall
77 : USE mixed_cdft_types, ONLY: mixed_cdft_result_type_set,&
78 : mixed_cdft_settings_type,&
79 : mixed_cdft_type,&
80 : mixed_cdft_type_create,&
81 : mixed_cdft_work_type_release
82 : USE mixed_cdft_utils, ONLY: &
83 : hfun_zero, map_permutation_to_states, mixed_cdft_assemble_block_diag, &
84 : mixed_cdft_diagonalize_blocks, mixed_cdft_get_blocks, mixed_cdft_init_structures, &
85 : mixed_cdft_parse_settings, mixed_cdft_print_couplings, mixed_cdft_read_block_diag, &
86 : mixed_cdft_redistribute_arrays, mixed_cdft_release_work, mixed_cdft_transfer_settings
87 : USE mixed_environment_types, ONLY: get_mixed_env,&
88 : mixed_environment_type,&
89 : set_mixed_env
90 : USE parallel_gemm_api, ONLY: parallel_gemm
91 : USE particle_list_types, ONLY: particle_list_type
92 : USE particle_types, ONLY: particle_type
93 : USE pw_env_types, ONLY: pw_env_get,&
94 : pw_env_type
95 : USE pw_methods, ONLY: pw_copy,&
96 : pw_scale,&
97 : pw_zero
98 : USE pw_pool_types, ONLY: pw_pool_type
99 : USE qs_cdft_types, ONLY: cdft_control_type
100 : USE qs_energy_types, ONLY: qs_energy_type
101 : USE qs_environment_types, ONLY: get_qs_env,&
102 : qs_environment_type
103 : USE qs_kind_types, ONLY: qs_kind_type
104 : USE qs_mo_io, ONLY: read_mo_set_from_restart,&
105 : wfn_restart_file_name
106 : USE qs_mo_methods, ONLY: make_basis_simple,&
107 : make_basis_sm
108 : USE qs_mo_types, ONLY: allocate_mo_set,&
109 : deallocate_mo_set,&
110 : mo_set_type,&
111 : set_mo_set
112 : USE realspace_grid_types, ONLY: realspace_grid_type,&
113 : rs_grid_zero,&
114 : transfer_rs2pw
115 : USE util, ONLY: sort
116 : #include "./base/base_uses.f90"
117 :
118 : IMPLICIT NONE
119 :
120 : PRIVATE
121 :
122 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mixed_cdft_methods'
123 : LOGICAL, PARAMETER, PRIVATE :: debug_this_module = .FALSE.
124 :
125 : PUBLIC :: mixed_cdft_init, &
126 : mixed_cdft_build_weight, &
127 : mixed_cdft_calculate_coupling
128 :
129 : CONTAINS
130 :
131 : ! **************************************************************************************************
132 : !> \brief Initialize a mixed CDFT calculation
133 : !> \param force_env the force_env that holds the CDFT states
134 : !> \param calculate_forces determines if forces should be calculated
135 : !> \par History
136 : !> 01.2016 created [Nico Holmberg]
137 : ! **************************************************************************************************
138 524 : SUBROUTINE mixed_cdft_init(force_env, calculate_forces)
139 : TYPE(force_env_type), POINTER :: force_env
140 : LOGICAL, INTENT(IN) :: calculate_forces
141 :
142 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_init'
143 :
144 : INTEGER :: et_freq, handle, iforce_eval, iounit, &
145 : mixing_type, nforce_eval
146 : LOGICAL :: explicit, is_parallel, is_qmmm
147 : TYPE(cp_logger_type), POINTER :: logger
148 : TYPE(cp_subsys_type), POINTER :: subsys_mix
149 : TYPE(force_env_type), POINTER :: force_env_qs
150 : TYPE(mixed_cdft_settings_type) :: settings
151 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
152 : TYPE(mixed_environment_type), POINTER :: mixed_env
153 : TYPE(particle_list_type), POINTER :: particles_mix
154 : TYPE(section_vals_type), POINTER :: force_env_section, mapping_section, &
155 : md_section, mixed_section, &
156 : print_section, root_section
157 :
158 524 : NULLIFY (subsys_mix, force_env_qs, force_env_section, print_section, &
159 524 : root_section, mixed_section, md_section, mixed_env, mixed_cdft, &
160 524 : mapping_section)
161 :
162 : NULLIFY (settings%grid_span, settings%npts, settings%cutoff, settings%rel_cutoff, &
163 : settings%spherical, settings%rs_dims, settings%odd, settings%atoms, &
164 : settings%coeffs, settings%si, settings%sr, &
165 : settings%cutoffs, settings%radii)
166 :
167 524 : is_qmmm = .FALSE.
168 1048 : logger => cp_get_default_logger()
169 524 : CPASSERT(ASSOCIATED(force_env))
170 524 : nforce_eval = SIZE(force_env%sub_force_env)
171 524 : CALL timeset(routineN, handle)
172 524 : CALL force_env_get(force_env=force_env, force_env_section=force_env_section)
173 524 : mixed_env => force_env%mixed_env
174 524 : mixed_section => section_vals_get_subs_vals(force_env_section, "MIXED")
175 524 : print_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
176 524 : iounit = cp_print_key_unit_nr(logger, print_section, '', extension='.mixedLog')
177 : ! Check if a mixed CDFT calculation is requested
178 524 : CALL section_vals_val_get(mixed_section, "MIXING_TYPE", i_val=mixing_type)
179 524 : IF (mixing_type == mix_cdft .AND. .NOT. ASSOCIATED(mixed_env%cdft_control)) THEN
180 72 : mixed_env%do_mixed_cdft = .TRUE.
181 144 : IF (mixed_env%do_mixed_cdft) THEN
182 : ! Sanity check
183 72 : IF (nforce_eval .LT. 2) &
184 : CALL cp_abort(__LOCATION__, &
185 0 : "Mixed CDFT calculation requires at least 2 force_evals.")
186 72 : mapping_section => section_vals_get_subs_vals(mixed_section, "MAPPING")
187 72 : CALL section_vals_get(mapping_section, explicit=explicit)
188 : ! The sub_force_envs must share the same geometrical structure
189 72 : IF (explicit) &
190 0 : CPABORT("Please disable section &MAPPING for mixed CDFT calculations")
191 72 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%COUPLING", i_val=et_freq)
192 72 : IF (et_freq .LT. 0) THEN
193 0 : mixed_env%do_mixed_et = .FALSE.
194 : ELSE
195 72 : mixed_env%do_mixed_et = .TRUE.
196 72 : IF (et_freq == 0) THEN
197 0 : mixed_env%et_freq = 1
198 : ELSE
199 72 : mixed_env%et_freq = et_freq
200 : END IF
201 : END IF
202 : ! Start initializing the mixed_cdft type
203 : ! First determine if the calculation is pure DFT or QMMM and find the qs force_env
204 240 : DO iforce_eval = 1, nforce_eval
205 168 : IF (.NOT. ASSOCIATED(force_env%sub_force_env(iforce_eval)%force_env)) CYCLE
206 276 : SELECT CASE (force_env%sub_force_env(iforce_eval)%force_env%in_use)
207 : CASE (use_qs_force)
208 132 : force_env_qs => force_env%sub_force_env(iforce_eval)%force_env
209 : CASE (use_qmmm)
210 12 : is_qmmm = .TRUE.
211 : ! This is really the container for QMMM
212 12 : force_env_qs => force_env%sub_force_env(iforce_eval)%force_env
213 : CASE (use_qmmmx)
214 0 : CPABORT("No force mixing allowed for mixed CDFT QM/MM")
215 : CASE DEFAULT
216 144 : CPASSERT(.FALSE.)
217 : END SELECT
218 216 : CPASSERT(ASSOCIATED(force_env_qs))
219 : END DO
220 : ! Get infos about the mixed subsys
221 72 : IF (.NOT. is_qmmm) THEN
222 : CALL force_env_get(force_env=force_env, &
223 64 : subsys=subsys_mix)
224 : CALL cp_subsys_get(subsys=subsys_mix, &
225 64 : particles=particles_mix)
226 : ELSE
227 : CALL get_qs_env(force_env_qs%qmmm_env%qs_env, &
228 8 : cp_subsys=subsys_mix)
229 : CALL cp_subsys_get(subsys=subsys_mix, &
230 8 : particles=particles_mix)
231 : END IF
232 : ! Init mixed_cdft_type
233 72 : ALLOCATE (mixed_cdft)
234 72 : CALL mixed_cdft_type_create(mixed_cdft)
235 72 : mixed_cdft%first_iteration = .TRUE.
236 : ! Determine what run type to use
237 72 : IF (mixed_env%ngroups == 1) THEN
238 : ! States treated in serial, possibly copying CDFT weight function and gradients from state to state
239 48 : mixed_cdft%run_type = mixed_cdft_serial
240 24 : ELSE IF (mixed_env%ngroups == 2) THEN
241 24 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%PARALLEL_BUILD", l_val=is_parallel)
242 24 : IF (is_parallel) THEN
243 : ! Treat states in parallel, build weight function and gradients in parallel before SCF process
244 22 : mixed_cdft%run_type = mixed_cdft_parallel
245 22 : IF (.NOT. nforce_eval == 2) &
246 : CALL cp_abort(__LOCATION__, &
247 0 : "Parallel mode mixed CDFT calculation supports only 2 force_evals.")
248 : ELSE
249 : ! Treat states in parallel, but each states builds its own weight function and gradients
250 2 : mixed_cdft%run_type = mixed_cdft_parallel_nobuild
251 : END IF
252 : ELSE
253 0 : mixed_cdft%run_type = mixed_cdft_parallel_nobuild
254 : END IF
255 : ! Store QMMM flag
256 72 : mixed_env%do_mixed_qmmm_cdft = is_qmmm
257 : ! Setup dynamic load balancing
258 72 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%DLB", l_val=mixed_cdft%dlb)
259 72 : mixed_cdft%dlb = mixed_cdft%dlb .AND. calculate_forces ! disable if forces are not needed
260 72 : mixed_cdft%dlb = mixed_cdft%dlb .AND. (mixed_cdft%run_type == mixed_cdft_parallel) ! disable if not parallel
261 72 : IF (mixed_cdft%dlb) THEN
262 40 : ALLOCATE (mixed_cdft%dlb_control)
263 4 : NULLIFY (mixed_cdft%dlb_control%weight, mixed_cdft%dlb_control%gradients, &
264 4 : mixed_cdft%dlb_control%cavity, mixed_cdft%dlb_control%target_list, &
265 4 : mixed_cdft%dlb_control%bo, mixed_cdft%dlb_control%expected_work, &
266 4 : mixed_cdft%dlb_control%prediction_error, mixed_cdft%dlb_control%sendbuff, &
267 4 : mixed_cdft%dlb_control%recvbuff, mixed_cdft%dlb_control%recv_work_repl, &
268 4 : mixed_cdft%dlb_control%recv_info)
269 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%LOAD_SCALE", &
270 4 : r_val=mixed_cdft%dlb_control%load_scale)
271 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%VERY_OVERLOADED", &
272 4 : r_val=mixed_cdft%dlb_control%very_overloaded)
273 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%MORE_WORK", &
274 4 : i_val=mixed_cdft%dlb_control%more_work)
275 : END IF
276 : ! Metric/Wavefunction overlap method/Lowdin orthogonalization/CDFT-CI
277 72 : mixed_cdft%calculate_metric = .FALSE.
278 72 : mixed_cdft%wfn_overlap_method = .FALSE.
279 72 : mixed_cdft%use_lowdin = .FALSE.
280 72 : mixed_cdft%do_ci = .FALSE.
281 72 : mixed_cdft%nonortho_coupling = .FALSE.
282 72 : mixed_cdft%identical_constraints = .TRUE.
283 72 : mixed_cdft%block_diagonalize = .FALSE.
284 72 : IF (mixed_env%do_mixed_et) THEN
285 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%METRIC", &
286 72 : l_val=mixed_cdft%calculate_metric)
287 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%WFN_OVERLAP", &
288 72 : l_val=mixed_cdft%wfn_overlap_method)
289 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%LOWDIN", &
290 72 : l_val=mixed_cdft%use_lowdin)
291 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%CI", &
292 72 : l_val=mixed_cdft%do_ci)
293 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%NONORTHOGONAL_COUPLING", &
294 72 : l_val=mixed_cdft%nonortho_coupling)
295 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%BLOCK_DIAGONALIZE", &
296 72 : l_val=mixed_cdft%block_diagonalize)
297 : END IF
298 : ! Inversion method
299 72 : CALL section_vals_val_get(mixed_section, "MIXED_CDFT%EPS_SVD", r_val=mixed_cdft%eps_svd)
300 72 : IF (mixed_cdft%eps_svd .LT. 0.0_dp .OR. mixed_cdft%eps_svd .GT. 1.0_dp) &
301 0 : CPABORT("Illegal value for EPS_SVD. Value must be between 0.0 and 1.0.")
302 : ! MD related settings
303 72 : CALL force_env_get(force_env, root_section=root_section)
304 72 : md_section => section_vals_get_subs_vals(root_section, "MOTION%MD")
305 72 : CALL section_vals_val_get(md_section, "TIMESTEP", r_val=mixed_cdft%sim_dt)
306 72 : CALL section_vals_val_get(md_section, "STEP_START_VAL", i_val=mixed_cdft%sim_step)
307 72 : mixed_cdft%sim_step = mixed_cdft%sim_step - 1 ! to get the first step correct
308 72 : mixed_cdft%sim_dt = cp_unit_from_cp2k(mixed_cdft%sim_dt, "fs")
309 : ! Parse constraint settings from the individual force_evals and check consistency
310 : CALL mixed_cdft_parse_settings(force_env, mixed_env, mixed_cdft, &
311 72 : settings, natom=SIZE(particles_mix%els))
312 : ! Transfer settings to mixed_cdft
313 72 : CALL mixed_cdft_transfer_settings(force_env, mixed_cdft, settings)
314 : ! Initilize necessary structures
315 72 : CALL mixed_cdft_init_structures(force_env, force_env_qs, mixed_env, mixed_cdft, settings)
316 : ! Write information about the mixed CDFT calculation
317 72 : IF (iounit > 0) THEN
318 36 : WRITE (iounit, *) ""
319 : WRITE (iounit, FMT="(T2,A,T71)") &
320 36 : "MIXED_CDFT| Activating mixed CDFT calculation"
321 : WRITE (iounit, FMT="(T2,A,T71,I10)") &
322 36 : "MIXED_CDFT| Number of CDFT states: ", nforce_eval
323 47 : SELECT CASE (mixed_cdft%run_type)
324 : CASE (mixed_cdft_parallel)
325 : WRITE (iounit, FMT="(T2,A,T71)") &
326 11 : "MIXED_CDFT| CDFT states calculation mode: parallel with build"
327 : WRITE (iounit, FMT="(T2,A,T71)") &
328 11 : "MIXED_CDFT| Becke constraint is first built using all available processors"
329 : WRITE (iounit, FMT="(T2,A,T71)") &
330 11 : " and then copied to both states with their own processor groups"
331 : CASE (mixed_cdft_serial)
332 : WRITE (iounit, FMT="(T2,A,T71)") &
333 24 : "MIXED_CDFT| CDFT states calculation mode: serial"
334 24 : IF (mixed_cdft%identical_constraints) THEN
335 : WRITE (iounit, FMT="(T2,A,T71)") &
336 23 : "MIXED_CDFT| The constraints are built before the SCF procedure of the first"
337 : WRITE (iounit, FMT="(T2,A,T71)") &
338 23 : " CDFT state and subsequently copied to the other states"
339 : ELSE
340 : WRITE (iounit, FMT="(T2,A,T71)") &
341 1 : "MIXED_CDFT| The constraints are separately built for all CDFT states"
342 : END IF
343 : CASE (mixed_cdft_parallel_nobuild)
344 : WRITE (iounit, FMT="(T2,A,T71)") &
345 1 : "MIXED_CDFT| CDFT states calculation mode: parallel without build"
346 : WRITE (iounit, FMT="(T2,A,T71)") &
347 1 : "MIXED_CDFT| The constraints are separately built for all CDFT states"
348 : CASE DEFAULT
349 36 : CPABORT("Unknown mixed CDFT run type.")
350 : END SELECT
351 : WRITE (iounit, FMT="(T2,A,T71,L10)") &
352 36 : "MIXED_CDFT| Calculating electronic coupling between states: ", mixed_env%do_mixed_et
353 : WRITE (iounit, FMT="(T2,A,T71,L10)") &
354 36 : "MIXED_CDFT| Calculating electronic coupling reliability metric: ", mixed_cdft%calculate_metric
355 : WRITE (iounit, FMT="(T2,A,T71,L10)") &
356 36 : "MIXED_CDFT| Configuration interaction (CDFT-CI) was requested: ", mixed_cdft%do_ci
357 : WRITE (iounit, FMT="(T2,A,T71,L10)") &
358 36 : "MIXED_CDFT| Block diagonalizing the mixed CDFT Hamiltonian: ", mixed_cdft%block_diagonalize
359 36 : IF (mixed_cdft%run_type == mixed_cdft_parallel) THEN
360 : WRITE (iounit, FMT="(T2,A,T71,L10)") &
361 11 : "MIXED_CDFT| Dynamic load balancing enabled: ", mixed_cdft%dlb
362 11 : IF (mixed_cdft%dlb) THEN
363 2 : WRITE (iounit, FMT="(T2,A,T71)") "MIXED_CDFT| Dynamic load balancing parameters:"
364 : WRITE (iounit, FMT="(T2,A,T71,F10.2)") &
365 2 : "MIXED_CDFT| load_scale:", mixed_cdft%dlb_control%load_scale
366 : WRITE (iounit, FMT="(T2,A,T71,F10.2)") &
367 2 : "MIXED_CDFT| very_overloaded:", mixed_cdft%dlb_control%very_overloaded
368 : WRITE (iounit, FMT="(T2,A,T71,I10)") &
369 2 : "MIXED_CDFT| more_work:", mixed_cdft%dlb_control%more_work
370 : END IF
371 : END IF
372 36 : IF (mixed_env%do_mixed_et) THEN
373 36 : IF (mixed_cdft%eps_svd == 0.0_dp) THEN
374 29 : WRITE (iounit, FMT="(T2,A,T71)") "MIXED_CDFT| Matrix inversions calculated with LU decomposition."
375 : ELSE
376 7 : WRITE (iounit, FMT="(T2,A,T71)") "MIXED_CDFT| Matrix inversions calculated with SVD decomposition."
377 7 : WRITE (iounit, FMT="(T2,A,T71,ES10.2)") "MIXED_CDFT| EPS_SVD:", mixed_cdft%eps_svd
378 : END IF
379 : END IF
380 : END IF
381 72 : CALL set_mixed_env(mixed_env, cdft_control=mixed_cdft)
382 : END IF
383 : END IF
384 : CALL cp_print_key_finished_output(iounit, logger, force_env_section, &
385 524 : "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
386 524 : CALL timestop(handle)
387 :
388 5240 : END SUBROUTINE mixed_cdft_init
389 :
390 : ! **************************************************************************************************
391 : !> \brief Driver routine to handle the build of CDFT weight/gradient in parallel and serial modes
392 : !> \param force_env the force_env that holds the CDFT states
393 : !> \param calculate_forces if forces should be calculated
394 : !> \param iforce_eval index of the currently active CDFT state (serial mode only)
395 : !> \par History
396 : !> 01.2017 created [Nico Holmberg]
397 : ! **************************************************************************************************
398 270 : SUBROUTINE mixed_cdft_build_weight(force_env, calculate_forces, iforce_eval)
399 : TYPE(force_env_type), POINTER :: force_env
400 : LOGICAL, INTENT(IN) :: calculate_forces
401 : INTEGER, INTENT(IN), OPTIONAL :: iforce_eval
402 :
403 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
404 :
405 270 : NULLIFY (mixed_cdft)
406 270 : CPASSERT(ASSOCIATED(force_env))
407 270 : CALL get_mixed_env(force_env%mixed_env, cdft_control=mixed_cdft)
408 270 : CPASSERT(ASSOCIATED(mixed_cdft))
409 270 : IF (.NOT. PRESENT(iforce_eval)) THEN
410 128 : SELECT CASE (mixed_cdft%run_type)
411 : CASE (mixed_cdft_parallel)
412 34 : CALL mixed_cdft_build_weight_parallel(force_env, calculate_forces)
413 : CASE (mixed_cdft_parallel_nobuild)
414 94 : CALL mixed_cdft_set_flags(force_env)
415 : CASE DEFAULT
416 : ! Do nothing
417 : END SELECT
418 : ELSE
419 316 : SELECT CASE (mixed_cdft%run_type)
420 : CASE (mixed_cdft_serial)
421 176 : CALL mixed_cdft_transfer_weight(force_env, calculate_forces, iforce_eval)
422 : CASE DEFAULT
423 : ! Do nothing
424 : END SELECT
425 : END IF
426 :
427 270 : END SUBROUTINE mixed_cdft_build_weight
428 :
429 : ! **************************************************************************************************
430 : !> \brief Build CDFT weight and gradient on 2N processors and copy it to two N processor subgroups
431 : !> \param force_env the force_env that holds the CDFT states
432 : !> \param calculate_forces if forces should be calculted
433 : !> \par History
434 : !> 01.2016 created [Nico Holmberg]
435 : ! **************************************************************************************************
436 34 : SUBROUTINE mixed_cdft_build_weight_parallel(force_env, calculate_forces)
437 : TYPE(force_env_type), POINTER :: force_env
438 : LOGICAL, INTENT(IN) :: calculate_forces
439 :
440 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_build_weight_parallel'
441 :
442 : INTEGER :: handle, handle2, i, iforce_eval, ind, INDEX(6), iounit, j, lb_min, &
443 : my_special_work, natom, nforce_eval, recv_offset, ub_max
444 : INTEGER, DIMENSION(2, 3) :: bo
445 34 : INTEGER, DIMENSION(:), POINTER :: lb, sendbuffer_i, ub
446 : REAL(KIND=dp) :: t1, t2
447 : TYPE(cdft_control_type), POINTER :: cdft_control, cdft_control_target
448 : TYPE(cp_logger_type), POINTER :: logger
449 : TYPE(cp_subsys_type), POINTER :: subsys_mix
450 : TYPE(dft_control_type), POINTER :: dft_control
451 : TYPE(force_env_type), POINTER :: force_env_qs
452 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
453 : TYPE(mixed_environment_type), POINTER :: mixed_env
454 34 : TYPE(mp_request_type), DIMENSION(:), POINTER :: req_total
455 : TYPE(particle_list_type), POINTER :: particles_mix
456 : TYPE(pw_env_type), POINTER :: pw_env
457 : TYPE(pw_pool_type), POINTER :: auxbas_pw_pool, mixed_auxbas_pw_pool
458 : TYPE(qs_environment_type), POINTER :: qs_env
459 : TYPE(section_vals_type), POINTER :: force_env_section, print_section
460 :
461 : TYPE buffers
462 : INTEGER :: imap(6)
463 : INTEGER, DIMENSION(:), &
464 : POINTER :: iv => null()
465 : REAL(KIND=dp), POINTER, &
466 : DIMENSION(:, :, :) :: r3 => null()
467 : REAL(KIND=dp), POINTER, &
468 : DIMENSION(:, :, :, :) :: r4 => null()
469 : END TYPE buffers
470 34 : TYPE(buffers), DIMENSION(:), POINTER :: recvbuffer
471 :
472 34 : NULLIFY (subsys_mix, force_env_qs, particles_mix, force_env_section, print_section, &
473 34 : mixed_env, mixed_cdft, pw_env, auxbas_pw_pool, mixed_auxbas_pw_pool, &
474 34 : qs_env, dft_control, sendbuffer_i, lb, ub, req_total, recvbuffer, &
475 34 : cdft_control, cdft_control_target)
476 :
477 68 : logger => cp_get_default_logger()
478 34 : CPASSERT(ASSOCIATED(force_env))
479 34 : nforce_eval = SIZE(force_env%sub_force_env)
480 34 : CALL timeset(routineN, handle)
481 34 : t1 = m_walltime()
482 : ! Get infos about the mixed subsys
483 : CALL force_env_get(force_env=force_env, &
484 : subsys=subsys_mix, &
485 34 : force_env_section=force_env_section)
486 : CALL cp_subsys_get(subsys=subsys_mix, &
487 34 : particles=particles_mix)
488 102 : DO iforce_eval = 1, nforce_eval
489 68 : IF (.NOT. ASSOCIATED(force_env%sub_force_env(iforce_eval)%force_env)) CYCLE
490 34 : SELECT CASE (force_env%sub_force_env(iforce_eval)%force_env%in_use)
491 : CASE (use_qs_force)
492 : force_env_qs => force_env%sub_force_env(iforce_eval)%force_env
493 : CASE (use_qmmm)
494 0 : force_env_qs => force_env%sub_force_env(iforce_eval)%force_env
495 : CASE DEFAULT
496 68 : CPASSERT(.FALSE.)
497 : END SELECT
498 : END DO
499 34 : IF (.NOT. force_env%mixed_env%do_mixed_qmmm_cdft) THEN
500 : CALL force_env_get(force_env=force_env_qs, &
501 : qs_env=qs_env, &
502 26 : subsys=subsys_mix)
503 : CALL cp_subsys_get(subsys=subsys_mix, &
504 26 : particles=particles_mix)
505 : ELSE
506 8 : qs_env => force_env_qs%qmmm_env%qs_env
507 8 : CALL get_qs_env(qs_env, cp_subsys=subsys_mix)
508 : CALL cp_subsys_get(subsys=subsys_mix, &
509 8 : particles=particles_mix)
510 : END IF
511 34 : mixed_env => force_env%mixed_env
512 34 : print_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
513 34 : iounit = cp_print_key_unit_nr(logger, print_section, '', extension='.mixedLog')
514 34 : CALL get_mixed_env(mixed_env, cdft_control=mixed_cdft)
515 34 : CPASSERT(ASSOCIATED(mixed_cdft))
516 34 : cdft_control => mixed_cdft%cdft_control
517 34 : CPASSERT(ASSOCIATED(cdft_control))
518 : ! Calculate the Becke weight function and gradient on all np processors
519 34 : CALL pw_env_get(pw_env=mixed_cdft%pw_env, auxbas_pw_pool=mixed_auxbas_pw_pool)
520 34 : natom = SIZE(particles_mix%els)
521 34 : CALL mixed_becke_constraint(force_env, calculate_forces)
522 : ! Start replicating the working arrays on both np/2 processor groups
523 34 : mixed_cdft%sim_step = mixed_cdft%sim_step + 1
524 34 : CALL get_qs_env(qs_env, pw_env=pw_env, dft_control=dft_control)
525 34 : cdft_control_target => dft_control%qs_control%cdft_control
526 34 : CPASSERT(dft_control%qs_control%cdft)
527 34 : CPASSERT(ASSOCIATED(cdft_control_target))
528 34 : CALL pw_env_get(pw_env, auxbas_pw_pool=auxbas_pw_pool)
529 340 : bo = auxbas_pw_pool%pw_grid%bounds_local
530 : ! First determine the size of the arrays along the confinement dir
531 34 : IF (mixed_cdft%is_special) THEN
532 : my_special_work = 2
533 : ELSE
534 34 : my_special_work = 1
535 : END IF
536 204 : ALLOCATE (recvbuffer(SIZE(mixed_cdft%source_list)))
537 238 : ALLOCATE (req_total(my_special_work*SIZE(mixed_cdft%dest_list) + SIZE(mixed_cdft%source_list)))
538 136 : ALLOCATE (lb(SIZE(mixed_cdft%source_list)), ub(SIZE(mixed_cdft%source_list)))
539 34 : IF (cdft_control%becke_control%cavity_confine) THEN
540 : ! Gaussian confinement => the bounds depend on the processor and need to be communicated
541 34 : ALLOCATE (sendbuffer_i(2))
542 204 : sendbuffer_i = cdft_control%becke_control%confine_bounds
543 102 : DO i = 1, SIZE(mixed_cdft%source_list)
544 68 : ALLOCATE (recvbuffer(i)%iv(2))
545 : CALL force_env%para_env%irecv(msgout=recvbuffer(i)%iv, source=mixed_cdft%source_list(i), &
546 102 : request=req_total(i))
547 : END DO
548 68 : DO i = 1, my_special_work
549 136 : DO j = 1, SIZE(mixed_cdft%dest_list)
550 68 : ind = j + (i - 1)*SIZE(mixed_cdft%dest_list) + SIZE(mixed_cdft%source_list)
551 : CALL force_env%para_env%isend(msgin=sendbuffer_i, &
552 : dest=mixed_cdft%dest_list(j) + (i - 1)*force_env%para_env%num_pe/2, &
553 102 : request=req_total(ind))
554 : END DO
555 : END DO
556 34 : CALL mp_waitall(req_total)
557 : ! Find the largest/smallest value of ub/lb
558 34 : DEALLOCATE (sendbuffer_i)
559 34 : lb_min = HUGE(0)
560 34 : ub_max = -HUGE(0)
561 102 : DO i = 1, SIZE(mixed_cdft%source_list)
562 68 : lb(i) = recvbuffer(i)%iv(1)
563 68 : ub(i) = recvbuffer(i)%iv(2)
564 68 : IF (lb(i) .LT. lb_min) lb_min = lb(i)
565 68 : IF (ub(i) .GT. ub_max) ub_max = ub(i)
566 102 : DEALLOCATE (recvbuffer(i)%iv)
567 : END DO
568 : ! Take into account the grids already communicated during dlb
569 34 : IF (mixed_cdft%dlb) THEN
570 16 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
571 24 : DO j = 1, SIZE(mixed_cdft%dlb_control%recv_work_repl)
572 24 : IF (mixed_cdft%dlb_control%recv_work_repl(j)) THEN
573 16 : DO i = 1, SIZE(mixed_cdft%dlb_control%recvbuff(j)%buffs)
574 8 : IF (LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 3) &
575 : .LT. lb_min) lb_min = LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 3)
576 16 : IF (UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 3) &
577 8 : .GT. ub_max) ub_max = UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 3)
578 : END DO
579 : END IF
580 : END DO
581 : END IF
582 : END IF
583 : ELSE
584 : ! No confinement
585 0 : ub_max = bo(2, 3)
586 0 : lb_min = bo(1, 3)
587 0 : lb = lb_min
588 0 : ub = ub_max
589 : END IF
590 : ! Determine the sender specific indices of grid slices that are to be received
591 34 : CALL timeset(routineN//"_comm", handle2)
592 102 : DO j = 1, SIZE(recvbuffer)
593 68 : ind = j + (j/2)
594 102 : IF (mixed_cdft%is_special) THEN
595 : recvbuffer(j)%imap = (/mixed_cdft%source_list_bo(1, j), mixed_cdft%source_list_bo(2, j), &
596 : mixed_cdft%source_list_bo(3, j), mixed_cdft%source_list_bo(4, j), &
597 0 : lb(j), ub(j)/)
598 68 : ELSE IF (mixed_cdft%is_pencil) THEN
599 0 : recvbuffer(j)%imap = (/bo(1, 1), bo(2, 1), mixed_cdft%recv_bo(ind), mixed_cdft%recv_bo(ind + 1), lb(j), ub(j)/)
600 : ELSE
601 476 : recvbuffer(j)%imap = (/mixed_cdft%recv_bo(ind), mixed_cdft%recv_bo(ind + 1), bo(1, 2), bo(2, 2), lb(j), ub(j)/)
602 : END IF
603 : END DO
604 34 : IF (mixed_cdft%dlb .AND. .NOT. mixed_cdft%is_special) THEN
605 8 : IF (mixed_cdft%dlb_control%recv_work_repl(1) .OR. mixed_cdft%dlb_control%recv_work_repl(2)) THEN
606 24 : DO j = 1, 2
607 16 : recv_offset = 0
608 16 : IF (mixed_cdft%dlb_control%recv_work_repl(j)) &
609 16 : recv_offset = SUM(mixed_cdft%dlb_control%recv_info(j)%target_list(2, :))
610 24 : IF (mixed_cdft%is_pencil) THEN
611 0 : recvbuffer(j)%imap(1) = recvbuffer(j)%imap(1) + recv_offset
612 : ELSE
613 16 : recvbuffer(j)%imap(3) = recvbuffer(j)%imap(3) + recv_offset
614 : END IF
615 : END DO
616 : END IF
617 : END IF
618 : ! Transfer the arrays one-by-one and deallocate shared storage
619 : ! Start with the weight function
620 102 : DO j = 1, SIZE(mixed_cdft%source_list)
621 : ALLOCATE (recvbuffer(j)%r3(recvbuffer(j)%imap(1):recvbuffer(j)%imap(2), &
622 : recvbuffer(j)%imap(3):recvbuffer(j)%imap(4), &
623 340 : recvbuffer(j)%imap(5):recvbuffer(j)%imap(6)))
624 :
625 : CALL force_env%para_env%irecv(msgout=recvbuffer(j)%r3, source=mixed_cdft%source_list(j), &
626 102 : request=req_total(j))
627 : END DO
628 68 : DO i = 1, my_special_work
629 136 : DO j = 1, SIZE(mixed_cdft%dest_list)
630 68 : ind = j + (i - 1)*SIZE(mixed_cdft%dest_list) + SIZE(mixed_cdft%source_list)
631 102 : IF (mixed_cdft%is_special) THEN
632 : CALL force_env%para_env%isend(msgin=mixed_cdft%sendbuff(j)%weight, &
633 : dest=mixed_cdft%dest_list(j) + (i - 1)*force_env%para_env%num_pe/2, &
634 0 : request=req_total(ind))
635 : ELSE
636 : CALL force_env%para_env%isend(msgin=mixed_cdft%weight, dest=mixed_cdft%dest_list(j), &
637 68 : request=req_total(ind))
638 : END IF
639 : END DO
640 : END DO
641 34 : CALL mp_waitall(req_total)
642 34 : IF (mixed_cdft%is_special) THEN
643 0 : DO j = 1, SIZE(mixed_cdft%dest_list)
644 0 : DEALLOCATE (mixed_cdft%sendbuff(j)%weight)
645 : END DO
646 : ELSE
647 34 : DEALLOCATE (mixed_cdft%weight)
648 : END IF
649 : ! In principle, we could reduce the memory footprint of becke_pot by only transferring the nonzero portion
650 : ! of the potential, but this would require a custom integrate_v_rspace
651 34 : ALLOCATE (cdft_control_target%group(1)%weight)
652 34 : CALL auxbas_pw_pool%create_pw(cdft_control_target%group(1)%weight)
653 34 : CALL pw_zero(cdft_control_target%group(1)%weight)
654 : ! Assemble the recved slices
655 102 : DO j = 1, SIZE(mixed_cdft%source_list)
656 : cdft_control_target%group(1)%weight%array(recvbuffer(j)%imap(1):recvbuffer(j)%imap(2), &
657 : recvbuffer(j)%imap(3):recvbuffer(j)%imap(4), &
658 7136554 : recvbuffer(j)%imap(5):recvbuffer(j)%imap(6)) = recvbuffer(j)%r3
659 : END DO
660 : ! Do the same for slices sent during dlb
661 34 : IF (mixed_cdft%dlb) THEN
662 16 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
663 24 : DO j = 1, SIZE(mixed_cdft%dlb_control%recv_work_repl)
664 24 : IF (mixed_cdft%dlb_control%recv_work_repl(j)) THEN
665 16 : DO i = 1, SIZE(mixed_cdft%dlb_control%recvbuff(j)%buffs)
666 : index = (/LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 1), &
667 : UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 1), &
668 : LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 2), &
669 : UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 2), &
670 : LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 3), &
671 104 : UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, 3)/)
672 : cdft_control_target%group(1)%weight%array(INDEX(1):INDEX(2), &
673 : INDEX(3):INDEX(4), &
674 : INDEX(5):INDEX(6)) = &
675 14096 : mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight
676 16 : DEALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight)
677 : END DO
678 : END IF
679 : END DO
680 : END IF
681 : END IF
682 : ! Gaussian confinement cavity
683 34 : IF (cdft_control%becke_control%cavity_confine) THEN
684 102 : DO j = 1, SIZE(mixed_cdft%source_list)
685 : CALL force_env%para_env%irecv(msgout=recvbuffer(j)%r3, source=mixed_cdft%source_list(j), &
686 102 : request=req_total(j))
687 : END DO
688 68 : DO i = 1, my_special_work
689 136 : DO j = 1, SIZE(mixed_cdft%dest_list)
690 68 : ind = j + (i - 1)*SIZE(mixed_cdft%dest_list) + SIZE(mixed_cdft%source_list)
691 102 : IF (mixed_cdft%is_special) THEN
692 : CALL force_env%para_env%isend(msgin=mixed_cdft%sendbuff(j)%cavity, &
693 : dest=mixed_cdft%dest_list(j) + (i - 1)*force_env%para_env%num_pe/2, &
694 0 : request=req_total(ind))
695 : ELSE
696 : CALL force_env%para_env%isend(msgin=mixed_cdft%cavity, dest=mixed_cdft%dest_list(j), &
697 68 : request=req_total(ind))
698 : END IF
699 : END DO
700 : END DO
701 34 : CALL mp_waitall(req_total)
702 34 : IF (mixed_cdft%is_special) THEN
703 0 : DO j = 1, SIZE(mixed_cdft%dest_list)
704 0 : DEALLOCATE (mixed_cdft%sendbuff(j)%cavity)
705 : END DO
706 : ELSE
707 34 : DEALLOCATE (mixed_cdft%cavity)
708 : END IF
709 : ! We only need the nonzero part of the confinement cavity
710 : ALLOCATE (cdft_control_target%becke_control%cavity_mat(bo(1, 1):bo(2, 1), &
711 : bo(1, 2):bo(2, 2), &
712 170 : lb_min:ub_max))
713 3504226 : cdft_control_target%becke_control%cavity_mat = 0.0_dp
714 :
715 102 : DO j = 1, SIZE(mixed_cdft%source_list)
716 : cdft_control_target%becke_control%cavity_mat(recvbuffer(j)%imap(1):recvbuffer(j)%imap(2), &
717 : recvbuffer(j)%imap(3):recvbuffer(j)%imap(4), &
718 7136554 : recvbuffer(j)%imap(5):recvbuffer(j)%imap(6)) = recvbuffer(j)%r3
719 : END DO
720 34 : IF (mixed_cdft%dlb) THEN
721 16 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
722 24 : DO j = 1, SIZE(mixed_cdft%dlb_control%recv_work_repl)
723 24 : IF (mixed_cdft%dlb_control%recv_work_repl(j)) THEN
724 16 : DO i = 1, SIZE(mixed_cdft%dlb_control%recvbuff(j)%buffs)
725 : index = (/LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity, 1), &
726 : UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity, 1), &
727 : LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity, 2), &
728 : UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity, 2), &
729 : LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity, 3), &
730 104 : UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity, 3)/)
731 : cdft_control_target%becke_control%cavity_mat(INDEX(1):INDEX(2), &
732 : INDEX(3):INDEX(4), &
733 : INDEX(5):INDEX(6)) = &
734 14096 : mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity
735 16 : DEALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity)
736 : END DO
737 : END IF
738 : END DO
739 : END IF
740 : END IF
741 : END IF
742 102 : DO j = 1, SIZE(mixed_cdft%source_list)
743 102 : DEALLOCATE (recvbuffer(j)%r3)
744 : END DO
745 34 : IF (calculate_forces) THEN
746 : ! Gradients of the weight function
747 72 : DO j = 1, SIZE(mixed_cdft%source_list)
748 : ALLOCATE (recvbuffer(j)%r4(3*natom, recvbuffer(j)%imap(1):recvbuffer(j)%imap(2), &
749 : recvbuffer(j)%imap(3):recvbuffer(j)%imap(4), &
750 288 : recvbuffer(j)%imap(5):recvbuffer(j)%imap(6)))
751 : CALL force_env%para_env%irecv(msgout=recvbuffer(j)%r4, source=mixed_cdft%source_list(j), &
752 72 : request=req_total(j))
753 : END DO
754 48 : DO i = 1, my_special_work
755 96 : DO j = 1, SIZE(mixed_cdft%dest_list)
756 48 : ind = j + (i - 1)*SIZE(mixed_cdft%dest_list) + SIZE(mixed_cdft%source_list)
757 72 : IF (mixed_cdft%is_special) THEN
758 : CALL force_env%para_env%isend(msgin=mixed_cdft%sendbuff(j)%gradients, &
759 : dest=mixed_cdft%dest_list(j) + (i - 1)*force_env%para_env%num_pe/2, &
760 0 : request=req_total(ind))
761 : ELSE
762 : CALL force_env%para_env%isend(msgin=cdft_control%group(1)%gradients, dest=mixed_cdft%dest_list(j), &
763 48 : request=req_total(ind))
764 : END IF
765 : END DO
766 : END DO
767 24 : CALL mp_waitall(req_total)
768 24 : IF (mixed_cdft%is_special) THEN
769 0 : DO j = 1, SIZE(mixed_cdft%dest_list)
770 0 : DEALLOCATE (mixed_cdft%sendbuff(j)%gradients)
771 : END DO
772 0 : DEALLOCATE (mixed_cdft%sendbuff)
773 : ELSE
774 24 : DEALLOCATE (cdft_control%group(1)%gradients)
775 : END IF
776 : ALLOCATE (cdft_control_target%group(1)%gradients(3*natom, bo(1, 1):bo(2, 1), &
777 144 : bo(1, 2):bo(2, 2), lb_min:ub_max))
778 72 : DO j = 1, SIZE(mixed_cdft%source_list)
779 : cdft_control_target%group(1)%gradients(:, recvbuffer(j)%imap(1):recvbuffer(j)%imap(2), &
780 : recvbuffer(j)%imap(3):recvbuffer(j)%imap(4), &
781 39235744 : recvbuffer(j)%imap(5):recvbuffer(j)%imap(6)) = recvbuffer(j)%r4
782 72 : DEALLOCATE (recvbuffer(j)%r4)
783 : END DO
784 24 : IF (mixed_cdft%dlb) THEN
785 16 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
786 24 : DO j = 1, SIZE(mixed_cdft%dlb_control%recv_work_repl)
787 24 : IF (mixed_cdft%dlb_control%recv_work_repl(j)) THEN
788 16 : DO i = 1, SIZE(mixed_cdft%dlb_control%recvbuff(j)%buffs)
789 : index = (/LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients, 2), &
790 : UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients, 2), &
791 : LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients, 3), &
792 : UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients, 3), &
793 : LBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients, 4), &
794 104 : UBOUND(mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients, 4)/)
795 : cdft_control_target%group(1)%gradients(:, INDEX(1):INDEX(2), &
796 : INDEX(3):INDEX(4), &
797 : INDEX(5):INDEX(6)) = &
798 90896 : mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients
799 16 : DEALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients)
800 : END DO
801 : END IF
802 : END DO
803 : END IF
804 : END IF
805 : END IF
806 : ! Clean up remaining temporaries
807 34 : IF (mixed_cdft%dlb) THEN
808 16 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
809 24 : DO j = 1, SIZE(mixed_cdft%dlb_control%recv_work_repl)
810 24 : IF (mixed_cdft%dlb_control%recv_work_repl(j)) THEN
811 8 : IF (ASSOCIATED(mixed_cdft%dlb_control%recv_info(j)%target_list)) &
812 8 : DEALLOCATE (mixed_cdft%dlb_control%recv_info(j)%target_list)
813 8 : DEALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs)
814 : END IF
815 : END DO
816 8 : DEALLOCATE (mixed_cdft%dlb_control%recv_info, mixed_cdft%dlb_control%recvbuff)
817 : END IF
818 8 : IF (ASSOCIATED(mixed_cdft%dlb_control%target_list)) &
819 4 : DEALLOCATE (mixed_cdft%dlb_control%target_list)
820 8 : DEALLOCATE (mixed_cdft%dlb_control%recv_work_repl)
821 : END IF
822 34 : DEALLOCATE (recvbuffer)
823 34 : DEALLOCATE (req_total)
824 34 : DEALLOCATE (lb)
825 34 : DEALLOCATE (ub)
826 34 : CALL timestop(handle2)
827 : ! Set some flags so the weight is not rebuilt during SCF
828 34 : cdft_control_target%external_control = .TRUE.
829 34 : cdft_control_target%need_pot = .FALSE.
830 34 : cdft_control_target%transfer_pot = .FALSE.
831 : ! Store the bound indices for force calculation
832 34 : IF (calculate_forces) THEN
833 24 : cdft_control_target%becke_control%confine_bounds(2) = ub_max
834 24 : cdft_control_target%becke_control%confine_bounds(1) = lb_min
835 : END IF
836 : CALL pw_scale(cdft_control_target%group(1)%weight, &
837 34 : cdft_control_target%group(1)%weight%pw_grid%dvol)
838 : ! Set flags for ET coupling calculation
839 34 : IF (mixed_env%do_mixed_et) THEN
840 34 : IF (MODULO(mixed_cdft%sim_step, mixed_env%et_freq) == 0) THEN
841 34 : dft_control%qs_control%cdft_control%do_et = .TRUE.
842 34 : dft_control%qs_control%cdft_control%calculate_metric = mixed_cdft%calculate_metric
843 : ELSE
844 0 : dft_control%qs_control%cdft_control%do_et = .FALSE.
845 0 : dft_control%qs_control%cdft_control%calculate_metric = .FALSE.
846 : END IF
847 : END IF
848 34 : t2 = m_walltime()
849 34 : IF (iounit > 0) THEN
850 17 : WRITE (iounit, '(A)') ' '
851 17 : WRITE (iounit, '(T2,A,F6.1,A)') 'MIXED_CDFT| Becke constraint built in ', t2 - t1, ' seconds'
852 17 : WRITE (iounit, '(A)') ' '
853 : END IF
854 : CALL cp_print_key_finished_output(iounit, logger, force_env_section, &
855 34 : "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
856 34 : CALL timestop(handle)
857 :
858 102 : END SUBROUTINE mixed_cdft_build_weight_parallel
859 :
860 : ! **************************************************************************************************
861 : !> \brief Transfer CDFT weight/gradient between force_evals
862 : !> \param force_env the force_env that holds the CDFT sub_force_envs
863 : !> \param calculate_forces if forces should be computed
864 : !> \param iforce_eval index of the currently active CDFT state
865 : !> \par History
866 : !> 01.2017 created [Nico Holmberg]
867 : ! **************************************************************************************************
868 280 : SUBROUTINE mixed_cdft_transfer_weight(force_env, calculate_forces, iforce_eval)
869 : TYPE(force_env_type), POINTER :: force_env
870 : LOGICAL, INTENT(IN) :: calculate_forces
871 : INTEGER, INTENT(IN) :: iforce_eval
872 :
873 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_transfer_weight'
874 :
875 : INTEGER :: bounds_of(8), handle, iatom, igroup, &
876 : jforce_eval, nforce_eval
877 : LOGICAL, SAVE :: first_call = .TRUE.
878 : TYPE(cdft_control_type), POINTER :: cdft_control_source, cdft_control_target
879 : TYPE(dft_control_type), POINTER :: dft_control_source, dft_control_target
880 : TYPE(force_env_type), POINTER :: force_env_qs_source, force_env_qs_target
881 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
882 : TYPE(mixed_environment_type), POINTER :: mixed_env
883 : TYPE(pw_env_type), POINTER :: pw_env_source, pw_env_target
884 : TYPE(pw_pool_type), POINTER :: auxbas_pw_pool_source, &
885 : auxbas_pw_pool_target
886 : TYPE(qs_environment_type), POINTER :: qs_env_source, qs_env_target
887 :
888 140 : NULLIFY (mixed_cdft, dft_control_source, dft_control_target, force_env_qs_source, &
889 140 : force_env_qs_target, pw_env_source, pw_env_target, auxbas_pw_pool_source, &
890 140 : auxbas_pw_pool_target, qs_env_source, qs_env_target, mixed_env, &
891 140 : cdft_control_source, cdft_control_target)
892 140 : mixed_env => force_env%mixed_env
893 140 : CALL get_mixed_env(mixed_env, cdft_control=mixed_cdft)
894 140 : CALL timeset(routineN, handle)
895 140 : IF (iforce_eval == 1) THEN
896 : jforce_eval = 1
897 : ELSE
898 82 : jforce_eval = iforce_eval - 1
899 : END IF
900 140 : nforce_eval = SIZE(force_env%sub_force_env)
901 280 : SELECT CASE (force_env%sub_force_env(jforce_eval)%force_env%in_use)
902 : CASE (use_qs_force, use_qmmm)
903 140 : force_env_qs_source => force_env%sub_force_env(jforce_eval)%force_env
904 140 : force_env_qs_target => force_env%sub_force_env(iforce_eval)%force_env
905 : CASE DEFAULT
906 140 : CPASSERT(.FALSE.)
907 : END SELECT
908 140 : IF (.NOT. force_env%mixed_env%do_mixed_qmmm_cdft) THEN
909 : CALL force_env_get(force_env=force_env_qs_source, &
910 124 : qs_env=qs_env_source)
911 : CALL force_env_get(force_env=force_env_qs_target, &
912 124 : qs_env=qs_env_target)
913 : ELSE
914 16 : qs_env_source => force_env_qs_source%qmmm_env%qs_env
915 16 : qs_env_target => force_env_qs_target%qmmm_env%qs_env
916 : END IF
917 140 : IF (iforce_eval == 1) THEN
918 : ! The first force_eval builds the weight function and gradients in qs_cdft_methods.F
919 : ! Set some flags so the constraint is saved if the constraint definitions are identical in all CDFT states
920 58 : CALL get_qs_env(qs_env_source, dft_control=dft_control_source)
921 58 : cdft_control_source => dft_control_source%qs_control%cdft_control
922 58 : cdft_control_source%external_control = .FALSE.
923 58 : cdft_control_source%need_pot = .TRUE.
924 58 : IF (mixed_cdft%identical_constraints) THEN
925 56 : cdft_control_source%transfer_pot = .TRUE.
926 : ELSE
927 2 : cdft_control_source%transfer_pot = .FALSE.
928 : END IF
929 58 : mixed_cdft%sim_step = mixed_cdft%sim_step + 1
930 : ELSE
931 : ! Transfer the constraint from the ith force_eval to the i+1th
932 : CALL get_qs_env(qs_env_source, dft_control=dft_control_source, &
933 82 : pw_env=pw_env_source)
934 82 : CALL pw_env_get(pw_env_source, auxbas_pw_pool=auxbas_pw_pool_source)
935 82 : cdft_control_source => dft_control_source%qs_control%cdft_control
936 : CALL get_qs_env(qs_env_target, dft_control=dft_control_target, &
937 82 : pw_env=pw_env_target)
938 82 : CALL pw_env_get(pw_env_target, auxbas_pw_pool=auxbas_pw_pool_target)
939 82 : cdft_control_target => dft_control_target%qs_control%cdft_control
940 : ! The constraint can be transferred only when the constraint defitions are identical in all CDFT states
941 82 : IF (mixed_cdft%identical_constraints) THEN
942 : ! Weight function
943 162 : DO igroup = 1, SIZE(cdft_control_target%group)
944 82 : ALLOCATE (cdft_control_target%group(igroup)%weight)
945 82 : CALL auxbas_pw_pool_target%create_pw(cdft_control_target%group(igroup)%weight)
946 : ! We have ensured that the grids are consistent => no danger in using explicit copy
947 82 : CALL pw_copy(cdft_control_source%group(igroup)%weight, cdft_control_target%group(igroup)%weight)
948 82 : CALL auxbas_pw_pool_source%give_back_pw(cdft_control_source%group(igroup)%weight)
949 162 : DEALLOCATE (cdft_control_source%group(igroup)%weight)
950 : END DO
951 : ! Cavity
952 80 : IF (cdft_control_source%type == outer_scf_becke_constraint) THEN
953 74 : IF (cdft_control_source%becke_control%cavity_confine) THEN
954 72 : CALL auxbas_pw_pool_target%create_pw(cdft_control_target%becke_control%cavity)
955 72 : CALL pw_copy(cdft_control_source%becke_control%cavity, cdft_control_target%becke_control%cavity)
956 72 : CALL auxbas_pw_pool_source%give_back_pw(cdft_control_source%becke_control%cavity)
957 : END IF
958 : END IF
959 : ! Gradients
960 80 : IF (calculate_forces) THEN
961 40 : DO igroup = 1, SIZE(cdft_control_source%group)
962 : bounds_of = (/LBOUND(cdft_control_source%group(igroup)%gradients, 1), &
963 : UBOUND(cdft_control_source%group(igroup)%gradients, 1), &
964 : LBOUND(cdft_control_source%group(igroup)%gradients, 2), &
965 : UBOUND(cdft_control_source%group(igroup)%gradients, 2), &
966 : LBOUND(cdft_control_source%group(igroup)%gradients, 3), &
967 : UBOUND(cdft_control_source%group(igroup)%gradients, 3), &
968 : LBOUND(cdft_control_source%group(igroup)%gradients, 4), &
969 340 : UBOUND(cdft_control_source%group(igroup)%gradients, 4)/)
970 : ALLOCATE (cdft_control_target%group(igroup)% &
971 : gradients(bounds_of(1):bounds_of(2), bounds_of(3):bounds_of(4), &
972 120 : bounds_of(5):bounds_of(6), bounds_of(7):bounds_of(8)))
973 17857864 : cdft_control_target%group(igroup)%gradients = cdft_control_source%group(igroup)%gradients
974 40 : DEALLOCATE (cdft_control_source%group(igroup)%gradients)
975 : END DO
976 : END IF
977 : ! Atomic weight functions needed for CDFT charges
978 80 : IF (cdft_control_source%atomic_charges) THEN
979 18 : IF (.NOT. ASSOCIATED(cdft_control_target%charge)) &
980 10 : ALLOCATE (cdft_control_target%charge(cdft_control_target%natoms))
981 54 : DO iatom = 1, cdft_control_target%natoms
982 36 : CALL auxbas_pw_pool_target%create_pw(cdft_control_target%charge(iatom))
983 36 : CALL pw_copy(cdft_control_source%charge(iatom), cdft_control_target%charge(iatom))
984 54 : CALL auxbas_pw_pool_source%give_back_pw(cdft_control_source%charge(iatom))
985 : END DO
986 : END IF
987 : ! Set some flags so the weight is not rebuilt during SCF
988 80 : cdft_control_target%external_control = .FALSE.
989 80 : cdft_control_target%need_pot = .FALSE.
990 : ! For states i+1 < nforce_eval, prevent deallocation of constraint
991 80 : IF (iforce_eval == nforce_eval) THEN
992 56 : cdft_control_target%transfer_pot = .FALSE.
993 : ELSE
994 24 : cdft_control_target%transfer_pot = .TRUE.
995 : END IF
996 80 : cdft_control_target%first_iteration = .FALSE.
997 : ELSE
998 : ! Force rebuild of constraint and dont save it
999 2 : cdft_control_target%external_control = .FALSE.
1000 2 : cdft_control_target%need_pot = .TRUE.
1001 2 : cdft_control_target%transfer_pot = .FALSE.
1002 2 : IF (first_call) THEN
1003 2 : cdft_control_target%first_iteration = .TRUE.
1004 : ELSE
1005 0 : cdft_control_target%first_iteration = .FALSE.
1006 : END IF
1007 : END IF
1008 : END IF
1009 : ! Set flags for ET coupling calculation
1010 140 : IF (mixed_env%do_mixed_et) THEN
1011 140 : IF (MODULO(mixed_cdft%sim_step, mixed_env%et_freq) == 0) THEN
1012 140 : IF (iforce_eval == 1) THEN
1013 58 : cdft_control_source%do_et = .TRUE.
1014 58 : cdft_control_source%calculate_metric = mixed_cdft%calculate_metric
1015 : ELSE
1016 82 : cdft_control_target%do_et = .TRUE.
1017 82 : cdft_control_target%calculate_metric = mixed_cdft%calculate_metric
1018 : END IF
1019 : ELSE
1020 0 : IF (iforce_eval == 1) THEN
1021 0 : cdft_control_source%do_et = .FALSE.
1022 0 : cdft_control_source%calculate_metric = .FALSE.
1023 : ELSE
1024 0 : cdft_control_target%do_et = .FALSE.
1025 0 : cdft_control_target%calculate_metric = .FALSE.
1026 : END IF
1027 : END IF
1028 : END IF
1029 140 : IF (iforce_eval == nforce_eval .AND. first_call) first_call = .FALSE.
1030 140 : CALL timestop(handle)
1031 :
1032 140 : END SUBROUTINE mixed_cdft_transfer_weight
1033 :
1034 : ! **************************************************************************************************
1035 : !> \brief In case CDFT states are treated in parallel, sets flags so that each CDFT state
1036 : !> builds their own weight functions and gradients
1037 : !> \param force_env the force_env that holds the CDFT sub_force_envs
1038 : !> \par History
1039 : !> 09.2018 created [Nico Holmberg]
1040 : ! **************************************************************************************************
1041 4 : SUBROUTINE mixed_cdft_set_flags(force_env)
1042 : TYPE(force_env_type), POINTER :: force_env
1043 :
1044 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_set_flags'
1045 :
1046 : INTEGER :: handle, iforce_eval, nforce_eval
1047 : LOGICAL, SAVE :: first_call = .TRUE.
1048 : TYPE(cdft_control_type), POINTER :: cdft_control
1049 : TYPE(dft_control_type), POINTER :: dft_control
1050 : TYPE(force_env_type), POINTER :: force_env_qs
1051 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
1052 : TYPE(mixed_environment_type), POINTER :: mixed_env
1053 : TYPE(qs_environment_type), POINTER :: qs_env
1054 :
1055 2 : NULLIFY (mixed_cdft, dft_control, force_env_qs, qs_env, mixed_env, cdft_control)
1056 2 : mixed_env => force_env%mixed_env
1057 2 : CALL get_mixed_env(mixed_env, cdft_control=mixed_cdft)
1058 2 : CALL timeset(routineN, handle)
1059 2 : nforce_eval = SIZE(force_env%sub_force_env)
1060 6 : DO iforce_eval = 1, nforce_eval
1061 4 : IF (.NOT. ASSOCIATED(force_env%sub_force_env(iforce_eval)%force_env)) CYCLE
1062 2 : SELECT CASE (force_env%sub_force_env(iforce_eval)%force_env%in_use)
1063 : CASE (use_qs_force, use_qmmm)
1064 0 : force_env_qs => force_env%sub_force_env(iforce_eval)%force_env
1065 : CASE DEFAULT
1066 2 : CPASSERT(.FALSE.)
1067 : END SELECT
1068 2 : IF (.NOT. force_env%mixed_env%do_mixed_qmmm_cdft) THEN
1069 2 : CALL force_env_get(force_env=force_env_qs, qs_env=qs_env)
1070 : ELSE
1071 0 : qs_env => force_env_qs%qmmm_env%qs_env
1072 : END IF
1073 : ! All force_evals build the weight function and gradients in qs_cdft_methods.F
1074 : ! Update flags to match run type
1075 2 : CALL get_qs_env(qs_env, dft_control=dft_control)
1076 2 : cdft_control => dft_control%qs_control%cdft_control
1077 2 : cdft_control%external_control = .FALSE.
1078 2 : cdft_control%need_pot = .TRUE.
1079 2 : cdft_control%transfer_pot = .FALSE.
1080 2 : IF (first_call) THEN
1081 2 : cdft_control%first_iteration = .TRUE.
1082 : ELSE
1083 0 : cdft_control%first_iteration = .FALSE.
1084 : END IF
1085 : ! Set flags for ET coupling calculation
1086 4 : IF (mixed_env%do_mixed_et) THEN
1087 2 : IF (MODULO(mixed_cdft%sim_step, mixed_env%et_freq) == 0) THEN
1088 2 : cdft_control%do_et = .TRUE.
1089 2 : cdft_control%calculate_metric = mixed_cdft%calculate_metric
1090 : ELSE
1091 0 : cdft_control%do_et = .FALSE.
1092 0 : cdft_control%calculate_metric = .FALSE.
1093 : END IF
1094 : END IF
1095 : END DO
1096 2 : mixed_cdft%sim_step = mixed_cdft%sim_step + 1
1097 2 : IF (first_call) first_call = .FALSE.
1098 2 : CALL timestop(handle)
1099 :
1100 2 : END SUBROUTINE mixed_cdft_set_flags
1101 :
1102 : ! **************************************************************************************************
1103 : !> \brief Driver routine to calculate the electronic coupling(s) between CDFT states.
1104 : !> \param force_env the force_env that holds the CDFT states
1105 : !> \par History
1106 : !> 02.15 created [Nico Holmberg]
1107 : ! **************************************************************************************************
1108 188 : SUBROUTINE mixed_cdft_calculate_coupling(force_env)
1109 : TYPE(force_env_type), POINTER :: force_env
1110 :
1111 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_calculate_coupling'
1112 :
1113 : INTEGER :: handle
1114 :
1115 94 : CPASSERT(ASSOCIATED(force_env))
1116 94 : CALL timeset(routineN, handle)
1117 : ! Move needed arrays from individual CDFT states to the mixed CDFT env
1118 94 : CALL mixed_cdft_redistribute_arrays(force_env)
1119 : ! Calculate the mixed CDFT Hamiltonian and overlap matrices.
1120 : ! All work matrices defined in the wavefunction basis get deallocated on exit.
1121 : ! Any analyses which depend on these work matrices are performed within.
1122 94 : CALL mixed_cdft_interaction_matrices(force_env)
1123 : ! Calculate eletronic couplings between states (Lowdin/rotation)
1124 94 : CALL mixed_cdft_calculate_coupling_low(force_env)
1125 : ! Print out couplings
1126 94 : CALL mixed_cdft_print_couplings(force_env)
1127 : ! Block diagonalize the mixed CDFT Hamiltonian matrix
1128 94 : CALL mixed_cdft_block_diag(force_env)
1129 : ! CDFT Configuration Interaction
1130 94 : CALL mixed_cdft_configuration_interaction(force_env)
1131 : ! Clean up
1132 94 : CALL mixed_cdft_release_work(force_env)
1133 94 : CALL timestop(handle)
1134 :
1135 94 : END SUBROUTINE mixed_cdft_calculate_coupling
1136 :
1137 : ! **************************************************************************************************
1138 : !> \brief Routine to calculate the mixed CDFT Hamiltonian and overlap matrices.
1139 : !> \param force_env the force_env that holds the CDFT states
1140 : !> \par History
1141 : !> 11.17 created [Nico Holmberg]
1142 : ! **************************************************************************************************
1143 94 : SUBROUTINE mixed_cdft_interaction_matrices(force_env)
1144 : TYPE(force_env_type), POINTER :: force_env
1145 :
1146 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_interaction_matrices'
1147 :
1148 : INTEGER :: check_ao(2), check_mo(2), handle, iforce_eval, ipermutation, ispin, istate, ivar, &
1149 : j, jstate, k, moeigvalunit, mounit, nao, ncol_local, nforce_eval, nmo, npermutations, &
1150 : nrow_local, nspins, nvar
1151 94 : INTEGER, ALLOCATABLE, DIMENSION(:) :: ncol_mo, nrow_mo
1152 94 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: homo
1153 : LOGICAL :: nelectron_mismatch, print_mo, &
1154 : print_mo_eigval, should_scale, &
1155 : uniform_occupation
1156 : REAL(KIND=dp) :: c(2), eps_occupied, nelectron_tot, &
1157 : sum_a(2), sum_b(2)
1158 94 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: coupling_nonortho, eigenv, energy, Sda
1159 94 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: H_mat, S_det, S_mat, strength, tmp_mat, &
1160 94 : W_diagonal, Wad, Wda
1161 94 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: a, b
1162 94 : REAL(KIND=dp), DIMENSION(:), POINTER :: mo_eigval
1163 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_mo, mo_mo_fmstruct
1164 : TYPE(cp_fm_type) :: inverse_mat, Tinverse, tmp2
1165 94 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: mo_overlap
1166 94 : TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :) :: w_matrix_mo
1167 94 : TYPE(cp_fm_type), DIMENSION(:, :), POINTER :: mixed_mo_coeff
1168 : TYPE(cp_logger_type), POINTER :: logger
1169 94 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: density_matrix, density_matrix_diff, &
1170 94 : w_matrix
1171 : TYPE(dbcsr_type), POINTER :: mixed_matrix_s
1172 : TYPE(dft_control_type), POINTER :: dft_control
1173 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
1174 : TYPE(mixed_environment_type), POINTER :: mixed_env
1175 : TYPE(qs_energy_type), POINTER :: energy_qs
1176 : TYPE(qs_environment_type), POINTER :: qs_env
1177 : TYPE(section_vals_type), POINTER :: force_env_section, mixed_cdft_section, &
1178 : print_section
1179 :
1180 94 : NULLIFY (force_env_section, print_section, mixed_cdft_section, &
1181 94 : mixed_env, mixed_cdft, qs_env, dft_control, fm_struct_mo, &
1182 94 : density_matrix_diff, mo_mo_fmstruct, &
1183 94 : mixed_mo_coeff, mixed_matrix_s, &
1184 94 : density_matrix, energy_qs, w_matrix, mo_eigval)
1185 188 : logger => cp_get_default_logger()
1186 94 : CPASSERT(ASSOCIATED(force_env))
1187 94 : CALL timeset(routineN, handle)
1188 : CALL force_env_get(force_env=force_env, &
1189 94 : force_env_section=force_env_section)
1190 94 : mixed_env => force_env%mixed_env
1191 94 : nforce_eval = SIZE(force_env%sub_force_env)
1192 94 : print_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
1193 94 : IF (section_get_lval(print_section, "MO_OVERLAP_MATRIX")) THEN
1194 2 : print_mo = .TRUE.
1195 2 : mounit = cp_print_key_unit_nr(logger, print_section, extension='.moOverlap', on_file=.TRUE.)
1196 : ELSE
1197 : print_mo = .FALSE.
1198 : END IF
1199 94 : IF (section_get_lval(print_section, "MO_OVERLAP_EIGENVALUES")) THEN
1200 14 : print_mo_eigval = .TRUE.
1201 14 : moeigvalunit = cp_print_key_unit_nr(logger, print_section, extension='.moOverlapEigval', on_file=.TRUE.)
1202 : ELSE
1203 : print_mo_eigval = .FALSE.
1204 : END IF
1205 94 : CALL get_mixed_env(mixed_env, cdft_control=mixed_cdft)
1206 : ! Get redistributed work matrices
1207 94 : CPASSERT(ASSOCIATED(mixed_cdft))
1208 94 : CPASSERT(ASSOCIATED(mixed_cdft%matrix%mixed_mo_coeff))
1209 94 : CPASSERT(ASSOCIATED(mixed_cdft%matrix%w_matrix))
1210 94 : CPASSERT(ASSOCIATED(mixed_cdft%matrix%mixed_matrix_s))
1211 94 : mixed_mo_coeff => mixed_cdft%matrix%mixed_mo_coeff
1212 94 : w_matrix => mixed_cdft%matrix%w_matrix
1213 94 : mixed_matrix_s => mixed_cdft%matrix%mixed_matrix_s
1214 94 : IF (mixed_cdft%calculate_metric) THEN
1215 14 : CPASSERT(ASSOCIATED(mixed_cdft%matrix%density_matrix))
1216 14 : density_matrix => mixed_cdft%matrix%density_matrix
1217 : END IF
1218 : ! Get number of weight functions per state
1219 94 : nvar = SIZE(w_matrix, 2)
1220 94 : nspins = SIZE(mixed_mo_coeff, 2)
1221 : ! Check that the number of MOs/AOs is equal in every CDFT state
1222 376 : ALLOCATE (nrow_mo(nspins), ncol_mo(nspins))
1223 282 : DO ispin = 1, nspins
1224 188 : CALL cp_fm_get_info(mixed_mo_coeff(1, ispin), ncol_global=check_mo(1), nrow_global=check_ao(1))
1225 518 : DO iforce_eval = 2, nforce_eval
1226 236 : CALL cp_fm_get_info(mixed_mo_coeff(iforce_eval, ispin), ncol_global=check_mo(2), nrow_global=check_ao(2))
1227 236 : IF (check_ao(1) /= check_ao(2)) &
1228 : CALL cp_abort(__LOCATION__, &
1229 0 : "The number of atomic orbitals must be the same in every CDFT state.")
1230 236 : IF (check_mo(1) /= check_mo(2)) &
1231 : CALL cp_abort(__LOCATION__, &
1232 188 : "The number of molecular orbitals must be the same in every CDFT state.")
1233 : END DO
1234 : END DO
1235 : ! Allocate work
1236 94 : npermutations = nforce_eval*(nforce_eval - 1)/2 ! Size of upper triangular part
1237 1354 : ALLOCATE (w_matrix_mo(nforce_eval, nforce_eval, nvar))
1238 740 : ALLOCATE (mo_overlap(npermutations), S_det(npermutations, nspins))
1239 752 : ALLOCATE (a(nspins, nvar, npermutations), b(nspins, nvar, npermutations))
1240 804 : a = 0.0_dp
1241 804 : b = 0.0_dp
1242 94 : IF (mixed_cdft%calculate_metric) THEN
1243 106 : ALLOCATE (density_matrix_diff(npermutations, nspins))
1244 42 : DO ispin = 1, nspins
1245 78 : DO ipermutation = 1, npermutations
1246 36 : NULLIFY (density_matrix_diff(ipermutation, ispin)%matrix)
1247 36 : CALL dbcsr_init_p(density_matrix_diff(ipermutation, ispin)%matrix)
1248 36 : CALL map_permutation_to_states(nforce_eval, ipermutation, istate, jstate)
1249 : CALL dbcsr_copy(density_matrix_diff(ipermutation, ispin)%matrix, &
1250 64 : density_matrix(istate, ispin)%matrix, name="DENSITY_MATRIX")
1251 : END DO
1252 : END DO
1253 : END IF
1254 : ! Check for uniform occupations
1255 94 : uniform_occupation = .NOT. ALLOCATED(mixed_cdft%occupations)
1256 94 : should_scale = .FALSE.
1257 94 : IF (.NOT. uniform_occupation) THEN
1258 56 : ALLOCATE (homo(nforce_eval, nspins))
1259 14 : mixed_cdft_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT")
1260 14 : CALL section_vals_val_get(mixed_cdft_section, "EPS_OCCUPIED", r_val=eps_occupied)
1261 14 : IF (eps_occupied .GT. 1.0_dp .OR. eps_occupied .LT. 0.0_dp) &
1262 : CALL cp_abort(__LOCATION__, &
1263 0 : "Keyword EPS_OCCUPIED only accepts values between 0.0 and 1.0")
1264 14 : IF (mixed_cdft%eps_svd == 0.0_dp) &
1265 : CALL cp_warn(__LOCATION__, &
1266 : "The usage of SVD based matrix inversions with fractionally occupied "// &
1267 0 : "orbitals is strongly recommended to screen nearly orthogonal states.")
1268 28 : CALL section_vals_val_get(mixed_cdft_section, "SCALE_WITH_OCCUPATION_NUMBERS", l_val=should_scale)
1269 : END IF
1270 : ! Start the actual calculation
1271 282 : DO ispin = 1, nspins
1272 : ! Create the MOxMO fm struct (mo_mo_fm_pools%struct)
1273 : ! The number of MOs/AOs is equal to the number of columns/rows of mo_coeff(:,:)%matrix
1274 188 : NULLIFY (fm_struct_mo, mo_mo_fmstruct)
1275 188 : CALL cp_fm_get_info(mixed_mo_coeff(1, ispin), ncol_global=ncol_mo(ispin), nrow_global=nrow_mo(ispin))
1276 188 : nao = nrow_mo(ispin)
1277 188 : IF (uniform_occupation) THEN
1278 160 : nmo = ncol_mo(ispin)
1279 : ELSE
1280 28 : nmo = ncol_mo(ispin)
1281 : ! Find indices of highest (fractionally) occupied molecular orbital
1282 84 : homo(:, ispin) = nmo
1283 84 : DO istate = 1, nforce_eval
1284 152 : DO j = nmo, 1, -1
1285 124 : IF (mixed_cdft%occupations(istate, ispin)%array(j) .GE. eps_occupied) THEN
1286 56 : homo(istate, ispin) = j
1287 56 : EXIT
1288 : END IF
1289 : END DO
1290 : END DO
1291 : ! Make matrices square by using the largest homo and emit warning if a state has fewer occupied MOs
1292 : ! Although it would be possible to handle the nonsquare situation as well,
1293 : ! all CDFT states should be in the same spin state for meaningful results
1294 84 : nmo = MAXVAL(homo(:, ispin))
1295 : ! Also check that the number of electrons is conserved (using a fixed sensible threshold)
1296 28 : nelectron_mismatch = .FALSE.
1297 92 : nelectron_tot = SUM(mixed_cdft%occupations(1, ispin)%array(1:nmo))
1298 56 : DO istate = 2, nforce_eval
1299 92 : IF (ABS(SUM(mixed_cdft%occupations(istate, ispin)%array(1:nmo)) - nelectron_tot) .GT. 1.0E-4_dp) &
1300 28 : nelectron_mismatch = .TRUE.
1301 : END DO
1302 84 : IF (ANY(homo(:, ispin) /= nmo)) THEN
1303 0 : IF (ispin == 1) THEN
1304 : CALL cp_warn(__LOCATION__, &
1305 : "The number of occupied alpha MOs is not constant across all CDFT states. "// &
1306 0 : "Calculation proceeds but the results will likely be meaningless.")
1307 : ELSE
1308 : CALL cp_warn(__LOCATION__, &
1309 : "The number of occupied beta MOs is not constant across all CDFT states. "// &
1310 0 : "Calculation proceeds but the results will likely be meaningless.")
1311 : END IF
1312 28 : ELSE IF (nelectron_mismatch) THEN
1313 0 : IF (ispin == 1) THEN
1314 : CALL cp_warn(__LOCATION__, &
1315 : "The number of alpha electrons is not constant across all CDFT states. "// &
1316 0 : "Calculation proceeds but the results will likely be meaningless.")
1317 : ELSE
1318 : CALL cp_warn(__LOCATION__, &
1319 : "The number of beta electrons is not constant across all CDFT states. "// &
1320 0 : "Calculation proceeds but the results will likely be meaningless.")
1321 : END IF
1322 : END IF
1323 : END IF
1324 : CALL cp_fm_struct_create(fm_struct_mo, nrow_global=nao, ncol_global=nmo, &
1325 188 : context=mixed_cdft%blacs_env, para_env=force_env%para_env)
1326 : CALL cp_fm_struct_create(mo_mo_fmstruct, nrow_global=nmo, ncol_global=nmo, &
1327 188 : context=mixed_cdft%blacs_env, para_env=force_env%para_env)
1328 : ! Allocate work
1329 : CALL cp_fm_create(matrix=tmp2, matrix_struct=fm_struct_mo, &
1330 188 : name="ET_TMP_"//TRIM(ADJUSTL(cp_to_string(ispin)))//"_MATRIX")
1331 188 : CALL cp_fm_struct_release(fm_struct_mo)
1332 : CALL cp_fm_create(matrix=inverse_mat, matrix_struct=mo_mo_fmstruct, &
1333 188 : name="INVERSE_"//TRIM(ADJUSTL(cp_to_string(ispin)))//"_MATRIX")
1334 : CALL cp_fm_create(matrix=Tinverse, matrix_struct=mo_mo_fmstruct, &
1335 188 : name="T_INVERSE_"//TRIM(ADJUSTL(cp_to_string(ispin)))//"_MATRIX")
1336 540 : DO istate = 1, npermutations
1337 : CALL cp_fm_create(matrix=mo_overlap(istate), matrix_struct=mo_mo_fmstruct, &
1338 : name="MO_OVERLAP_"//TRIM(ADJUSTL(cp_to_string(istate)))//"_"// &
1339 540 : TRIM(ADJUSTL(cp_to_string(ispin)))//"_MATRIX")
1340 : END DO
1341 380 : DO ivar = 1, nvar
1342 812 : DO istate = 1, nforce_eval
1343 1768 : DO jstate = 1, nforce_eval
1344 1144 : IF (istate == jstate) CYCLE
1345 : CALL cp_fm_create(matrix=w_matrix_mo(istate, jstate, ivar), matrix_struct=mo_mo_fmstruct, &
1346 : name="W_"//TRIM(ADJUSTL(cp_to_string(istate)))//"_"// &
1347 : TRIM(ADJUSTL(cp_to_string(jstate)))//"_"// &
1348 1576 : TRIM(ADJUSTL(cp_to_string(ivar)))//"_MATRIX")
1349 : END DO
1350 : END DO
1351 : END DO
1352 188 : CALL cp_fm_struct_release(mo_mo_fmstruct)
1353 : ! Remove empty MOs and (possibly) scale rest with occupation numbers
1354 188 : IF (.NOT. uniform_occupation) THEN
1355 84 : DO iforce_eval = 1, nforce_eval
1356 56 : CALL cp_fm_to_fm(mixed_mo_coeff(iforce_eval, ispin), tmp2, nmo, 1, 1)
1357 56 : CALL cp_fm_release(mixed_mo_coeff(iforce_eval, ispin))
1358 : CALL cp_fm_create(mixed_mo_coeff(iforce_eval, ispin), &
1359 : matrix_struct=tmp2%matrix_struct, &
1360 : name="MO_COEFF_"//TRIM(ADJUSTL(cp_to_string(iforce_eval)))//"_" &
1361 56 : //TRIM(ADJUSTL(cp_to_string(ispin)))//"_MATRIX")
1362 56 : CALL cp_fm_to_fm(tmp2, mixed_mo_coeff(iforce_eval, ispin))
1363 56 : IF (should_scale) &
1364 : CALL cp_fm_column_scale(mixed_mo_coeff(iforce_eval, ispin), &
1365 40 : mixed_cdft%occupations(iforce_eval, ispin)%array(1:nmo))
1366 84 : DEALLOCATE (mixed_cdft%occupations(iforce_eval, ispin)%array)
1367 : END DO
1368 : END IF
1369 : ! calculate the MO overlaps (C_j)^T S C_i
1370 188 : ipermutation = 0
1371 612 : DO istate = 1, nforce_eval
1372 964 : DO jstate = istate + 1, nforce_eval
1373 352 : ipermutation = ipermutation + 1
1374 : CALL cp_dbcsr_sm_fm_multiply(mixed_matrix_s, mixed_mo_coeff(istate, ispin), &
1375 352 : tmp2, nmo, 1.0_dp, 0.0_dp)
1376 : CALL parallel_gemm('T', 'N', nmo, nmo, nao, 1.0_dp, &
1377 : mixed_mo_coeff(jstate, ispin), &
1378 352 : tmp2, 0.0_dp, mo_overlap(ipermutation))
1379 352 : IF (print_mo) &
1380 : CALL cp_fm_write_formatted(mo_overlap(ipermutation), mounit, &
1381 : "# MO overlap matrix (step "//TRIM(ADJUSTL(cp_to_string(mixed_cdft%sim_step)))// &
1382 : "): CDFT states "//TRIM(ADJUSTL(cp_to_string(istate)))//" and "// &
1383 : TRIM(ADJUSTL(cp_to_string(jstate)))//" (spin "// &
1384 428 : TRIM(ADJUSTL(cp_to_string(ispin)))//")")
1385 : END DO
1386 : END DO
1387 : ! calculate the MO-representations of the restraint matrices of all CDFT states
1388 380 : DO ivar = 1, nvar
1389 812 : DO jstate = 1, nforce_eval
1390 1768 : DO istate = 1, nforce_eval
1391 1144 : IF (istate == jstate) CYCLE
1392 : ! State i: (C_j)^T W_i C_i
1393 : CALL cp_dbcsr_sm_fm_multiply(w_matrix(istate, ivar)%matrix, &
1394 : mixed_mo_coeff(istate, ispin), &
1395 712 : tmp2, nmo, 1.0_dp, 0.0_dp)
1396 : CALL parallel_gemm('T', 'N', nmo, nmo, nao, 1.0_dp, &
1397 : mixed_mo_coeff(jstate, ispin), &
1398 1576 : tmp2, 0.0_dp, w_matrix_mo(istate, jstate, ivar))
1399 : END DO
1400 : END DO
1401 : END DO
1402 540 : DO ipermutation = 1, npermutations
1403 : ! Invert and calculate determinant of MO overlaps
1404 352 : CALL map_permutation_to_states(nforce_eval, ipermutation, istate, jstate)
1405 352 : IF (print_mo_eigval) THEN
1406 28 : NULLIFY (mo_eigval)
1407 : CALL cp_fm_invert(mo_overlap(ipermutation), inverse_mat, &
1408 : S_det(ipermutation, ispin), eps_svd=mixed_cdft%eps_svd, &
1409 28 : eigval=mo_eigval)
1410 28 : IF (moeigvalunit > 0) THEN
1411 14 : IF (mixed_cdft%eps_svd == 0.0_dp) THEN
1412 : WRITE (moeigvalunit, '(A,I2,A,I2,A,I1,A)') &
1413 0 : "# MO Overlap matrix eigenvalues for CDFT states ", istate, " and ", jstate, &
1414 0 : " (spin ", ispin, ")"
1415 : ELSE
1416 : WRITE (moeigvalunit, '(A,I2,A,I2,A,I1,A)') &
1417 14 : "# MO Overlap matrix singular values for CDFT states ", istate, " and ", jstate, &
1418 28 : " (spin ", ispin, ")"
1419 : END IF
1420 14 : WRITE (moeigvalunit, '(A1, A9, A12)') "#", "Index", ADJUSTL("Value")
1421 46 : DO j = 1, SIZE(mo_eigval)
1422 46 : WRITE (moeigvalunit, '(I10, F12.8)') j, mo_eigval(j)
1423 : END DO
1424 : END IF
1425 28 : DEALLOCATE (mo_eigval)
1426 : ELSE
1427 : CALL cp_fm_invert(mo_overlap(ipermutation), inverse_mat, &
1428 324 : S_det(ipermutation, ispin), eps_svd=mixed_cdft%eps_svd)
1429 : END IF
1430 352 : CALL cp_fm_get_info(inverse_mat, nrow_local=nrow_local, ncol_local=ncol_local)
1431 : ! Calculate <Psi_i | w_j(r) | Psi_j> for ivar:th constraint
1432 912 : DO j = 1, ncol_local
1433 1422 : DO k = 1, nrow_local
1434 1585 : DO ivar = 1, nvar
1435 : b(ispin, ivar, ipermutation) = b(ispin, ivar, ipermutation) + &
1436 : w_matrix_mo(jstate, istate, ivar)%local_data(k, j)* &
1437 1025 : inverse_mat%local_data(k, j)
1438 : END DO
1439 : END DO
1440 : END DO
1441 : ! Calculate <Psi_j | w_i(r) | Psi_i> for ivar:th constraint
1442 352 : CALL cp_fm_transpose(inverse_mat, Tinverse)
1443 912 : DO j = 1, ncol_local
1444 1422 : DO k = 1, nrow_local
1445 1585 : DO ivar = 1, nvar
1446 : a(ispin, ivar, ipermutation) = a(ispin, ivar, ipermutation) + &
1447 : w_matrix_mo(istate, jstate, ivar)%local_data(k, j)* &
1448 1025 : Tinverse%local_data(k, j)
1449 : END DO
1450 : END DO
1451 : END DO
1452 : ! Handle different constraint types
1453 708 : DO ivar = 1, nvar
1454 356 : SELECT CASE (mixed_cdft%constraint_type(ivar, istate))
1455 : CASE (cdft_charge_constraint)
1456 : ! No action needed
1457 : CASE (cdft_magnetization_constraint)
1458 0 : IF (ispin == 2) a(ispin, ivar, ipermutation) = -a(ispin, ivar, ipermutation)
1459 : CASE (cdft_alpha_constraint)
1460 : ! Constraint applied to alpha electrons only, set integrals involving beta to zero
1461 4 : IF (ispin == 2) a(ispin, ivar, ipermutation) = 0.0_dp
1462 : CASE (cdft_beta_constraint)
1463 : ! Constraint applied to beta electrons only, set integrals involving alpha to zero
1464 4 : IF (ispin == 1) a(ispin, ivar, ipermutation) = 0.0_dp
1465 : CASE DEFAULT
1466 356 : CPABORT("Unknown constraint type.")
1467 : END SELECT
1468 352 : SELECT CASE (mixed_cdft%constraint_type(ivar, jstate))
1469 : CASE (cdft_charge_constraint)
1470 : ! No action needed
1471 : CASE (cdft_magnetization_constraint)
1472 0 : IF (ispin == 2) b(ispin, ivar, ipermutation) = -b(ispin, ivar, ipermutation)
1473 : CASE (cdft_alpha_constraint)
1474 : ! Constraint applied to alpha electrons only, set integrals involving beta to zero
1475 4 : IF (ispin == 2) b(ispin, ivar, ipermutation) = 0.0_dp
1476 : CASE (cdft_beta_constraint)
1477 : ! Constraint applied to beta electrons only, set integrals involving alpha to zero
1478 4 : IF (ispin == 1) b(ispin, ivar, ipermutation) = 0.0_dp
1479 : CASE DEFAULT
1480 356 : CPABORT("Unknown constraint type.")
1481 : END SELECT
1482 : END DO
1483 : ! Compute density matrix difference P = P_j - P_i
1484 352 : IF (mixed_cdft%calculate_metric) &
1485 : CALL dbcsr_add(density_matrix_diff(ipermutation, ispin)%matrix, &
1486 36 : density_matrix(jstate, ispin)%matrix, -1.0_dp, 1.0_dp)
1487 : !
1488 1064 : CALL force_env%para_env%sum(a(ispin, :, ipermutation))
1489 1956 : CALL force_env%para_env%sum(b(ispin, :, ipermutation))
1490 : END DO
1491 : ! Release work
1492 188 : CALL cp_fm_release(tmp2)
1493 380 : DO ivar = 1, nvar
1494 812 : DO jstate = 1, nforce_eval
1495 1768 : DO istate = 1, nforce_eval
1496 1144 : IF (istate == jstate) CYCLE
1497 1576 : CALL cp_fm_release(w_matrix_mo(istate, jstate, ivar))
1498 : END DO
1499 : END DO
1500 : END DO
1501 540 : DO ipermutation = 1, npermutations
1502 540 : CALL cp_fm_release(mo_overlap(ipermutation))
1503 : END DO
1504 188 : CALL cp_fm_release(Tinverse)
1505 282 : CALL cp_fm_release(inverse_mat)
1506 : END DO
1507 94 : DEALLOCATE (mo_overlap)
1508 94 : DEALLOCATE (w_matrix_mo)
1509 94 : IF (.NOT. uniform_occupation) THEN
1510 14 : DEALLOCATE (homo)
1511 14 : DEALLOCATE (mixed_cdft%occupations)
1512 : END IF
1513 94 : IF (print_mo) &
1514 : CALL cp_print_key_finished_output(mounit, logger, force_env_section, &
1515 2 : "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO", on_file=.TRUE.)
1516 94 : IF (print_mo_eigval) &
1517 : CALL cp_print_key_finished_output(moeigvalunit, logger, force_env_section, &
1518 14 : "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO", on_file=.TRUE.)
1519 : ! solve eigenstates for the projector matrix
1520 376 : ALLOCATE (Wda(nvar, npermutations))
1521 282 : ALLOCATE (Sda(npermutations))
1522 98 : IF (.NOT. mixed_cdft%identical_constraints) ALLOCATE (Wad(nvar, npermutations))
1523 270 : DO ipermutation = 1, npermutations
1524 176 : IF (nspins == 2) THEN
1525 176 : Sda(ipermutation) = ABS(S_det(ipermutation, 1)*S_det(ipermutation, 2))
1526 : ELSE
1527 0 : Sda(ipermutation) = S_det(ipermutation, 1)**2
1528 : END IF
1529 : ! Finalize <Psi_j | w_i(r) | Psi_i> by multiplication with Sda
1530 448 : DO ivar = 1, nvar
1531 354 : IF (mixed_cdft%identical_constraints) THEN
1532 : Wda(ivar, ipermutation) = (SUM(a(:, ivar, ipermutation)) + SUM(b(:, ivar, ipermutation)))* &
1533 880 : Sda(ipermutation)/2.0_dp
1534 : ELSE
1535 6 : Wda(ivar, ipermutation) = SUM(a(:, ivar, ipermutation))*Sda(ipermutation)
1536 6 : Wad(ivar, ipermutation) = SUM(b(:, ivar, ipermutation))*Sda(ipermutation)
1537 : END IF
1538 : END DO
1539 : END DO
1540 94 : DEALLOCATE (a, b, S_det)
1541 : ! Transfer info about the constraint calculations
1542 752 : ALLOCATE (W_diagonal(nvar, nforce_eval), strength(nvar, nforce_eval), energy(nforce_eval))
1543 522 : W_diagonal = 0.0_dp
1544 306 : DO iforce_eval = 1, nforce_eval
1545 522 : strength(:, iforce_eval) = mixed_env%strength(iforce_eval, :)
1546 : END DO
1547 306 : energy = 0.0_dp
1548 306 : DO iforce_eval = 1, nforce_eval
1549 212 : IF (.NOT. ASSOCIATED(force_env%sub_force_env(iforce_eval)%force_env)) CYCLE
1550 176 : IF (force_env%mixed_env%do_mixed_qmmm_cdft) THEN
1551 24 : qs_env => force_env%sub_force_env(iforce_eval)%force_env%qmmm_env%qs_env
1552 : ELSE
1553 152 : CALL force_env_get(force_env%sub_force_env(iforce_eval)%force_env, qs_env=qs_env)
1554 : END IF
1555 176 : CALL get_qs_env(qs_env, energy=energy_qs, dft_control=dft_control)
1556 270 : IF (force_env%sub_force_env(iforce_eval)%force_env%para_env%is_source()) THEN
1557 214 : W_diagonal(:, iforce_eval) = dft_control%qs_control%cdft_control%value(:)
1558 142 : energy(iforce_eval) = energy_qs%total
1559 : END IF
1560 : END DO
1561 94 : CALL force_env%para_env%sum(W_diagonal)
1562 94 : CALL force_env%para_env%sum(energy)
1563 : CALL mixed_cdft_result_type_set(mixed_cdft%results, Wda=Wda, W_diagonal=W_diagonal, &
1564 94 : energy=energy, strength=strength)
1565 94 : IF (.NOT. mixed_cdft%identical_constraints) CALL mixed_cdft_result_type_set(mixed_cdft%results, Wad=Wad)
1566 : ! Construct S
1567 376 : ALLOCATE (S_mat(nforce_eval, nforce_eval))
1568 306 : DO istate = 1, nforce_eval
1569 306 : S_mat(istate, istate) = 1.0_dp
1570 : END DO
1571 270 : DO ipermutation = 1, npermutations
1572 176 : CALL map_permutation_to_states(nforce_eval, ipermutation, istate, jstate)
1573 176 : S_mat(istate, jstate) = Sda(ipermutation)
1574 270 : S_mat(jstate, istate) = Sda(ipermutation)
1575 : END DO
1576 94 : CALL mixed_cdft_result_type_set(mixed_cdft%results, S=S_mat)
1577 : ! Invert S via eigendecomposition and compute S^-(1/2)
1578 376 : ALLOCATE (eigenv(nforce_eval), tmp_mat(nforce_eval, nforce_eval))
1579 94 : CALL diamat_all(S_mat, eigenv, .TRUE.)
1580 870 : tmp_mat = 0.0_dp
1581 306 : DO istate = 1, nforce_eval
1582 212 : IF (eigenv(istate) .LT. 1.0e-14_dp) THEN
1583 : ! Safeguard against division with 0 and negative numbers
1584 10 : eigenv(istate) = 1.0e-14_dp
1585 : CALL cp_warn(__LOCATION__, &
1586 : "The overlap matrix is numerically nearly singular. "// &
1587 10 : "Calculation proceeds but the results might be meaningless.")
1588 : END IF
1589 306 : tmp_mat(istate, istate) = 1.0_dp/SQRT(eigenv(istate))
1590 : END DO
1591 6542 : tmp_mat(:, :) = MATMUL(tmp_mat, TRANSPOSE(S_mat))
1592 10850 : S_mat(:, :) = MATMUL(S_mat, tmp_mat) ! S^(-1/2)
1593 94 : CALL mixed_cdft_result_type_set(mixed_cdft%results, S_minushalf=S_mat)
1594 94 : DEALLOCATE (eigenv, tmp_mat, S_mat)
1595 : ! Construct nonorthogonal diabatic Hamiltonian matrix H''
1596 282 : ALLOCATE (H_mat(nforce_eval, nforce_eval))
1597 112 : IF (mixed_cdft%nonortho_coupling) ALLOCATE (coupling_nonortho(npermutations))
1598 306 : DO istate = 1, nforce_eval
1599 306 : H_mat(istate, istate) = energy(istate)
1600 : END DO
1601 270 : DO ipermutation = 1, npermutations
1602 176 : CALL map_permutation_to_states(nforce_eval, ipermutation, istate, jstate)
1603 176 : sum_a = 0.0_dp
1604 176 : sum_b = 0.0_dp
1605 354 : DO ivar = 1, nvar
1606 : ! V_J * <Psi_J | w_J(r) | Psi_J>
1607 178 : sum_b(1) = sum_b(1) + strength(ivar, jstate)*W_diagonal(ivar, jstate)
1608 : ! V_I * <Psi_I | w_I(r) | Psi_I>
1609 178 : sum_a(1) = sum_a(1) + strength(ivar, istate)*W_diagonal(ivar, istate)
1610 354 : IF (mixed_cdft%identical_constraints) THEN
1611 : ! V_J * W_IJ
1612 176 : sum_b(2) = sum_b(2) + strength(ivar, jstate)*Wda(ivar, ipermutation)
1613 : ! V_I * W_JI
1614 176 : sum_a(2) = sum_a(2) + strength(ivar, istate)*Wda(ivar, ipermutation)
1615 : ELSE
1616 : ! V_J * W_IJ
1617 2 : sum_b(2) = sum_b(2) + strength(ivar, jstate)*Wad(ivar, ipermutation)
1618 : ! V_I * W_JI
1619 2 : sum_a(2) = sum_a(2) + strength(ivar, istate)*Wda(ivar, ipermutation)
1620 : END IF
1621 : END DO
1622 : ! Denote F_X = <Psi_X | H_X + V_X*w_X(r) | Psi_X> = E_X + V_X*<Psi_X | w_X(r) | Psi_X>
1623 : ! H_IJ = F_J*S_IJ - V_J * W_IJ
1624 176 : c(1) = (energy(jstate) + sum_b(1))*Sda(ipermutation) - sum_b(2)
1625 : ! H_JI = F_I*S_JI - V_I * W_JI
1626 176 : c(2) = (energy(istate) + sum_a(1))*Sda(ipermutation) - sum_a(2)
1627 : ! H''(I,J) = 0.5*(H_IJ+H_JI) = H''(J,I)
1628 176 : H_mat(istate, jstate) = (c(1) + c(2))*0.5_dp
1629 176 : H_mat(jstate, istate) = H_mat(istate, jstate)
1630 446 : IF (mixed_cdft%nonortho_coupling) coupling_nonortho(ipermutation) = H_mat(istate, jstate)
1631 : END DO
1632 94 : CALL mixed_cdft_result_type_set(mixed_cdft%results, H=H_mat)
1633 94 : DEALLOCATE (H_mat, W_diagonal, Wda, strength, energy, Sda)
1634 94 : IF (ALLOCATED(Wad)) DEALLOCATE (Wad)
1635 94 : IF (mixed_cdft%nonortho_coupling) THEN
1636 18 : CALL mixed_cdft_result_type_set(mixed_cdft%results, nonortho=coupling_nonortho)
1637 18 : DEALLOCATE (coupling_nonortho)
1638 : END IF
1639 : ! Compute metric to assess reliability of coupling
1640 94 : IF (mixed_cdft%calculate_metric) CALL mixed_cdft_calculate_metric(force_env, mixed_cdft, density_matrix_diff, ncol_mo)
1641 : ! Compute coupling also with the wavefunction overlap method, see Migliore2009
1642 : ! Requires the unconstrained KS ground state wavefunction as input
1643 94 : IF (mixed_cdft%wfn_overlap_method) THEN
1644 8 : IF (.NOT. uniform_occupation) &
1645 : CALL cp_abort(__LOCATION__, &
1646 0 : "Wavefunction overlap method supports only uniformly occupied MOs.")
1647 8 : CALL mixed_cdft_wfn_overlap_method(force_env, mixed_cdft, ncol_mo, nrow_mo)
1648 : END IF
1649 : ! Release remaining work
1650 94 : DEALLOCATE (nrow_mo, ncol_mo)
1651 94 : CALL mixed_cdft_work_type_release(mixed_cdft%matrix)
1652 94 : CALL timestop(handle)
1653 :
1654 282 : END SUBROUTINE mixed_cdft_interaction_matrices
1655 :
1656 : ! **************************************************************************************************
1657 : !> \brief Routine to calculate the CDFT electronic couplings.
1658 : !> \param force_env the force_env that holds the CDFT states
1659 : !> \par History
1660 : !> 11.17 created [Nico Holmberg]
1661 : ! **************************************************************************************************
1662 94 : SUBROUTINE mixed_cdft_calculate_coupling_low(force_env)
1663 : TYPE(force_env_type), POINTER :: force_env
1664 :
1665 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_calculate_coupling_low'
1666 :
1667 : INTEGER :: handle, ipermutation, istate, jstate, &
1668 : nforce_eval, npermutations, nvar
1669 : LOGICAL :: use_lowdin, use_rotation
1670 94 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: coupling_lowdin, coupling_rotation, &
1671 94 : eigenv
1672 94 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_mat, W_mat
1673 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
1674 :
1675 94 : NULLIFY (mixed_cdft)
1676 94 : CPASSERT(ASSOCIATED(force_env))
1677 94 : CALL get_mixed_env(force_env%mixed_env, cdft_control=mixed_cdft)
1678 94 : CALL timeset(routineN, handle)
1679 94 : CPASSERT(ASSOCIATED(mixed_cdft))
1680 94 : CPASSERT(ALLOCATED(mixed_cdft%results%W_diagonal))
1681 94 : CPASSERT(ALLOCATED(mixed_cdft%results%Wda))
1682 94 : CPASSERT(ALLOCATED(mixed_cdft%results%S_minushalf))
1683 94 : CPASSERT(ALLOCATED(mixed_cdft%results%H))
1684 : ! Decide which methods to use for computing the coupling
1685 : ! Default behavior is to use rotation when a single constraint is active, otherwise uses Lowdin orthogonalization
1686 : ! The latter can also be explicitly requested when a single constraint is active
1687 : ! Possibly computes the coupling additionally with the wavefunction overlap method
1688 94 : nforce_eval = SIZE(mixed_cdft%results%H, 1)
1689 94 : nvar = SIZE(mixed_cdft%results%Wda, 1)
1690 94 : npermutations = nforce_eval*(nforce_eval - 1)/2
1691 376 : ALLOCATE (tmp_mat(nforce_eval, nforce_eval))
1692 94 : IF (nvar == 1 .AND. mixed_cdft%identical_constraints) THEN
1693 90 : use_rotation = .TRUE.
1694 90 : use_lowdin = mixed_cdft%use_lowdin
1695 : ELSE
1696 : use_rotation = .FALSE.
1697 : use_lowdin = .TRUE.
1698 : END IF
1699 : ! Calculate coupling by rotating the CDFT states to eigenstates of the weight matrix W (single constraint only)
1700 : IF (use_rotation) THEN
1701 : ! Construct W
1702 540 : ALLOCATE (W_mat(nforce_eval, nforce_eval), coupling_rotation(npermutations))
1703 270 : ALLOCATE (eigenv(nforce_eval))
1704 : ! W_mat(i, i) = N_i where N_i is the value of the constraint in state i
1705 294 : DO istate = 1, nforce_eval
1706 498 : W_mat(istate, istate) = SUM(mixed_cdft%results%W_diagonal(:, istate))
1707 : END DO
1708 : ! W_mat(i, j) = <Psi_i|w(r)|Psi_j>
1709 262 : DO ipermutation = 1, npermutations
1710 172 : CALL map_permutation_to_states(nforce_eval, ipermutation, istate, jstate)
1711 344 : W_mat(istate, jstate) = SUM(mixed_cdft%results%Wda(:, ipermutation))
1712 434 : W_mat(jstate, istate) = W_mat(istate, jstate)
1713 : END DO
1714 : ! Solve generalized eigenvalue equation WV = SVL
1715 : ! Convert to standard eigenvalue problem via symmetric orthogonalisation
1716 5740 : tmp_mat(:, :) = MATMUL(W_mat, mixed_cdft%results%S_minushalf) ! W * S^(-1/2)
1717 5830 : W_mat(:, :) = MATMUL(mixed_cdft%results%S_minushalf, tmp_mat) ! W' = S^(-1/2) * W * S^(-1/2)
1718 90 : CALL diamat_all(W_mat, eigenv, .TRUE.) ! Solve W'V' = AV'
1719 9886 : tmp_mat(:, :) = MATMUL(mixed_cdft%results%S_minushalf, W_mat) ! Reverse transformation V = S^(-1/2) V'
1720 : ! Construct final, orthogonal diabatic Hamiltonian matrix H
1721 9796 : W_mat(:, :) = MATMUL(mixed_cdft%results%H, tmp_mat) ! H'' * V
1722 10818 : W_mat(:, :) = MATMUL(TRANSPOSE(tmp_mat), W_mat) ! H = V^T * H'' * V
1723 262 : DO ipermutation = 1, npermutations
1724 172 : CALL map_permutation_to_states(nforce_eval, ipermutation, istate, jstate)
1725 262 : coupling_rotation(ipermutation) = W_mat(istate, jstate)
1726 : END DO
1727 90 : CALL mixed_cdft_result_type_set(mixed_cdft%results, rotation=coupling_rotation)
1728 90 : DEALLOCATE (W_mat, coupling_rotation, eigenv)
1729 : END IF
1730 : ! Calculate coupling by Lowdin orthogonalization
1731 94 : IF (use_lowdin) THEN
1732 60 : ALLOCATE (coupling_lowdin(npermutations))
1733 920 : tmp_mat(:, :) = MATMUL(mixed_cdft%results%H, mixed_cdft%results%S_minushalf) ! H'' * S^(-1/2)
1734 : ! Final orthogonal diabatic Hamiltonian matrix H
1735 740 : tmp_mat(:, :) = MATMUL(mixed_cdft%results%S_minushalf, tmp_mat) ! H = S^(-1/2) * H'' * S^(-1/2)
1736 40 : DO ipermutation = 1, npermutations
1737 20 : CALL map_permutation_to_states(nforce_eval, ipermutation, istate, jstate)
1738 40 : coupling_lowdin(ipermutation) = tmp_mat(istate, jstate)
1739 : END DO
1740 20 : CALL mixed_cdft_result_type_set(mixed_cdft%results, lowdin=coupling_lowdin)
1741 20 : DEALLOCATE (coupling_lowdin)
1742 : END IF
1743 94 : DEALLOCATE (tmp_mat)
1744 94 : CALL timestop(handle)
1745 :
1746 188 : END SUBROUTINE mixed_cdft_calculate_coupling_low
1747 :
1748 : ! **************************************************************************************************
1749 : !> \brief Performs a configuration interaction calculation in the basis spanned by the CDFT states.
1750 : !> \param force_env the force_env that holds the CDFT states
1751 : !> \par History
1752 : !> 11.17 created [Nico Holmberg]
1753 : ! **************************************************************************************************
1754 94 : SUBROUTINE mixed_cdft_configuration_interaction(force_env)
1755 : TYPE(force_env_type), POINTER :: force_env
1756 :
1757 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_configuration_interaction'
1758 :
1759 : INTEGER :: handle, info, iounit, istate, ivar, &
1760 : nforce_eval, work_array_size
1761 94 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: eigenv, work
1762 94 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: H_mat, H_mat_copy, S_mat, S_mat_copy
1763 : REAL(KIND=dp), EXTERNAL :: dnrm2
1764 : TYPE(cp_logger_type), POINTER :: logger
1765 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
1766 : TYPE(section_vals_type), POINTER :: force_env_section, print_section
1767 :
1768 : EXTERNAL :: dsygv
1769 :
1770 94 : NULLIFY (logger, force_env_section, print_section, mixed_cdft)
1771 :
1772 94 : CPASSERT(ASSOCIATED(force_env))
1773 94 : CALL get_mixed_env(force_env%mixed_env, cdft_control=mixed_cdft)
1774 94 : CPASSERT(ASSOCIATED(mixed_cdft))
1775 :
1776 94 : IF (.NOT. mixed_cdft%do_ci) RETURN
1777 :
1778 14 : logger => cp_get_default_logger()
1779 14 : CALL timeset(routineN, handle)
1780 : CALL force_env_get(force_env=force_env, &
1781 14 : force_env_section=force_env_section)
1782 14 : print_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
1783 14 : iounit = cp_print_key_unit_nr(logger, print_section, '', extension='.mixedLog')
1784 :
1785 14 : CPASSERT(ALLOCATED(mixed_cdft%results%S))
1786 14 : CPASSERT(ALLOCATED(mixed_cdft%results%H))
1787 14 : nforce_eval = SIZE(mixed_cdft%results%S, 1)
1788 84 : ALLOCATE (S_mat(nforce_eval, nforce_eval), H_mat(nforce_eval, nforce_eval))
1789 42 : ALLOCATE (eigenv(nforce_eval))
1790 126 : S_mat(:, :) = mixed_cdft%results%S(:, :)
1791 126 : H_mat(:, :) = mixed_cdft%results%H(:, :)
1792 : ! Workspace query
1793 14 : ALLOCATE (work(1))
1794 14 : info = 0
1795 70 : ALLOCATE (H_mat_copy(nforce_eval, nforce_eval), S_mat_copy(nforce_eval, nforce_eval))
1796 126 : H_mat_copy(:, :) = H_mat(:, :) ! Need explicit copies because dsygv destroys original values
1797 126 : S_mat_copy(:, :) = S_mat(:, :)
1798 14 : CALL dsygv(1, 'V', 'U', nforce_eval, H_mat_copy, nforce_eval, S_mat_copy, nforce_eval, eigenv, work, -1, info)
1799 14 : work_array_size = NINT(work(1))
1800 14 : DEALLOCATE (H_mat_copy, S_mat_copy)
1801 : ! Allocate work array
1802 14 : DEALLOCATE (work)
1803 42 : ALLOCATE (work(work_array_size))
1804 1102 : work = 0.0_dp
1805 : ! Solve Hc = eSc
1806 14 : info = 0
1807 14 : CALL dsygv(1, 'V', 'U', nforce_eval, H_mat, nforce_eval, S_mat, nforce_eval, eigenv, work, work_array_size, info)
1808 14 : IF (info /= 0) THEN
1809 0 : IF (info > nforce_eval) THEN
1810 0 : CPABORT("Matrix S is not positive definite")
1811 : ELSE
1812 0 : CPABORT("Diagonalization of H matrix failed.")
1813 : END IF
1814 : END IF
1815 : ! dsygv returns eigenvectors (stored in columns of H_mat) that are normalized to H^T * S * H = I
1816 : ! Renormalize eigenvectors to H^T * H = I
1817 46 : DO ivar = 1, nforce_eval
1818 126 : H_mat(:, ivar) = H_mat(:, ivar)/dnrm2(nforce_eval, H_mat(:, ivar), 1)
1819 : END DO
1820 14 : DEALLOCATE (work)
1821 14 : IF (iounit > 0) THEN
1822 7 : WRITE (iounit, '(/,T3,A)') '------------------ CDFT Configuration Interaction (CDFT-CI) ------------------'
1823 23 : DO ivar = 1, nforce_eval
1824 16 : IF (ivar == 1) THEN
1825 7 : WRITE (iounit, '(T3,A,T58,(3X,F20.14))') 'Ground state energy:', eigenv(ivar)
1826 : ELSE
1827 9 : WRITE (iounit, '(/,T3,A,I2,A,T58,(3X,F20.14))') 'Excited state (', ivar - 1, ' ) energy:', eigenv(ivar)
1828 : END IF
1829 43 : DO istate = 1, nforce_eval, 2
1830 36 : IF (istate == 1) THEN
1831 : WRITE (iounit, '(T3,A,T54,(3X,2F12.6))') &
1832 16 : 'Expansion coefficients:', H_mat(istate, ivar), H_mat(istate + 1, ivar)
1833 4 : ELSE IF (istate .LT. nforce_eval) THEN
1834 4 : WRITE (iounit, '(T54,(3X,2F12.6))') H_mat(istate, ivar), H_mat(istate + 1, ivar)
1835 : ELSE
1836 0 : WRITE (iounit, '(T54,(3X,F12.6))') H_mat(istate, ivar)
1837 : END IF
1838 : END DO
1839 : END DO
1840 : WRITE (iounit, '(T3,A)') &
1841 7 : '------------------------------------------------------------------------------'
1842 : END IF
1843 14 : DEALLOCATE (S_mat, H_mat, eigenv)
1844 : CALL cp_print_key_finished_output(iounit, logger, force_env_section, &
1845 14 : "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
1846 14 : CALL timestop(handle)
1847 :
1848 14 : END SUBROUTINE mixed_cdft_configuration_interaction
1849 : ! **************************************************************************************************
1850 : !> \brief Block diagonalizes the mixed CDFT Hamiltonian matrix.
1851 : !> \param force_env the force_env that holds the CDFT states
1852 : !> \par History
1853 : !> 11.17 created [Nico Holmberg]
1854 : !> 01.18 added recursive diagonalization
1855 : !> split to subroutines [Nico Holmberg]
1856 : ! **************************************************************************************************
1857 94 : SUBROUTINE mixed_cdft_block_diag(force_env)
1858 : TYPE(force_env_type), POINTER :: force_env
1859 :
1860 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_block_diag'
1861 :
1862 : INTEGER :: handle, i, iounit, irecursion, j, n, &
1863 : nblk, nforce_eval, nrecursion
1864 : LOGICAL :: ignore_excited
1865 94 : TYPE(cp_1d_i_p_type), ALLOCATABLE, DIMENSION(:) :: blocks
1866 94 : TYPE(cp_1d_r_p_type), ALLOCATABLE, DIMENSION(:) :: eigenvalues
1867 94 : TYPE(cp_2d_r_p_type), ALLOCATABLE, DIMENSION(:) :: H_block, S_block
1868 : TYPE(cp_logger_type), POINTER :: logger
1869 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
1870 : TYPE(section_vals_type), POINTER :: force_env_section, print_section
1871 :
1872 : EXTERNAL :: dsygv
1873 :
1874 94 : NULLIFY (logger, force_env_section, print_section, mixed_cdft)
1875 :
1876 94 : CPASSERT(ASSOCIATED(force_env))
1877 94 : CALL get_mixed_env(force_env%mixed_env, cdft_control=mixed_cdft)
1878 94 : CPASSERT(ASSOCIATED(mixed_cdft))
1879 :
1880 94 : IF (.NOT. mixed_cdft%block_diagonalize) RETURN
1881 :
1882 8 : logger => cp_get_default_logger()
1883 8 : CALL timeset(routineN, handle)
1884 :
1885 8 : CPASSERT(ALLOCATED(mixed_cdft%results%S))
1886 8 : CPASSERT(ALLOCATED(mixed_cdft%results%H))
1887 8 : nforce_eval = SIZE(mixed_cdft%results%S, 1)
1888 :
1889 : CALL force_env_get(force_env=force_env, &
1890 8 : force_env_section=force_env_section)
1891 8 : print_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
1892 8 : iounit = cp_print_key_unit_nr(logger, print_section, '', extension='.mixedLog')
1893 : ! Read block definitions from input
1894 8 : CALL mixed_cdft_read_block_diag(force_env, blocks, ignore_excited, nrecursion)
1895 8 : nblk = SIZE(blocks)
1896 : ! Start block diagonalization
1897 18 : DO irecursion = 1, nrecursion
1898 : ! Print block definitions
1899 10 : IF (iounit > 0 .AND. irecursion == 1) THEN
1900 4 : WRITE (iounit, '(/,T3,A)') '-------------------------- CDFT BLOCK DIAGONALIZATION ------------------------'
1901 4 : WRITE (iounit, '(T3,A)') 'Block diagonalizing the mixed CDFT Hamiltonian'
1902 4 : WRITE (iounit, '(T3,A,I3)') 'Number of blocks:', nblk
1903 4 : WRITE (iounit, '(T3,A,L3)') 'Ignoring excited states within blocks:', ignore_excited
1904 4 : WRITE (iounit, '(/,T3,A)') 'List of CDFT states for each block'
1905 14 : DO i = 1, nblk
1906 33 : WRITE (iounit, '(T6,A,I3,A,6I3)') 'Block', i, ':', (blocks(i)%array(j), j=1, SIZE(blocks(i)%array))
1907 : END DO
1908 : END IF
1909 : ! Recursive diagonalization: update counters and references
1910 10 : IF (irecursion > 1) THEN
1911 2 : nblk = nblk/2
1912 10 : ALLOCATE (blocks(nblk))
1913 2 : j = 1
1914 6 : DO i = 1, nblk
1915 4 : NULLIFY (blocks(i)%array)
1916 4 : ALLOCATE (blocks(i)%array(2))
1917 12 : blocks(i)%array = (/j, j + 1/)
1918 6 : j = j + 2
1919 : END DO
1920 : ! Print info
1921 2 : IF (iounit > 0) THEN
1922 1 : WRITE (iounit, '(/, T3,A)') 'Recursive block diagonalization of the mixed CDFT Hamiltonian'
1923 1 : WRITE (iounit, '(T6,A)') 'Block diagonalization is continued until only two matrix blocks remain.'
1924 1 : WRITE (iounit, '(T6,A)') 'The new blocks are formed by collecting pairs of blocks from the previous'
1925 1 : WRITE (iounit, '(T6,A)') 'block diagonalized matrix in ascending order.'
1926 1 : WRITE (iounit, '(/,T3,A,I3,A,I3)') 'Recursion step:', irecursion - 1, ' of ', nrecursion - 1
1927 1 : WRITE (iounit, '(/,T3,A)') 'List of old block indices for each new block'
1928 3 : DO i = 1, nblk
1929 7 : WRITE (iounit, '(T6,A,I3,A,6I3)') 'Block', i, ':', (blocks(i)%array(j), j=1, SIZE(blocks(i)%array))
1930 : END DO
1931 : END IF
1932 : END IF
1933 : ! Get the Hamiltonian and overlap matrices of each block
1934 10 : CALL mixed_cdft_get_blocks(mixed_cdft, blocks, H_block, S_block)
1935 : ! Diagonalize blocks
1936 10 : CALL mixed_cdft_diagonalize_blocks(blocks, H_block, S_block, eigenvalues)
1937 : ! Assemble the block diagonalized matrices
1938 10 : IF (ignore_excited) THEN
1939 8 : n = nblk
1940 : ELSE
1941 2 : n = nforce_eval
1942 : END IF
1943 10 : CALL mixed_cdft_assemble_block_diag(mixed_cdft, blocks, H_block, eigenvalues, n, iounit)
1944 : ! Deallocate work
1945 34 : DO i = 1, nblk
1946 24 : DEALLOCATE (H_block(i)%array)
1947 24 : DEALLOCATE (S_block(i)%array)
1948 24 : DEALLOCATE (eigenvalues(i)%array)
1949 34 : DEALLOCATE (blocks(i)%array)
1950 : END DO
1951 38 : DEALLOCATE (H_block, S_block, eigenvalues, blocks)
1952 : END DO ! recursion
1953 8 : IF (iounit > 0) &
1954 : WRITE (iounit, '(T3,A)') &
1955 4 : '------------------------------------------------------------------------------'
1956 : CALL cp_print_key_finished_output(iounit, logger, force_env_section, &
1957 8 : "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
1958 8 : CALL timestop(handle)
1959 :
1960 94 : END SUBROUTINE mixed_cdft_block_diag
1961 : ! **************************************************************************************************
1962 : !> \brief Routine to calculate the CDFT electronic coupling reliability metric
1963 : !> \param force_env the force_env that holds the CDFT states
1964 : !> \param mixed_cdft the mixed_cdft env
1965 : !> \param density_matrix_diff array holding difference density matrices (P_j - P_i) for every CDFT
1966 : !> state permutation
1967 : !> \param ncol_mo the number of MOs per spin
1968 : !> \par History
1969 : !> 11.17 created [Nico Holmberg]
1970 : ! **************************************************************************************************
1971 14 : SUBROUTINE mixed_cdft_calculate_metric(force_env, mixed_cdft, density_matrix_diff, ncol_mo)
1972 : TYPE(force_env_type), POINTER :: force_env
1973 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
1974 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: density_matrix_diff
1975 : INTEGER, DIMENSION(:) :: ncol_mo
1976 :
1977 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_calculate_metric'
1978 :
1979 : INTEGER :: handle, ipermutation, ispin, j, &
1980 : nforce_eval, npermutations, nspins
1981 14 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: evals
1982 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: metric
1983 : TYPE(dbcsr_type) :: e_vectors
1984 :
1985 14 : CALL timeset(routineN, handle)
1986 14 : nforce_eval = SIZE(mixed_cdft%results%H, 1)
1987 14 : npermutations = nforce_eval*(nforce_eval - 1)/2
1988 14 : nspins = SIZE(density_matrix_diff, 2)
1989 56 : ALLOCATE (metric(npermutations, nspins))
1990 78 : metric = 0.0_dp
1991 14 : CALL dbcsr_create(e_vectors, template=density_matrix_diff(1, 1)%matrix)
1992 42 : DO ispin = 1, nspins
1993 84 : ALLOCATE (evals(ncol_mo(ispin)))
1994 64 : DO ipermutation = 1, npermutations
1995 : ! Take into account doubly occupied orbitals without LSD
1996 36 : IF (nspins == 1) &
1997 0 : CALL dbcsr_scale(density_matrix_diff(ipermutation, 1)%matrix, alpha_scalar=0.5_dp)
1998 : ! Diagonalize difference density matrix
1999 : CALL cp_dbcsr_syevd(density_matrix_diff(ipermutation, ispin)%matrix, e_vectors, evals, &
2000 36 : para_env=force_env%para_env, blacs_env=mixed_cdft%blacs_env)
2001 36 : CALL dbcsr_release_p(density_matrix_diff(ipermutation, ispin)%matrix)
2002 100 : DO j = 1, ncol_mo(ispin)
2003 72 : metric(ipermutation, ispin) = metric(ipermutation, ispin) + (evals(j)**2 - evals(j)**4)
2004 : END DO
2005 : END DO
2006 42 : DEALLOCATE (evals)
2007 : END DO
2008 14 : CALL dbcsr_release(e_vectors)
2009 14 : DEALLOCATE (density_matrix_diff)
2010 78 : metric(:, :) = metric(:, :)/4.0_dp
2011 14 : CALL mixed_cdft_result_type_set(mixed_cdft%results, metric=metric)
2012 14 : DEALLOCATE (metric)
2013 14 : CALL timestop(handle)
2014 :
2015 28 : END SUBROUTINE mixed_cdft_calculate_metric
2016 :
2017 : ! **************************************************************************************************
2018 : !> \brief Routine to calculate the electronic coupling according to the wavefunction overlap method
2019 : !> \param force_env the force_env that holds the CDFT states
2020 : !> \param mixed_cdft the mixed_cdft env
2021 : !> \param ncol_mo the number of MOs per spin
2022 : !> \param nrow_mo the number of AOs per spin
2023 : !> \par History
2024 : !> 11.17 created [Nico Holmberg]
2025 : ! **************************************************************************************************
2026 8 : SUBROUTINE mixed_cdft_wfn_overlap_method(force_env, mixed_cdft, ncol_mo, nrow_mo)
2027 : TYPE(force_env_type), POINTER :: force_env
2028 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
2029 : INTEGER, DIMENSION(:) :: ncol_mo, nrow_mo
2030 :
2031 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_cdft_wfn_overlap_method'
2032 :
2033 : CHARACTER(LEN=default_path_length) :: file_name
2034 : INTEGER :: handle, ipermutation, ispin, istate, &
2035 : jstate, nao, nforce_eval, nmo, &
2036 : npermutations, nspins
2037 : LOGICAL :: exist, natom_mismatch
2038 : REAL(KIND=dp) :: energy_diff, maxocc, Sda
2039 8 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: coupling_wfn
2040 8 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: overlaps
2041 8 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
2042 : TYPE(cp_fm_struct_type), POINTER :: mo_mo_fmstruct
2043 : TYPE(cp_fm_type) :: inverse_mat, mo_overlap_wfn, mo_tmp
2044 8 : TYPE(cp_fm_type), DIMENSION(:, :), POINTER :: mixed_mo_coeff
2045 : TYPE(cp_logger_type), POINTER :: logger
2046 : TYPE(cp_subsys_type), POINTER :: subsys_mix
2047 : TYPE(dbcsr_type), POINTER :: mixed_matrix_s
2048 8 : TYPE(mo_set_type), ALLOCATABLE, DIMENSION(:) :: mo_set
2049 8 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
2050 8 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
2051 : TYPE(section_vals_type), POINTER :: force_env_section, mixed_cdft_section
2052 :
2053 8 : NULLIFY (mixed_cdft_section, subsys_mix, particle_set, qs_kind_set, atomic_kind_set, &
2054 8 : mixed_mo_coeff, mixed_matrix_s, force_env_section)
2055 16 : logger => cp_get_default_logger()
2056 :
2057 8 : CALL timeset(routineN, handle)
2058 8 : nforce_eval = SIZE(mixed_cdft%results%H, 1)
2059 8 : npermutations = nforce_eval*(nforce_eval - 1)/2
2060 8 : nspins = SIZE(nrow_mo)
2061 8 : mixed_mo_coeff => mixed_cdft%matrix%mixed_mo_coeff
2062 8 : mixed_matrix_s => mixed_cdft%matrix%mixed_matrix_s
2063 : CALL force_env_get(force_env=force_env, &
2064 8 : force_env_section=force_env_section)
2065 : ! Create mo_set for input wfn
2066 40 : ALLOCATE (mo_set(nspins))
2067 8 : IF (nspins == 2) THEN
2068 8 : maxocc = 1.0_dp
2069 : ELSE
2070 0 : maxocc = 2.0_dp
2071 : END IF
2072 24 : DO ispin = 1, nspins
2073 16 : nao = nrow_mo(ispin)
2074 16 : nmo = ncol_mo(ispin)
2075 : ! Only OT with fully occupied orbitals is implicitly supported
2076 : CALL allocate_mo_set(mo_set(ispin), nao=nao, nmo=nmo, nelectron=INT(maxocc*nmo), &
2077 : n_el_f=REAL(maxocc*nmo, dp), maxocc=maxocc, &
2078 16 : flexible_electron_count=0.0_dp)
2079 16 : CALL set_mo_set(mo_set(ispin), uniform_occupation=.TRUE., homo=nmo)
2080 16 : ALLOCATE (mo_set(ispin)%mo_coeff)
2081 : CALL cp_fm_create(matrix=mo_set(ispin)%mo_coeff, &
2082 : matrix_struct=mixed_mo_coeff(1, ispin)%matrix_struct, &
2083 16 : name="GS_MO_COEFF"//TRIM(ADJUSTL(cp_to_string(ispin)))//"MATRIX")
2084 48 : ALLOCATE (mo_set(ispin)%eigenvalues(nmo))
2085 40 : ALLOCATE (mo_set(ispin)%occupation_numbers(nmo))
2086 : END DO
2087 : ! Read wfn file (note we assume that the basis set is the same)
2088 8 : IF (force_env%mixed_env%do_mixed_qmmm_cdft) &
2089 : ! This really shouldnt be a problem?
2090 : CALL cp_abort(__LOCATION__, &
2091 0 : "QMMM + wavefunction overlap method not supported.")
2092 8 : CALL force_env_get(force_env=force_env, subsys=subsys_mix)
2093 8 : mixed_cdft_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT")
2094 8 : CALL cp_subsys_get(subsys_mix, atomic_kind_set=atomic_kind_set, particle_set=particle_set)
2095 8 : CPASSERT(ASSOCIATED(mixed_cdft%qs_kind_set))
2096 8 : IF (force_env%para_env%is_source()) &
2097 4 : CALL wfn_restart_file_name(file_name, exist, mixed_cdft_section, logger)
2098 8 : CALL force_env%para_env%bcast(exist)
2099 8 : CALL force_env%para_env%bcast(file_name)
2100 8 : IF (.NOT. exist) &
2101 : CALL cp_abort(__LOCATION__, &
2102 : "User requested to restart the wavefunction from the file named: "// &
2103 : TRIM(file_name)//". This file does not exist. Please check the existence of"// &
2104 : " the file or change properly the value of the keyword WFN_RESTART_FILE_NAME in"// &
2105 0 : " section FORCE_EVAL\MIXED\MIXED_CDFT.")
2106 : CALL read_mo_set_from_restart(mo_array=mo_set, atomic_kind_set=atomic_kind_set, &
2107 : qs_kind_set=mixed_cdft%qs_kind_set, particle_set=particle_set, &
2108 : para_env=force_env%para_env, id_nr=0, multiplicity=mixed_cdft%multiplicity, &
2109 : dft_section=mixed_cdft_section, natom_mismatch=natom_mismatch, &
2110 8 : cdft=.TRUE.)
2111 8 : IF (natom_mismatch) &
2112 : CALL cp_abort(__LOCATION__, &
2113 0 : "Restart wfn file has a wrong number of atoms")
2114 : ! Orthonormalize wfn
2115 24 : DO ispin = 1, nspins
2116 24 : IF (mixed_cdft%has_unit_metric) THEN
2117 0 : CALL make_basis_simple(mo_set(ispin)%mo_coeff, ncol_mo(ispin))
2118 : ELSE
2119 16 : CALL make_basis_sm(mo_set(ispin)%mo_coeff, ncol_mo(ispin), mixed_matrix_s)
2120 : END IF
2121 : END DO
2122 : ! Calculate MO overlaps between reference state (R) and CDFT state pairs I/J
2123 24 : ALLOCATE (coupling_wfn(npermutations))
2124 32 : ALLOCATE (overlaps(2, npermutations, nspins))
2125 96 : overlaps = 0.0_dp
2126 24 : DO ispin = 1, nspins
2127 : ! Allocate work
2128 16 : nao = nrow_mo(ispin)
2129 16 : nmo = ncol_mo(ispin)
2130 : CALL cp_fm_struct_create(mo_mo_fmstruct, nrow_global=nmo, ncol_global=nmo, &
2131 16 : context=mixed_cdft%blacs_env, para_env=force_env%para_env)
2132 : CALL cp_fm_create(matrix=mo_overlap_wfn, matrix_struct=mo_mo_fmstruct, &
2133 16 : name="MO_OVERLAP_MATRIX_WFN")
2134 : CALL cp_fm_create(matrix=inverse_mat, matrix_struct=mo_mo_fmstruct, &
2135 16 : name="INVERSE_MO_OVERLAP_MATRIX_WFN")
2136 16 : CALL cp_fm_struct_release(mo_mo_fmstruct)
2137 : CALL cp_fm_create(matrix=mo_tmp, &
2138 : matrix_struct=mixed_mo_coeff(1, ispin)%matrix_struct, &
2139 16 : name="OVERLAP_MO_COEFF_WFN")
2140 40 : DO ipermutation = 1, npermutations
2141 24 : CALL map_permutation_to_states(nforce_eval, ipermutation, istate, jstate)
2142 : ! S*C_r
2143 : CALL cp_dbcsr_sm_fm_multiply(mixed_matrix_s, mo_set(ispin)%mo_coeff, &
2144 24 : mo_tmp, nmo, 1.0_dp, 0.0_dp)
2145 : ! C_i^T * (S*C_r)
2146 24 : CALL cp_fm_set_all(mo_overlap_wfn, alpha=0.0_dp)
2147 : CALL parallel_gemm('T', 'N', nmo, nmo, nao, 1.0_dp, &
2148 : mixed_mo_coeff(istate, ispin), &
2149 24 : mo_tmp, 0.0_dp, mo_overlap_wfn)
2150 24 : CALL cp_fm_invert(mo_overlap_wfn, inverse_mat, overlaps(1, ipermutation, ispin), eps_svd=mixed_cdft%eps_svd)
2151 : ! C_j^T * (S*C_r)
2152 24 : CALL cp_fm_set_all(mo_overlap_wfn, alpha=0.0_dp)
2153 : CALL parallel_gemm('T', 'N', nmo, nmo, nao, 1.0_dp, &
2154 : mixed_mo_coeff(jstate, ispin), &
2155 24 : mo_tmp, 0.0_dp, mo_overlap_wfn)
2156 64 : CALL cp_fm_invert(mo_overlap_wfn, inverse_mat, overlaps(2, ipermutation, ispin), eps_svd=mixed_cdft%eps_svd)
2157 : END DO
2158 16 : CALL cp_fm_release(mo_overlap_wfn)
2159 16 : CALL cp_fm_release(inverse_mat)
2160 16 : CALL cp_fm_release(mo_tmp)
2161 56 : CALL deallocate_mo_set(mo_set(ispin))
2162 : END DO
2163 8 : DEALLOCATE (mo_set)
2164 20 : DO ipermutation = 1, npermutations
2165 12 : CALL map_permutation_to_states(nforce_eval, ipermutation, istate, jstate)
2166 12 : IF (nspins == 2) THEN
2167 12 : overlaps(1, ipermutation, 1) = ABS(overlaps(1, ipermutation, 1)*overlaps(1, ipermutation, 2)) ! A in eq. 12c
2168 12 : overlaps(2, ipermutation, 1) = ABS(overlaps(2, ipermutation, 1)*overlaps(2, ipermutation, 2)) ! B in eq. 12c
2169 : ELSE
2170 0 : overlaps(1, ipermutation, 1) = overlaps(1, ipermutation, 1)**2
2171 0 : overlaps(2, ipermutation, 1) = overlaps(2, ipermutation, 1)**2
2172 : END IF
2173 : ! Calculate coupling using eq. 12c
2174 : ! The coupling is singular if A = B (i.e. states I/J are identical or charge in ground state is fully delocalized)
2175 32 : IF (ABS(overlaps(1, ipermutation, 1) - overlaps(2, ipermutation, 1)) .LE. 1.0e-14_dp) THEN
2176 : CALL cp_warn(__LOCATION__, &
2177 : "Coupling between states is singular and set to zero. "// &
2178 : "Potential causes: coupling is computed between identical CDFT states or the spin/charge "// &
2179 2 : "density is fully delocalized in the unconstrained ground state.")
2180 2 : coupling_wfn(ipermutation) = 0.0_dp
2181 : ELSE
2182 10 : energy_diff = mixed_cdft%results%energy(jstate) - mixed_cdft%results%energy(istate)
2183 10 : Sda = mixed_cdft%results%S(istate, jstate)
2184 : coupling_wfn(ipermutation) = ABS((overlaps(1, ipermutation, 1)*overlaps(2, ipermutation, 1)/ &
2185 : (overlaps(1, ipermutation, 1)**2 - overlaps(2, ipermutation, 1)**2))* &
2186 : (energy_diff)/(1.0_dp - Sda**2)* &
2187 : (1.0_dp - (overlaps(1, ipermutation, 1)**2 + overlaps(2, ipermutation, 1)**2)/ &
2188 : (2.0_dp*overlaps(1, ipermutation, 1)*overlaps(2, ipermutation, 1))* &
2189 10 : Sda))
2190 : END IF
2191 : END DO
2192 8 : DEALLOCATE (overlaps)
2193 8 : CALL mixed_cdft_result_type_set(mixed_cdft%results, wfn=coupling_wfn)
2194 8 : DEALLOCATE (coupling_wfn)
2195 8 : CALL timestop(handle)
2196 :
2197 16 : END SUBROUTINE mixed_cdft_wfn_overlap_method
2198 :
2199 : ! **************************************************************************************************
2200 : !> \brief Becke constraint adapted to mixed calculations, details in qs_cdft_methods.F
2201 : !> \param force_env the force_env that holds the CDFT states
2202 : !> \param calculate_forces determines if forces should be calculted
2203 : !> \par History
2204 : !> 02.2016 created [Nico Holmberg]
2205 : !> 03.2016 added dynamic load balancing (dlb)
2206 : !> changed pw_p_type data types to rank-3 reals to accommodate dlb
2207 : !> and to reduce overall memory footprint
2208 : !> split to subroutines [Nico Holmberg]
2209 : !> 04.2016 introduced mixed grid mapping [Nico Holmberg]
2210 : ! **************************************************************************************************
2211 34 : SUBROUTINE mixed_becke_constraint(force_env, calculate_forces)
2212 : TYPE(force_env_type), POINTER :: force_env
2213 : LOGICAL, INTENT(IN) :: calculate_forces
2214 :
2215 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_becke_constraint'
2216 :
2217 : INTEGER :: handle
2218 34 : INTEGER, ALLOCATABLE, DIMENSION(:) :: catom
2219 : LOGICAL :: in_memory, store_vectors
2220 34 : LOGICAL, ALLOCATABLE, DIMENSION(:) :: is_constraint
2221 34 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: coefficients
2222 34 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: position_vecs, R12
2223 34 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: pair_dist_vecs
2224 : TYPE(cp_logger_type), POINTER :: logger
2225 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
2226 : TYPE(mixed_environment_type), POINTER :: mixed_env
2227 :
2228 34 : NULLIFY (mixed_env, mixed_cdft)
2229 34 : store_vectors = .TRUE.
2230 34 : logger => cp_get_default_logger()
2231 34 : CALL timeset(routineN, handle)
2232 34 : mixed_env => force_env%mixed_env
2233 34 : CALL get_mixed_env(mixed_env, cdft_control=mixed_cdft)
2234 : CALL mixed_becke_constraint_init(force_env, mixed_cdft, calculate_forces, &
2235 : is_constraint, in_memory, store_vectors, &
2236 : R12, position_vecs, pair_dist_vecs, &
2237 34 : coefficients, catom)
2238 : CALL mixed_becke_constraint_low(force_env, mixed_cdft, in_memory, &
2239 : is_constraint, store_vectors, R12, &
2240 : position_vecs, pair_dist_vecs, &
2241 34 : coefficients, catom)
2242 34 : CALL timestop(handle)
2243 :
2244 34 : END SUBROUTINE mixed_becke_constraint
2245 : ! **************************************************************************************************
2246 : !> \brief Initialize the mixed Becke constraint calculation
2247 : !> \param force_env the force_env that holds the CDFT states
2248 : !> \param mixed_cdft container for structures needed to build the mixed CDFT constraint
2249 : !> \param calculate_forces determines if forces should be calculted
2250 : !> \param is_constraint a list used to determine which atoms in the system define the constraint
2251 : !> \param in_memory decides whether to build the weight function gradients in parallel before solving
2252 : !> the CDFT states or later during the SCF procedure of the individual states
2253 : !> \param store_vectors should temporary arrays be stored in memory to accelerate the calculation
2254 : !> \param R12 temporary array holding the pairwise atomic distances
2255 : !> \param position_vecs temporary array holding the pbc corrected atomic position vectors
2256 : !> \param pair_dist_vecs temporary array holding the pairwise displament vectors
2257 : !> \param coefficients array that determines how atoms should be summed to form the constraint
2258 : !> \param catom temporary array to map the global index of constraint atoms to their position
2259 : !> in a list that holds only constraint atoms
2260 : !> \par History
2261 : !> 03.2016 created [Nico Holmberg]
2262 : ! **************************************************************************************************
2263 34 : SUBROUTINE mixed_becke_constraint_init(force_env, mixed_cdft, calculate_forces, &
2264 : is_constraint, in_memory, store_vectors, &
2265 : R12, position_vecs, pair_dist_vecs, coefficients, &
2266 : catom)
2267 : TYPE(force_env_type), POINTER :: force_env
2268 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
2269 : LOGICAL, INTENT(IN) :: calculate_forces
2270 : LOGICAL, ALLOCATABLE, DIMENSION(:), INTENT(OUT) :: is_constraint
2271 : LOGICAL, INTENT(OUT) :: in_memory
2272 : LOGICAL, INTENT(IN) :: store_vectors
2273 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
2274 : INTENT(out) :: R12, position_vecs
2275 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
2276 : INTENT(out) :: pair_dist_vecs
2277 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:), &
2278 : INTENT(OUT) :: coefficients
2279 : INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(out) :: catom
2280 :
2281 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_becke_constraint_init'
2282 :
2283 : CHARACTER(len=2) :: element_symbol
2284 : INTEGER :: atom_a, bounds(2), handle, i, iatom, iex, iforce_eval, ikind, iounit, ithread, j, &
2285 : jatom, katom, my_work, my_work_size, natom, nforce_eval, nkind, np(3), npme, nthread, &
2286 : numexp, offset_dlb, unit_nr
2287 : INTEGER, DIMENSION(2, 3) :: bo, bo_conf
2288 34 : INTEGER, DIMENSION(:), POINTER :: atom_list, cores, stride
2289 : LOGICAL :: build, mpi_io
2290 : REAL(kind=dp) :: alpha, chi, coef, ircov, jrcov, ra(3), &
2291 : radius, uij
2292 : REAL(kind=dp), DIMENSION(3) :: cell_v, dist_vec, dr, r, r1, shift
2293 34 : REAL(KIND=dp), DIMENSION(:), POINTER :: radii_list
2294 34 : REAL(KIND=dp), DIMENSION(:, :), POINTER :: pab
2295 34 : TYPE(atomic_kind_type), DIMENSION(:), POINTER :: atomic_kind_set
2296 : TYPE(cdft_control_type), POINTER :: cdft_control
2297 : TYPE(cell_type), POINTER :: cell
2298 : TYPE(cp_logger_type), POINTER :: logger
2299 : TYPE(cp_subsys_type), POINTER :: subsys_mix
2300 : TYPE(force_env_type), POINTER :: force_env_qs
2301 : TYPE(hirshfeld_type), POINTER :: cavity_env
2302 : TYPE(particle_list_type), POINTER :: particles
2303 34 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
2304 : TYPE(pw_pool_type), POINTER :: auxbas_pw_pool
2305 34 : TYPE(qs_kind_type), DIMENSION(:), POINTER :: qs_kind_set
2306 : TYPE(realspace_grid_type), POINTER :: rs_cavity
2307 : TYPE(section_vals_type), POINTER :: force_env_section, print_section
2308 :
2309 34 : NULLIFY (pab, cell, force_env_qs, particle_set, force_env_section, print_section, &
2310 34 : qs_kind_set, particles, subsys_mix, rs_cavity, cavity_env, auxbas_pw_pool, &
2311 34 : atomic_kind_set, radii_list, cdft_control)
2312 68 : logger => cp_get_default_logger()
2313 34 : nforce_eval = SIZE(force_env%sub_force_env)
2314 34 : CALL timeset(routineN, handle)
2315 34 : CALL force_env_get(force_env=force_env, force_env_section=force_env_section)
2316 34 : IF (.NOT. force_env%mixed_env%do_mixed_qmmm_cdft) THEN
2317 : CALL force_env_get(force_env=force_env, &
2318 : subsys=subsys_mix, &
2319 26 : cell=cell)
2320 : CALL cp_subsys_get(subsys=subsys_mix, &
2321 : particles=particles, &
2322 26 : particle_set=particle_set)
2323 : ELSE
2324 24 : DO iforce_eval = 1, nforce_eval
2325 16 : IF (.NOT. ASSOCIATED(force_env%sub_force_env(iforce_eval)%force_env)) CYCLE
2326 24 : force_env_qs => force_env%sub_force_env(iforce_eval)%force_env
2327 : END DO
2328 : CALL get_qs_env(force_env_qs%qmmm_env%qs_env, &
2329 : cp_subsys=subsys_mix, &
2330 8 : cell=cell)
2331 : CALL cp_subsys_get(subsys=subsys_mix, &
2332 : particles=particles, &
2333 8 : particle_set=particle_set)
2334 : END IF
2335 34 : natom = SIZE(particles%els)
2336 34 : print_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
2337 34 : cdft_control => mixed_cdft%cdft_control
2338 34 : CPASSERT(ASSOCIATED(cdft_control))
2339 34 : IF (.NOT. ASSOCIATED(cdft_control%becke_control%cutoffs)) THEN
2340 22 : CALL cp_subsys_get(subsys_mix, atomic_kind_set=atomic_kind_set)
2341 66 : ALLOCATE (cdft_control%becke_control%cutoffs(natom))
2342 26 : SELECT CASE (cdft_control%becke_control%cutoff_type)
2343 : CASE (becke_cutoff_global)
2344 12 : cdft_control%becke_control%cutoffs(:) = cdft_control%becke_control%rglobal
2345 : CASE (becke_cutoff_element)
2346 18 : IF (.NOT. SIZE(atomic_kind_set) == SIZE(cdft_control%becke_control%cutoffs_tmp)) &
2347 : CALL cp_abort(__LOCATION__, &
2348 : "Size of keyword BECKE_CONSTRAINT\ELEMENT_CUTOFFS does "// &
2349 0 : "not match number of atomic kinds in the input coordinate file.")
2350 54 : DO ikind = 1, SIZE(atomic_kind_set)
2351 36 : CALL get_atomic_kind(atomic_kind_set(ikind), natom=katom, atom_list=atom_list)
2352 90 : DO iatom = 1, katom
2353 36 : atom_a = atom_list(iatom)
2354 72 : cdft_control%becke_control%cutoffs(atom_a) = cdft_control%becke_control%cutoffs_tmp(ikind)
2355 : END DO
2356 : END DO
2357 40 : DEALLOCATE (cdft_control%becke_control%cutoffs_tmp)
2358 : END SELECT
2359 : END IF
2360 34 : build = .FALSE.
2361 34 : IF (cdft_control%becke_control%adjust .AND. &
2362 : .NOT. ASSOCIATED(cdft_control%becke_control%aij)) THEN
2363 72 : ALLOCATE (cdft_control%becke_control%aij(natom, natom))
2364 18 : build = .TRUE.
2365 : END IF
2366 102 : ALLOCATE (catom(cdft_control%natoms))
2367 : IF (cdft_control%save_pot .OR. &
2368 : cdft_control%becke_control%cavity_confine .OR. &
2369 34 : cdft_control%becke_control%should_skip .OR. &
2370 : mixed_cdft%first_iteration) THEN
2371 102 : ALLOCATE (is_constraint(natom))
2372 102 : is_constraint = .FALSE.
2373 : END IF
2374 34 : in_memory = calculate_forces .AND. cdft_control%becke_control%in_memory
2375 34 : IF (in_memory .NEQV. calculate_forces) &
2376 : CALL cp_abort(__LOCATION__, &
2377 : "The flag BECKE_CONSTRAINT\IN_MEMORY must be activated "// &
2378 0 : "for the calculation of mixed CDFT forces")
2379 102 : IF (in_memory .OR. mixed_cdft%first_iteration) ALLOCATE (coefficients(natom))
2380 102 : DO i = 1, cdft_control%natoms
2381 68 : catom(i) = cdft_control%atoms(i)
2382 : IF (cdft_control%save_pot .OR. &
2383 : cdft_control%becke_control%cavity_confine .OR. &
2384 68 : cdft_control%becke_control%should_skip .OR. &
2385 : mixed_cdft%first_iteration) &
2386 68 : is_constraint(catom(i)) = .TRUE.
2387 68 : IF (in_memory .OR. mixed_cdft%first_iteration) &
2388 102 : coefficients(catom(i)) = cdft_control%group(1)%coeff(i)
2389 : END DO
2390 34 : CALL pw_env_get(pw_env=mixed_cdft%pw_env, auxbas_pw_pool=auxbas_pw_pool)
2391 340 : bo = auxbas_pw_pool%pw_grid%bounds_local
2392 136 : np = auxbas_pw_pool%pw_grid%npts
2393 136 : dr = auxbas_pw_pool%pw_grid%dr
2394 136 : shift = -REAL(MODULO(np, 2), dp)*dr/2.0_dp
2395 34 : IF (store_vectors) THEN
2396 106 : IF (in_memory) ALLOCATE (pair_dist_vecs(3, natom, natom))
2397 102 : ALLOCATE (position_vecs(3, natom))
2398 : END IF
2399 136 : DO i = 1, 3
2400 136 : cell_v(i) = cell%hmat(i, i)
2401 : END DO
2402 136 : ALLOCATE (R12(natom, natom))
2403 68 : DO iatom = 1, natom - 1
2404 102 : DO jatom = iatom + 1, natom
2405 136 : r = particle_set(iatom)%r
2406 136 : r1 = particle_set(jatom)%r
2407 136 : DO i = 1, 3
2408 102 : r(i) = MODULO(r(i), cell%hmat(i, i)) - cell%hmat(i, i)/2._dp
2409 136 : r1(i) = MODULO(r1(i), cell%hmat(i, i)) - cell%hmat(i, i)/2._dp
2410 : END DO
2411 136 : dist_vec = (r - r1) - ANINT((r - r1)/cell_v)*cell_v
2412 34 : IF (store_vectors) THEN
2413 136 : position_vecs(:, iatom) = r(:)
2414 136 : IF (iatom == 1 .AND. jatom == natom) position_vecs(:, jatom) = r1(:)
2415 34 : IF (in_memory) THEN
2416 96 : pair_dist_vecs(:, iatom, jatom) = dist_vec(:)
2417 96 : pair_dist_vecs(:, jatom, iatom) = -dist_vec(:)
2418 : END IF
2419 : END IF
2420 136 : R12(iatom, jatom) = SQRT(DOT_PRODUCT(dist_vec, dist_vec))
2421 34 : R12(jatom, iatom) = R12(iatom, jatom)
2422 68 : IF (build) THEN
2423 : CALL get_atomic_kind(atomic_kind=particle_set(iatom)%atomic_kind, &
2424 18 : kind_number=ikind)
2425 18 : ircov = cdft_control%becke_control%radii(ikind)
2426 : CALL get_atomic_kind(atomic_kind=particle_set(jatom)%atomic_kind, &
2427 18 : kind_number=ikind)
2428 18 : jrcov = cdft_control%becke_control%radii(ikind)
2429 18 : IF (ircov .NE. jrcov) THEN
2430 18 : chi = ircov/jrcov
2431 18 : uij = (chi - 1.0_dp)/(chi + 1.0_dp)
2432 18 : cdft_control%becke_control%aij(iatom, jatom) = uij/(uij**2 - 1.0_dp)
2433 18 : IF (cdft_control%becke_control%aij(iatom, jatom) &
2434 : .GT. 0.5_dp) THEN
2435 0 : cdft_control%becke_control%aij(iatom, jatom) = 0.5_dp
2436 18 : ELSE IF (cdft_control%becke_control%aij(iatom, jatom) &
2437 : .LT. -0.5_dp) THEN
2438 0 : cdft_control%becke_control%aij(iatom, jatom) = -0.5_dp
2439 : END IF
2440 : ELSE
2441 0 : cdft_control%becke_control%aij(iatom, jatom) = 0.0_dp
2442 : END IF
2443 : cdft_control%becke_control%aij(jatom, iatom) = &
2444 18 : -cdft_control%becke_control%aij(iatom, jatom)
2445 : END IF
2446 : END DO
2447 : END DO
2448 : ! Dump some additional information about the calculation
2449 34 : IF (mixed_cdft%first_iteration) THEN
2450 22 : print_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
2451 22 : iounit = cp_print_key_unit_nr(logger, print_section, '', extension='.mixedLog')
2452 22 : IF (iounit > 0) THEN
2453 : WRITE (iounit, '(/,T3,A,T66)') &
2454 11 : '-------------------------- Becke atomic parameters ---------------------------'
2455 11 : IF (cdft_control%becke_control%adjust) THEN
2456 : WRITE (iounit, '(T3,A,A)') &
2457 9 : 'Atom Element Coefficient', ' Cutoff (angstrom) CDFT Radius (angstrom)'
2458 27 : DO iatom = 1, natom
2459 : CALL get_atomic_kind(atomic_kind=particle_set(iatom)%atomic_kind, &
2460 : element_symbol=element_symbol, &
2461 18 : kind_number=ikind)
2462 18 : ircov = cp_unit_from_cp2k(cdft_control%becke_control%radii(ikind), "angstrom")
2463 18 : IF (is_constraint(iatom)) THEN
2464 18 : coef = coefficients(iatom)
2465 : ELSE
2466 0 : coef = 0.0_dp
2467 : END IF
2468 : WRITE (iounit, "(i6,T14,A2,T22,F8.3,T44,F8.3,T73,F8.3)") &
2469 18 : iatom, ADJUSTR(element_symbol), coef, &
2470 18 : cp_unit_from_cp2k(cdft_control%becke_control%cutoffs(iatom), "angstrom"), &
2471 63 : ircov
2472 : END DO
2473 : ELSE
2474 : WRITE (iounit, '(T3,A,A)') &
2475 2 : 'Atom Element Coefficient', ' Cutoff (angstrom)'
2476 6 : DO iatom = 1, natom
2477 : CALL get_atomic_kind(atomic_kind=particle_set(iatom)%atomic_kind, &
2478 4 : element_symbol=element_symbol)
2479 4 : IF (is_constraint(iatom)) THEN
2480 4 : coef = coefficients(iatom)
2481 : ELSE
2482 0 : coef = 0.0_dp
2483 : END IF
2484 : WRITE (iounit, "(i6,T14,A2,T22,F8.3,T44,F8.3)") &
2485 4 : iatom, ADJUSTR(element_symbol), coef, &
2486 10 : cp_unit_from_cp2k(cdft_control%becke_control%cutoffs(iatom), "angstrom")
2487 : END DO
2488 : END IF
2489 : WRITE (iounit, '(T3,A)') &
2490 11 : '------------------------------------------------------------------------------'
2491 : END IF
2492 : CALL cp_print_key_finished_output(iounit, logger, force_env_section, &
2493 22 : "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
2494 22 : mixed_cdft%first_iteration = .FALSE.
2495 : END IF
2496 :
2497 34 : IF (cdft_control%becke_control%cavity_confine) THEN
2498 34 : CPASSERT(ASSOCIATED(mixed_cdft%qs_kind_set))
2499 34 : cavity_env => cdft_control%becke_control%cavity_env
2500 34 : qs_kind_set => mixed_cdft%qs_kind_set
2501 34 : CALL cp_subsys_get(subsys_mix, atomic_kind_set=atomic_kind_set)
2502 34 : nkind = SIZE(qs_kind_set)
2503 34 : IF (.NOT. ASSOCIATED(cavity_env%kind_shape_fn)) THEN
2504 22 : IF (ASSOCIATED(cdft_control%becke_control%radii)) THEN
2505 54 : ALLOCATE (radii_list(SIZE(cdft_control%becke_control%radii)))
2506 54 : DO ikind = 1, SIZE(cdft_control%becke_control%radii)
2507 54 : IF (cavity_env%use_bohr) THEN
2508 0 : radii_list(ikind) = cdft_control%becke_control%radii(ikind)
2509 : ELSE
2510 36 : radii_list(ikind) = cp_unit_from_cp2k(cdft_control%becke_control%radii(ikind), "angstrom")
2511 : END IF
2512 : END DO
2513 : END IF
2514 : CALL create_shape_function(cavity_env, qs_kind_set, atomic_kind_set, &
2515 : radius=cdft_control%becke_control%rcavity, &
2516 22 : radii_list=radii_list)
2517 22 : IF (ASSOCIATED(radii_list)) &
2518 18 : DEALLOCATE (radii_list)
2519 : END IF
2520 34 : NULLIFY (rs_cavity)
2521 : CALL pw_env_get(pw_env=mixed_cdft%pw_env, auxbas_rs_grid=rs_cavity, &
2522 34 : auxbas_pw_pool=auxbas_pw_pool)
2523 : ! be careful in parallel nsmax is chosen with multigrid in mind!
2524 34 : CALL rs_grid_zero(rs_cavity)
2525 34 : ALLOCATE (pab(1, 1))
2526 34 : nthread = 1
2527 34 : ithread = 0
2528 102 : DO ikind = 1, SIZE(atomic_kind_set)
2529 68 : numexp = cavity_env%kind_shape_fn(ikind)%numexp
2530 68 : IF (numexp <= 0) CYCLE
2531 68 : CALL get_atomic_kind(atomic_kind_set(ikind), natom=katom, atom_list=atom_list)
2532 204 : ALLOCATE (cores(katom))
2533 136 : DO iex = 1, numexp
2534 68 : alpha = cavity_env%kind_shape_fn(ikind)%zet(iex)
2535 68 : coef = cavity_env%kind_shape_fn(ikind)%coef(iex)
2536 68 : npme = 0
2537 136 : cores = 0
2538 136 : DO iatom = 1, katom
2539 136 : IF (rs_cavity%desc%parallel .AND. .NOT. rs_cavity%desc%distributed) THEN
2540 : ! replicated realspace grid, split the atoms up between procs
2541 68 : IF (MODULO(iatom, rs_cavity%desc%group_size) == rs_cavity%desc%my_pos) THEN
2542 34 : npme = npme + 1
2543 34 : cores(npme) = iatom
2544 : END IF
2545 : ELSE
2546 0 : npme = npme + 1
2547 0 : cores(npme) = iatom
2548 : END IF
2549 : END DO
2550 170 : DO j = 1, npme
2551 34 : iatom = cores(j)
2552 34 : atom_a = atom_list(iatom)
2553 34 : pab(1, 1) = coef
2554 34 : IF (store_vectors) THEN
2555 136 : ra(:) = position_vecs(:, atom_a) + cell_v(:)/2._dp
2556 : ELSE
2557 0 : ra(:) = pbc(particle_set(atom_a)%r, cell)
2558 : END IF
2559 102 : IF (is_constraint(atom_a)) THEN
2560 : radius = exp_radius_very_extended(la_min=0, la_max=0, lb_min=0, lb_max=0, &
2561 : ra=ra, rb=ra, rp=ra, &
2562 : zetp=alpha, eps=mixed_cdft%eps_rho_rspace, &
2563 : pab=pab, o1=0, o2=0, & ! without map_consistent
2564 34 : prefactor=1.0_dp, cutoff=0.0_dp)
2565 :
2566 : CALL collocate_pgf_product(0, alpha, 0, 0, 0.0_dp, 0, ra, &
2567 : (/0.0_dp, 0.0_dp, 0.0_dp/), 1.0_dp, pab, 0, 0, &
2568 : rs_cavity, &
2569 : radius=radius, ga_gb_function=GRID_FUNC_AB, &
2570 : use_subpatch=.TRUE., &
2571 34 : subpatch_pattern=0)
2572 : END IF
2573 : END DO
2574 : END DO
2575 170 : DEALLOCATE (cores)
2576 : END DO
2577 34 : DEALLOCATE (pab)
2578 34 : CALL auxbas_pw_pool%create_pw(cdft_control%becke_control%cavity)
2579 34 : CALL transfer_rs2pw(rs_cavity, cdft_control%becke_control%cavity)
2580 : CALL hfun_zero(cdft_control%becke_control%cavity%array, &
2581 : cdft_control%becke_control%eps_cavity, &
2582 34 : just_zero=.FALSE., bounds=bounds, work=my_work)
2583 34 : IF (bounds(2) .LT. bo(2, 3)) THEN
2584 8 : bounds(2) = bounds(2) - 1
2585 : ELSE
2586 26 : bounds(2) = bo(2, 3)
2587 : END IF
2588 34 : IF (bounds(1) .GT. bo(1, 3)) THEN
2589 : ! In the special case bounds(1) == bounds(2) == bo(2, 3), after this check
2590 : ! bounds(1) > bounds(2) and the subsequent gradient allocation (:, :, :, bounds(1):bounds(2))
2591 : ! will correctly allocate a 0-sized array
2592 8 : bounds(1) = bounds(1) + 1
2593 : ELSE
2594 26 : bounds(1) = bo(1, 3)
2595 : END IF
2596 34 : IF (bounds(1) > bounds(2)) THEN
2597 0 : my_work_size = 0
2598 : ELSE
2599 34 : my_work_size = (bounds(2) - bounds(1) + 1)
2600 34 : IF (mixed_cdft%is_pencil .OR. mixed_cdft%is_special) THEN
2601 0 : my_work_size = my_work_size*(bo(2, 2) - bo(1, 2) + 1)
2602 : ELSE
2603 34 : my_work_size = my_work_size*(bo(2, 1) - bo(1, 1) + 1)
2604 : END IF
2605 : END IF
2606 102 : cdft_control%becke_control%confine_bounds = bounds
2607 34 : IF (cdft_control%becke_control%print_cavity) THEN
2608 : CALL hfun_zero(cdft_control%becke_control%cavity%array, &
2609 0 : cdft_control%becke_control%eps_cavity, just_zero=.TRUE.)
2610 : NULLIFY (stride)
2611 0 : ALLOCATE (stride(3))
2612 0 : stride = (/2, 2, 2/)
2613 0 : mpi_io = .TRUE.
2614 : unit_nr = cp_print_key_unit_nr(logger, print_section, "", &
2615 : middle_name="BECKE_CAVITY", &
2616 : extension=".cube", file_position="REWIND", &
2617 0 : log_filename=.FALSE., mpi_io=mpi_io)
2618 0 : IF (force_env%para_env%is_source() .AND. unit_nr .LT. 1) &
2619 : CALL cp_abort(__LOCATION__, &
2620 0 : "Please turn on PROGRAM_RUN_INFO to print cavity")
2621 : CALL cp_pw_to_cube(cdft_control%becke_control%cavity, &
2622 : unit_nr, "CAVITY", particles=particles, &
2623 0 : stride=stride, mpi_io=mpi_io)
2624 0 : CALL cp_print_key_finished_output(unit_nr, logger, print_section, '', mpi_io=mpi_io)
2625 0 : DEALLOCATE (stride)
2626 : END IF
2627 : END IF
2628 34 : bo_conf = bo
2629 34 : IF (cdft_control%becke_control%cavity_confine) &
2630 102 : bo_conf(:, 3) = cdft_control%becke_control%confine_bounds
2631 : ! Load balance
2632 34 : IF (mixed_cdft%dlb) &
2633 : CALL mixed_becke_constraint_dlb(force_env, mixed_cdft, my_work, &
2634 8 : my_work_size, natom, bo, bo_conf)
2635 : ! The bounds have been finalized => time to allocate storage for working matrices
2636 34 : offset_dlb = 0
2637 34 : IF (mixed_cdft%dlb) THEN
2638 8 : IF (mixed_cdft%dlb_control%send_work .AND. .NOT. mixed_cdft%is_special) &
2639 8 : offset_dlb = SUM(mixed_cdft%dlb_control%target_list(2, :))
2640 : END IF
2641 34 : IF (cdft_control%becke_control%cavity_confine) THEN
2642 : ! Get rid of the zero part of the confinement cavity (cr3d -> real(:,:,:))
2643 34 : IF (mixed_cdft%is_special) THEN
2644 0 : ALLOCATE (mixed_cdft%sendbuff(SIZE(mixed_cdft%dest_list)))
2645 0 : DO i = 1, SIZE(mixed_cdft%dest_list)
2646 : ALLOCATE (mixed_cdft%sendbuff(i)%cavity(mixed_cdft%dest_list_bo(1, i):mixed_cdft%dest_list_bo(2, i), &
2647 0 : bo(1, 2):bo(2, 2), bo_conf(1, 3):bo_conf(2, 3)))
2648 : mixed_cdft%sendbuff(i)%cavity = cdft_control%becke_control%cavity%array(mixed_cdft%dest_list_bo(1, i): &
2649 : mixed_cdft%dest_list_bo(2, i), &
2650 : bo(1, 2):bo(2, 2), &
2651 0 : bo_conf(1, 3):bo_conf(2, 3))
2652 : END DO
2653 34 : ELSE IF (mixed_cdft%is_pencil) THEN
2654 0 : ALLOCATE (mixed_cdft%cavity(bo(1, 1) + offset_dlb:bo(2, 1), bo(1, 2):bo(2, 2), bo_conf(1, 3):bo_conf(2, 3)))
2655 : mixed_cdft%cavity = cdft_control%becke_control%cavity%array(bo(1, 1) + offset_dlb:bo(2, 1), &
2656 : bo(1, 2):bo(2, 2), &
2657 0 : bo_conf(1, 3):bo_conf(2, 3))
2658 : ELSE
2659 170 : ALLOCATE (mixed_cdft%cavity(bo(1, 1):bo(2, 1), bo(1, 2) + offset_dlb:bo(2, 2), bo_conf(1, 3):bo_conf(2, 3)))
2660 : mixed_cdft%cavity = cdft_control%becke_control%cavity%array(bo(1, 1):bo(2, 1), &
2661 : bo(1, 2) + offset_dlb:bo(2, 2), &
2662 3568226 : bo_conf(1, 3):bo_conf(2, 3))
2663 : END IF
2664 34 : CALL auxbas_pw_pool%give_back_pw(cdft_control%becke_control%cavity)
2665 : END IF
2666 34 : IF (mixed_cdft%is_special) THEN
2667 0 : DO i = 1, SIZE(mixed_cdft%dest_list)
2668 : ALLOCATE (mixed_cdft%sendbuff(i)%weight(mixed_cdft%dest_list_bo(1, i):mixed_cdft%dest_list_bo(2, i), &
2669 0 : bo(1, 2):bo(2, 2), bo_conf(1, 3):bo_conf(2, 3)))
2670 0 : mixed_cdft%sendbuff(i)%weight = 0.0_dp
2671 : END DO
2672 34 : ELSE IF (mixed_cdft%is_pencil) THEN
2673 0 : ALLOCATE (mixed_cdft%weight(bo(1, 1) + offset_dlb:bo(2, 1), bo(1, 2):bo(2, 2), bo_conf(1, 3):bo_conf(2, 3)))
2674 0 : mixed_cdft%weight = 0.0_dp
2675 : ELSE
2676 170 : ALLOCATE (mixed_cdft%weight(bo(1, 1):bo(2, 1), bo(1, 2) + offset_dlb:bo(2, 2), bo_conf(1, 3):bo_conf(2, 3)))
2677 1784130 : mixed_cdft%weight = 0.0_dp
2678 : END IF
2679 34 : IF (in_memory) THEN
2680 24 : IF (mixed_cdft%is_special) THEN
2681 0 : DO i = 1, SIZE(mixed_cdft%dest_list)
2682 : ALLOCATE (mixed_cdft%sendbuff(i)%gradients(3*natom, mixed_cdft%dest_list_bo(1, i): &
2683 : mixed_cdft%dest_list_bo(2, i), &
2684 : bo(1, 2):bo(2, 2), &
2685 0 : bo_conf(1, 3):bo_conf(2, 3)))
2686 0 : mixed_cdft%sendbuff(i)%gradients = 0.0_dp
2687 : END DO
2688 24 : ELSE IF (mixed_cdft%is_pencil) THEN
2689 : ALLOCATE (cdft_control%group(1)%gradients(3*natom, bo(1, 1) + offset_dlb:bo(2, 1), &
2690 : bo(1, 2):bo(2, 2), &
2691 0 : bo_conf(1, 3):bo_conf(2, 3)))
2692 0 : cdft_control%group(1)%gradients = 0.0_dp
2693 : ELSE
2694 : ALLOCATE (cdft_control%group(1)%gradients(3*natom, bo(1, 1):bo(2, 1), &
2695 : bo(1, 2) + offset_dlb:bo(2, 2), &
2696 144 : bo_conf(1, 3):bo_conf(2, 3)))
2697 9808936 : cdft_control%group(1)%gradients = 0.0_dp
2698 : END IF
2699 : END IF
2700 :
2701 34 : CALL timestop(handle)
2702 :
2703 34 : END SUBROUTINE mixed_becke_constraint_init
2704 :
2705 : ! **************************************************************************************************
2706 : !> \brief Setup load balancing for mixed Becke calculation
2707 : !> \param force_env the force_env that holds the CDFT states
2708 : !> \param mixed_cdft container for structures needed to build the mixed CDFT constraint
2709 : !> \param my_work an estimate of the work per processor
2710 : !> \param my_work_size size of the smallest array slice per processor. overloaded processors will
2711 : !> redistribute works as integer multiples of this value.
2712 : !> \param natom the total number of atoms
2713 : !> \param bo bounds of the realspace grid that holds the electron density
2714 : !> \param bo_conf same as bo, but bounds along z-direction have been compacted with confinement
2715 : !> \par History
2716 : !> 03.2016 created [Nico Holmberg]
2717 : ! **************************************************************************************************
2718 8 : SUBROUTINE mixed_becke_constraint_dlb(force_env, mixed_cdft, my_work, &
2719 : my_work_size, natom, bo, bo_conf)
2720 : TYPE(force_env_type), POINTER :: force_env
2721 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
2722 : INTEGER, INTENT(IN) :: my_work, my_work_size, natom
2723 : INTEGER, DIMENSION(2, 3) :: bo, bo_conf
2724 :
2725 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_becke_constraint_dlb'
2726 : INTEGER, PARAMETER :: should_deallocate = 7000, &
2727 : uninitialized = -7000
2728 :
2729 : INTEGER :: actually_sent, exhausted_work, handle, i, ind, iounit, ispecial, j, max_targets, &
2730 : more_work, my_pos, my_special_work, my_target, no_overloaded, no_underloaded, nsend, &
2731 : nsend_limit, nsend_max, offset, offset_proc, offset_special, send_total, tags(2)
2732 8 : INTEGER, DIMENSION(:), POINTER :: buffsize, cumulative_work, expected_work, load_imbalance, &
2733 16 : nrecv, nsend_proc, sendbuffer, should_warn, tmp, work_index, work_size
2734 8 : INTEGER, DIMENSION(:, :), POINTER :: targets, tmp_bo
2735 : LOGICAL :: consistent
2736 16 : LOGICAL, DIMENSION(:), POINTER :: mask_recv, mask_send, touched
2737 : REAL(kind=dp) :: average_work, load_scale, &
2738 : very_overloaded, work_factor
2739 8 : REAL(KIND=dp), DIMENSION(:, :, :), POINTER :: cavity
2740 : TYPE(cdft_control_type), POINTER :: cdft_control
2741 : TYPE(cp_logger_type), POINTER :: logger
2742 40 : TYPE(mp_request_type), DIMENSION(4) :: req
2743 8 : TYPE(mp_request_type), DIMENSION(:), POINTER :: req_recv, req_total
2744 : TYPE(section_vals_type), POINTER :: force_env_section, print_section
2745 :
2746 : TYPE buffers
2747 : LOGICAL, POINTER, DIMENSION(:) :: bv
2748 : INTEGER, POINTER, DIMENSION(:) :: iv
2749 : END TYPE buffers
2750 16 : TYPE(buffers), POINTER, DIMENSION(:) :: recvbuffer, sbuff
2751 : CHARACTER(len=2) :: dummy
2752 :
2753 16 : logger => cp_get_default_logger()
2754 8 : CALL timeset(routineN, handle)
2755 8 : mixed_cdft%dlb_control%recv_work = .FALSE.
2756 8 : mixed_cdft%dlb_control%send_work = .FALSE.
2757 8 : NULLIFY (expected_work, work_index, load_imbalance, work_size, &
2758 8 : cumulative_work, sendbuffer, buffsize, req_recv, req_total, &
2759 8 : tmp, nrecv, nsend_proc, targets, tmp_bo, touched, &
2760 8 : mask_recv, mask_send, cavity, recvbuffer, sbuff, force_env_section, &
2761 8 : print_section, cdft_control)
2762 8 : CALL force_env_get(force_env=force_env, force_env_section=force_env_section)
2763 8 : print_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
2764 8 : iounit = cp_print_key_unit_nr(logger, print_section, '', extension='.mixedLog')
2765 8 : cdft_control => mixed_cdft%cdft_control
2766 : ! These numerical values control data redistribution and are system sensitive
2767 : ! Currently they are not refined during run time which may cause crashes
2768 : ! However, using too many processors or a confinement cavity that is too large relative to the
2769 : ! total system volume are more likely culprits.
2770 8 : load_scale = mixed_cdft%dlb_control%load_scale
2771 8 : very_overloaded = mixed_cdft%dlb_control%very_overloaded
2772 8 : more_work = mixed_cdft%dlb_control%more_work
2773 8 : max_targets = 40
2774 8 : work_factor = 0.8_dp
2775 : ! Reset targets/sources
2776 8 : IF (mixed_cdft%is_special) THEN
2777 0 : DEALLOCATE (mixed_cdft%dest_list, mixed_cdft%dest_list_bo, &
2778 0 : mixed_cdft%source_list, mixed_cdft%source_list_bo)
2779 : ALLOCATE (mixed_cdft%dest_list(SIZE(mixed_cdft%dest_list_save)), &
2780 : mixed_cdft%dest_list_bo(SIZE(mixed_cdft%dest_bo_save, 1), SIZE(mixed_cdft%dest_bo_save, 2)), &
2781 : mixed_cdft%source_list(SIZE(mixed_cdft%source_list_save)), &
2782 0 : mixed_cdft%source_list_bo(SIZE(mixed_cdft%source_bo_save, 1), SIZE(mixed_cdft%source_bo_save, 2)))
2783 0 : mixed_cdft%dest_list = mixed_cdft%dest_list_save
2784 0 : mixed_cdft%source_list = mixed_cdft%source_list_save
2785 0 : mixed_cdft%dest_list_bo = mixed_cdft%dest_bo_save
2786 0 : mixed_cdft%source_list_bo = mixed_cdft%source_bo_save
2787 : END IF
2788 : ALLOCATE (mixed_cdft%dlb_control%expected_work(force_env%para_env%num_pe), &
2789 : expected_work(force_env%para_env%num_pe), &
2790 48 : work_size(force_env%para_env%num_pe))
2791 : IF (debug_this_module) THEN
2792 : ALLOCATE (should_warn(force_env%para_env%num_pe))
2793 : should_warn = 0
2794 : END IF
2795 24 : expected_work = 0
2796 8 : expected_work(force_env%para_env%mepos + 1) = my_work
2797 24 : work_size = 0
2798 8 : work_size(force_env%para_env%mepos + 1) = my_work_size
2799 8 : IF (ASSOCIATED(mixed_cdft%dlb_control%prediction_error)) THEN
2800 4 : IF (mixed_cdft%is_pencil .OR. mixed_cdft%is_special) THEN
2801 : work_size(force_env%para_env%mepos + 1) = work_size(force_env%para_env%mepos + 1) - &
2802 : NINT(REAL(mixed_cdft%dlb_control% &
2803 : prediction_error(force_env%para_env%mepos + 1), dp)/ &
2804 0 : REAL(bo(2, 1) - bo(1, 1) + 1, dp))
2805 : ELSE
2806 : work_size(force_env%para_env%mepos + 1) = work_size(force_env%para_env%mepos + 1) - &
2807 : NINT(REAL(mixed_cdft%dlb_control% &
2808 : prediction_error(force_env%para_env%mepos + 1), dp)/ &
2809 4 : REAL(bo(2, 2) - bo(1, 2) + 1, dp))
2810 : END IF
2811 : END IF
2812 40 : CALL force_env%para_env%sum(expected_work)
2813 40 : CALL force_env%para_env%sum(work_size)
2814 : ! We store the unsorted expected work to refine the estimate on subsequent calls to this routine
2815 40 : mixed_cdft%dlb_control%expected_work = expected_work
2816 : ! Take into account the prediction error of the last step
2817 8 : IF (ASSOCIATED(mixed_cdft%dlb_control%prediction_error)) &
2818 20 : expected_work = expected_work - mixed_cdft%dlb_control%prediction_error
2819 : !
2820 24 : average_work = REAL(SUM(expected_work), dp)/REAL(force_env%para_env%num_pe, dp)
2821 : ALLOCATE (work_index(force_env%para_env%num_pe), &
2822 : load_imbalance(force_env%para_env%num_pe), &
2823 48 : targets(2, force_env%para_env%num_pe))
2824 40 : load_imbalance = expected_work - NINT(average_work)
2825 8 : no_overloaded = 0
2826 8 : no_underloaded = 0
2827 56 : targets = 0
2828 : ! Convert the load imbalance to a multiple of the actual work size
2829 24 : DO i = 1, force_env%para_env%num_pe
2830 24 : IF (load_imbalance(i) .GT. 0) THEN
2831 8 : no_overloaded = no_overloaded + 1
2832 : ! Allow heavily overloaded processors to dump more data since most likely they have a lot of 'real' work
2833 8 : IF (expected_work(i) .GT. NINT(very_overloaded*average_work)) THEN
2834 0 : load_imbalance(i) = (CEILING(REAL(load_imbalance(i), dp)/REAL(work_size(i), dp)) + more_work)*work_size(i)
2835 : ELSE
2836 8 : load_imbalance(i) = CEILING(REAL(load_imbalance(i), dp)/REAL(work_size(i), dp))*work_size(i)
2837 : END IF
2838 : ELSE
2839 : ! Allow the underloaded processors to take load_scale amount of additional work
2840 : ! otherwise we may be unable to exhaust all overloaded processors
2841 8 : load_imbalance(i) = NINT(load_imbalance(i)*load_scale)
2842 8 : no_underloaded = no_underloaded + 1
2843 : END IF
2844 : END DO
2845 8 : CALL sort(expected_work, force_env%para_env%num_pe, indices=work_index)
2846 : ! Redistribute work in order from the most overloaded processors to the most underloaded processors
2847 : ! Each underloaded processor is limited to one overloaded processor
2848 8 : IF (load_imbalance(force_env%para_env%mepos + 1) > 0) THEN
2849 4 : offset = 0
2850 4 : mixed_cdft%dlb_control%send_work = .TRUE.
2851 : ! Build up the total amount of work that needs redistribution
2852 12 : ALLOCATE (cumulative_work(force_env%para_env%num_pe))
2853 12 : cumulative_work = 0
2854 4 : DO i = force_env%para_env%num_pe, force_env%para_env%num_pe - no_overloaded + 1, -1
2855 4 : IF (work_index(i) == force_env%para_env%mepos + 1) THEN
2856 : EXIT
2857 : ELSE
2858 0 : offset = offset + load_imbalance(work_index(i))
2859 0 : IF (i == force_env%para_env%num_pe) THEN
2860 0 : cumulative_work(i) = load_imbalance(work_index(i))
2861 : ELSE
2862 0 : cumulative_work(i) = cumulative_work(i + 1) + load_imbalance(work_index(i))
2863 : END IF
2864 : END IF
2865 : END DO
2866 4 : my_pos = i
2867 4 : j = force_env%para_env%num_pe
2868 4 : nsend_max = load_imbalance(work_index(j))/work_size(work_index(j))
2869 4 : exhausted_work = 0
2870 : ! Determine send offset by going through all processors that are more overloaded than my_pos
2871 4 : DO i = 1, no_underloaded
2872 4 : IF (my_pos == force_env%para_env%num_pe) EXIT
2873 0 : nsend = -load_imbalance(work_index(i))/work_size(work_index(j))
2874 0 : IF (nsend .LT. 1) nsend = 1
2875 0 : nsend_max = nsend_max - nsend
2876 0 : IF (nsend_max .LT. 0) nsend = nsend + nsend_max
2877 0 : exhausted_work = exhausted_work + nsend*work_size(work_index(j))
2878 0 : offset = offset - nsend*work_size(work_index(j))
2879 0 : IF (offset .LT. 0) EXIT
2880 4 : IF (exhausted_work .EQ. cumulative_work(j)) THEN
2881 0 : j = j - 1
2882 0 : nsend_max = load_imbalance(work_index(j))/work_size(work_index(j))
2883 : END IF
2884 : END DO
2885 : ! Underloaded processors were fully exhausted: rewind index
2886 : ! Load balancing will fail if this happens on multiple processors
2887 4 : IF (i .GT. no_underloaded) THEN
2888 0 : i = no_underloaded
2889 : END IF
2890 4 : my_target = i
2891 4 : DEALLOCATE (cumulative_work)
2892 : ! Determine how much and who to send slices of my grid points
2893 4 : nsend_max = load_imbalance(force_env%para_env%mepos + 1)/work_size(force_env%para_env%mepos + 1)
2894 : ! This the actual number of available array slices
2895 4 : IF (mixed_cdft%is_pencil .OR. mixed_cdft%is_special) THEN
2896 0 : nsend_limit = bo(2, 1) - bo(1, 1) + 1
2897 : ELSE
2898 4 : nsend_limit = bo(2, 2) - bo(1, 2) + 1
2899 : END IF
2900 4 : IF (.NOT. mixed_cdft%is_special) THEN
2901 4 : ALLOCATE (mixed_cdft%dlb_control%target_list(3, max_targets))
2902 : ELSE
2903 0 : ALLOCATE (mixed_cdft%dlb_control%target_list(3 + 2*SIZE(mixed_cdft%dest_list), max_targets))
2904 0 : ALLOCATE (touched(SIZE(mixed_cdft%dest_list)))
2905 0 : touched = .FALSE.
2906 : END IF
2907 644 : mixed_cdft%dlb_control%target_list = uninitialized
2908 4 : i = 1
2909 4 : ispecial = 1
2910 4 : offset_special = 0
2911 4 : targets(1, my_pos) = my_target
2912 4 : send_total = 0
2913 : ! Main loop. Note, we actually allow my_pos to offload more slices than nsend_max
2914 : DO
2915 4 : nsend = -load_imbalance(work_index(my_target))/work_size(force_env%para_env%mepos + 1)
2916 4 : IF (nsend .LT. 1) nsend = 1 ! send at least one block
2917 : ! Prevent over redistribution: leave at least (1-work_factor)*nsend_limit slices to my_pos
2918 4 : IF (nsend .GT. NINT(work_factor*nsend_limit - send_total)) THEN
2919 : nsend = NINT(work_factor*nsend_limit - send_total)
2920 : IF (debug_this_module) &
2921 : should_warn(force_env%para_env%mepos + 1) = 1
2922 : END IF
2923 4 : mixed_cdft%dlb_control%target_list(1, i) = work_index(my_target) - 1 ! This is the actual processor rank
2924 4 : IF (mixed_cdft%is_special) THEN
2925 0 : mixed_cdft%dlb_control%target_list(2, i) = 0
2926 0 : actually_sent = nsend
2927 0 : DO j = ispecial, SIZE(mixed_cdft%dest_list)
2928 0 : mixed_cdft%dlb_control%target_list(2, i) = mixed_cdft%dlb_control%target_list(2, i) + 1
2929 0 : touched(j) = .TRUE.
2930 0 : IF (nsend .LT. mixed_cdft%dest_list_bo(2, j) - mixed_cdft%dest_list_bo(1, j) + 1) THEN
2931 0 : mixed_cdft%dlb_control%target_list(3 + 2*j - 1, i) = mixed_cdft%dest_list_bo(1, j)
2932 0 : mixed_cdft%dlb_control%target_list(3 + 2*j, i) = mixed_cdft%dest_list_bo(1, j) + nsend - 1
2933 0 : mixed_cdft%dest_list_bo(1, j) = mixed_cdft%dest_list_bo(1, j) + nsend
2934 0 : nsend = 0
2935 0 : EXIT
2936 : ELSE
2937 0 : mixed_cdft%dlb_control%target_list(3 + 2*j - 1, i) = mixed_cdft%dest_list_bo(1, j)
2938 0 : mixed_cdft%dlb_control%target_list(3 + 2*j, i) = mixed_cdft%dest_list_bo(2, j)
2939 0 : nsend = nsend - (mixed_cdft%dest_list_bo(2, j) - mixed_cdft%dest_list_bo(1, j) + 1)
2940 0 : mixed_cdft%dest_list_bo(1:2, j) = should_deallocate
2941 : END IF
2942 0 : IF (nsend .LE. 0) EXIT
2943 : END DO
2944 0 : IF (mixed_cdft%dest_list_bo(1, ispecial) .EQ. should_deallocate) ispecial = j + 1
2945 0 : actually_sent = actually_sent - nsend
2946 0 : nsend_max = nsend_max - actually_sent
2947 0 : send_total = send_total + actually_sent
2948 : ELSE
2949 4 : mixed_cdft%dlb_control%target_list(2, i) = nsend
2950 4 : nsend_max = nsend_max - nsend
2951 4 : send_total = send_total + nsend
2952 : END IF
2953 4 : IF (nsend_max .LT. 0) nsend_max = 0
2954 4 : IF (nsend_max .EQ. 0) EXIT
2955 0 : IF (my_target /= no_underloaded) THEN
2956 0 : my_target = my_target + 1
2957 : ELSE
2958 : ! If multiple processors execute this block load balancing will fail
2959 0 : mixed_cdft%dlb_control%target_list(2, i) = mixed_cdft%dlb_control%target_list(2, i) + nsend_max
2960 0 : nsend_max = 0
2961 0 : EXIT
2962 : END IF
2963 0 : i = i + 1
2964 0 : IF (i .GT. max_targets) &
2965 : CALL cp_abort(__LOCATION__, &
2966 4 : "Load balancing error: increase max_targets")
2967 : END DO
2968 4 : IF (.NOT. mixed_cdft%is_special) THEN
2969 4 : CALL reallocate(mixed_cdft%dlb_control%target_list, 1, 3, 1, i)
2970 : ELSE
2971 0 : CALL reallocate(mixed_cdft%dlb_control%target_list, 1, 3 + 2*SIZE(mixed_cdft%dest_list), 1, i)
2972 : END IF
2973 4 : targets(2, my_pos) = my_target
2974 : ! Equalize the load on the target processors
2975 4 : IF (.NOT. mixed_cdft%is_special) THEN
2976 4 : IF (send_total .GT. NINT(work_factor*nsend_limit)) send_total = NINT(work_factor*nsend_limit) - 1
2977 4 : nsend = NINT(REAL(send_total, dp)/REAL(SIZE(mixed_cdft%dlb_control%target_list, 2), dp))
2978 8 : mixed_cdft%dlb_control%target_list(2, :) = nsend
2979 : END IF
2980 : ELSE
2981 4 : DO i = 1, no_underloaded
2982 4 : IF (work_index(i) == force_env%para_env%mepos + 1) EXIT
2983 : END DO
2984 : my_pos = i
2985 : END IF
2986 104 : CALL force_env%para_env%sum(targets)
2987 : IF (debug_this_module) THEN
2988 : CALL force_env%para_env%sum(should_warn)
2989 : IF (ANY(should_warn == 1)) &
2990 : CALL cp_warn(__LOCATION__, &
2991 : "MIXED_CDFT DLB: Attempted to redistribute more array"// &
2992 : " slices than actually available. Leaving a fraction of the total"// &
2993 : " slices on the overloaded processor. Perhaps you have set LOAD_SCALE too high?")
2994 : DEALLOCATE (should_warn)
2995 : END IF
2996 : ! check that there is one-to-one mapping between over- and underloaded processors
2997 8 : IF (force_env%para_env%is_source()) THEN
2998 4 : consistent = .TRUE.
2999 4 : DO i = force_env%para_env%num_pe - 1, force_env%para_env%num_pe - no_overloaded + 1, -1
3000 0 : IF (targets(1, i) .GT. no_underloaded) consistent = .FALSE.
3001 4 : IF (targets(1, i) .GT. targets(2, i + 1)) THEN
3002 : CYCLE
3003 : ELSE
3004 0 : consistent = .FALSE.
3005 : END IF
3006 : END DO
3007 4 : IF (.NOT. consistent) THEN
3008 : IF (debug_this_module .AND. iounit > 0) THEN
3009 : DO i = force_env%para_env%num_pe - 1, force_env%para_env%num_pe - no_overloaded + 1, -1
3010 : WRITE (iounit, '(A,I8,I8,I8,I8,I8)') &
3011 : 'load balancing info', load_imbalance(i), work_index(i), &
3012 : work_size(i), targets(1, i), targets(2, i)
3013 : END DO
3014 : END IF
3015 : CALL cp_abort(__LOCATION__, &
3016 : "Load balancing error: too much data to redistribute."// &
3017 : " Increase LOAD_SCALE or change the number of processors."// &
3018 : " If the confinement cavity occupies a large volume relative"// &
3019 0 : " to the total system volume, it might be worth disabling DLB.")
3020 : END IF
3021 : END IF
3022 : ! Tell the target processors which grid points they should compute
3023 8 : IF (my_pos .LE. no_underloaded) THEN
3024 4 : DO i = force_env%para_env%num_pe, force_env%para_env%num_pe - no_overloaded + 1, -1
3025 4 : IF (targets(1, i) .LE. my_pos .AND. targets(2, i) .GE. my_pos) THEN
3026 4 : mixed_cdft%dlb_control%recv_work = .TRUE.
3027 4 : mixed_cdft%dlb_control%my_source = work_index(i) - 1
3028 4 : EXIT
3029 : END IF
3030 : END DO
3031 4 : IF (mixed_cdft%dlb_control%recv_work) THEN
3032 4 : IF (.NOT. mixed_cdft%is_special) THEN
3033 4 : ALLOCATE (mixed_cdft%dlb_control%bo(12))
3034 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%bo, source=mixed_cdft%dlb_control%my_source, &
3035 4 : request=req(1))
3036 4 : CALL req(1)%wait()
3037 12 : mixed_cdft%dlb_control%my_dest_repl = (/mixed_cdft%dlb_control%bo(11), mixed_cdft%dlb_control%bo(12)/)
3038 12 : mixed_cdft%dlb_control%dest_tags_repl = (/mixed_cdft%dlb_control%bo(9), mixed_cdft%dlb_control%bo(10)/)
3039 : ALLOCATE (mixed_cdft%dlb_control%cavity(mixed_cdft%dlb_control%bo(1):mixed_cdft%dlb_control%bo(2), &
3040 : mixed_cdft%dlb_control%bo(3):mixed_cdft%dlb_control%bo(4), &
3041 20 : mixed_cdft%dlb_control%bo(7):mixed_cdft%dlb_control%bo(8)))
3042 : ALLOCATE (mixed_cdft%dlb_control%weight(mixed_cdft%dlb_control%bo(1):mixed_cdft%dlb_control%bo(2), &
3043 : mixed_cdft%dlb_control%bo(3):mixed_cdft%dlb_control%bo(4), &
3044 20 : mixed_cdft%dlb_control%bo(7):mixed_cdft%dlb_control%bo(8)))
3045 : ALLOCATE (mixed_cdft%dlb_control%gradients(3*natom, &
3046 : mixed_cdft%dlb_control%bo(1):mixed_cdft%dlb_control%bo(2), &
3047 : mixed_cdft%dlb_control%bo(3):mixed_cdft%dlb_control%bo(4), &
3048 24 : mixed_cdft%dlb_control%bo(7):mixed_cdft%dlb_control%bo(8)))
3049 22724 : mixed_cdft%dlb_control%gradients = 0.0_dp
3050 3524 : mixed_cdft%dlb_control%weight = 0.0_dp
3051 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%cavity, source=mixed_cdft%dlb_control%my_source, &
3052 4 : request=req(1))
3053 4 : CALL req(1)%wait()
3054 4 : DEALLOCATE (mixed_cdft%dlb_control%bo)
3055 : ELSE
3056 0 : ALLOCATE (buffsize(1))
3057 : CALL force_env%para_env%irecv(msgout=buffsize, source=mixed_cdft%dlb_control%my_source, &
3058 0 : request=req(1))
3059 0 : CALL req(1)%wait()
3060 0 : ALLOCATE (mixed_cdft%dlb_control%bo(12*buffsize(1)))
3061 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%bo, source=mixed_cdft%dlb_control%my_source, &
3062 0 : request=req(1))
3063 0 : ALLOCATE (mixed_cdft%dlb_control%sendbuff(buffsize(1)))
3064 0 : ALLOCATE (req_recv(buffsize(1)))
3065 0 : DEALLOCATE (buffsize)
3066 0 : CALL req(1)%wait()
3067 0 : DO j = 1, SIZE(mixed_cdft%dlb_control%sendbuff)
3068 : ALLOCATE (mixed_cdft%dlb_control%sendbuff(j)%cavity(mixed_cdft%dlb_control%bo(12*(j - 1) + 1): &
3069 : mixed_cdft%dlb_control%bo(12*(j - 1) + 2), &
3070 : mixed_cdft%dlb_control%bo(12*(j - 1) + 3): &
3071 : mixed_cdft%dlb_control%bo(12*(j - 1) + 4), &
3072 : mixed_cdft%dlb_control%bo(12*(j - 1) + 7): &
3073 0 : mixed_cdft%dlb_control%bo(12*(j - 1) + 8)))
3074 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%sendbuff(j)%cavity, &
3075 : source=mixed_cdft%dlb_control%my_source, &
3076 0 : request=req_recv(j))
3077 : ALLOCATE (mixed_cdft%dlb_control%sendbuff(j)%weight(mixed_cdft%dlb_control%bo(12*(j - 1) + 1): &
3078 : mixed_cdft%dlb_control%bo(12*(j - 1) + 2), &
3079 : mixed_cdft%dlb_control%bo(12*(j - 1) + 3): &
3080 : mixed_cdft%dlb_control%bo(12*(j - 1) + 4), &
3081 : mixed_cdft%dlb_control%bo(12*(j - 1) + 7): &
3082 0 : mixed_cdft%dlb_control%bo(12*(j - 1) + 8)))
3083 : ALLOCATE (mixed_cdft%dlb_control%sendbuff(j)%gradients(3*natom, &
3084 : mixed_cdft%dlb_control%bo(12*(j - 1) + 1): &
3085 : mixed_cdft%dlb_control%bo(12*(j - 1) + 2), &
3086 : mixed_cdft%dlb_control%bo(12*(j - 1) + 3): &
3087 : mixed_cdft%dlb_control%bo(12*(j - 1) + 4), &
3088 : mixed_cdft%dlb_control%bo(12*(j - 1) + 7): &
3089 0 : mixed_cdft%dlb_control%bo(12*(j - 1) + 8)))
3090 0 : mixed_cdft%dlb_control%sendbuff(j)%weight = 0.0_dp
3091 0 : mixed_cdft%dlb_control%sendbuff(j)%gradients = 0.0_dp
3092 : mixed_cdft%dlb_control%sendbuff(j)%tag = (/mixed_cdft%dlb_control%bo(12*(j - 1) + 9), &
3093 0 : mixed_cdft%dlb_control%bo(12*(j - 1) + 10)/)
3094 : mixed_cdft%dlb_control%sendbuff(j)%rank = (/mixed_cdft%dlb_control%bo(12*(j - 1) + 11), &
3095 0 : mixed_cdft%dlb_control%bo(12*(j - 1) + 12)/)
3096 : END DO
3097 0 : CALL mp_waitall(req_recv)
3098 0 : DEALLOCATE (req_recv)
3099 : END IF
3100 : END IF
3101 : ELSE
3102 4 : IF (.NOT. mixed_cdft%is_special) THEN
3103 4 : offset = 0
3104 4 : ALLOCATE (sendbuffer(12))
3105 4 : send_total = 0
3106 8 : DO i = 1, SIZE(mixed_cdft%dlb_control%target_list, 2)
3107 : tags = (/(i - 1)*3 + 1 + force_env%para_env%mepos*6*max_targets, &
3108 12 : (i - 1)*3 + 1 + 3*max_targets + force_env%para_env%mepos*6*max_targets/) ! Unique communicator tags
3109 4 : mixed_cdft%dlb_control%target_list(3, i) = tags(1)
3110 4 : IF (mixed_cdft%is_pencil) THEN
3111 : sendbuffer = (/bo_conf(1, 1) + offset, &
3112 : bo_conf(1, 1) + offset + (mixed_cdft%dlb_control%target_list(2, i) - 1), &
3113 : bo_conf(1, 2), bo_conf(2, 2), bo(1, 3), bo(2, 3), bo_conf(1, 3), bo_conf(2, 3), &
3114 0 : tags(1), tags(2), mixed_cdft%dest_list(1), mixed_cdft%dest_list(2)/)
3115 : ELSE
3116 : sendbuffer = (/bo_conf(1, 1), bo_conf(2, 1), &
3117 : bo_conf(1, 2) + offset, &
3118 : bo_conf(1, 2) + offset + (mixed_cdft%dlb_control%target_list(2, i) - 1), &
3119 : bo(1, 3), bo(2, 3), bo_conf(1, 3), bo_conf(2, 3), tags(1), tags(2), &
3120 52 : mixed_cdft%dest_list(1), mixed_cdft%dest_list(2)/)
3121 : END IF
3122 4 : send_total = send_total + mixed_cdft%dlb_control%target_list(2, i) - 1
3123 : CALL force_env%para_env%isend(msgin=sendbuffer, dest=mixed_cdft%dlb_control%target_list(1, i), &
3124 4 : request=req(1))
3125 4 : CALL req(1)%wait()
3126 4 : IF (mixed_cdft%is_pencil) THEN
3127 : ALLOCATE (cavity(bo_conf(1, 1) + offset: &
3128 : bo_conf(1, 1) + offset + (mixed_cdft%dlb_control%target_list(2, i) - 1), &
3129 0 : bo_conf(1, 2):bo_conf(2, 2), bo_conf(1, 3):bo_conf(2, 3)))
3130 : cavity = cdft_control%becke_control%cavity%array(bo_conf(1, 1) + offset: &
3131 : bo_conf(1, 1) + offset + &
3132 : (mixed_cdft%dlb_control%target_list(2, i) - 1), &
3133 : bo_conf(1, 2):bo_conf(2, 2), &
3134 0 : bo_conf(1, 3):bo_conf(2, 3))
3135 : ELSE
3136 : ALLOCATE (cavity(bo_conf(1, 1):bo_conf(2, 1), &
3137 : bo_conf(1, 2) + offset: &
3138 : bo_conf(1, 2) + offset + (mixed_cdft%dlb_control%target_list(2, i) - 1), &
3139 20 : bo_conf(1, 3):bo_conf(2, 3)))
3140 : cavity = cdft_control%becke_control%cavity%array(bo_conf(1, 1):bo_conf(2, 1), &
3141 : bo_conf(1, 2) + offset: &
3142 : bo_conf(1, 2) + offset + &
3143 : (mixed_cdft%dlb_control%target_list(2, i) - 1), &
3144 7044 : bo_conf(1, 3):bo_conf(2, 3))
3145 : END IF
3146 : CALL force_env%para_env%isend(msgin=cavity, &
3147 : dest=mixed_cdft%dlb_control%target_list(1, i), &
3148 4 : request=req(1))
3149 4 : CALL req(1)%wait()
3150 4 : offset = offset + mixed_cdft%dlb_control%target_list(2, i)
3151 8 : DEALLOCATE (cavity)
3152 : END DO
3153 4 : IF (mixed_cdft%is_pencil) THEN
3154 0 : mixed_cdft%dlb_control%distributed(1) = bo_conf(1, 1)
3155 0 : mixed_cdft%dlb_control%distributed(2) = bo_conf(1, 1) + offset - 1
3156 : ELSE
3157 4 : mixed_cdft%dlb_control%distributed(1) = bo_conf(1, 2)
3158 4 : mixed_cdft%dlb_control%distributed(2) = bo_conf(1, 2) + offset - 1
3159 : END IF
3160 4 : DEALLOCATE (sendbuffer)
3161 : ELSE
3162 0 : ALLOCATE (buffsize(1))
3163 0 : DO i = 1, SIZE(mixed_cdft%dlb_control%target_list, 2)
3164 0 : buffsize = mixed_cdft%dlb_control%target_list(2, i)
3165 : ! Unique communicator tags (dont actually need these, should be removed)
3166 : tags = (/(i - 1)*3 + 1 + force_env%para_env%mepos*6*max_targets, &
3167 0 : (i - 1)*3 + 1 + 3*max_targets + force_env%para_env%mepos*6*max_targets/)
3168 0 : DO j = 4, SIZE(mixed_cdft%dlb_control%target_list, 1)
3169 0 : IF (mixed_cdft%dlb_control%target_list(j, i) .GT. uninitialized) EXIT
3170 : END DO
3171 0 : offset_special = j
3172 0 : offset_proc = j - 4 - (j - 4)/2
3173 : CALL force_env%para_env%isend(msgin=buffsize, &
3174 : dest=mixed_cdft%dlb_control%target_list(1, i), &
3175 0 : request=req(1))
3176 0 : CALL req(1)%wait()
3177 0 : ALLOCATE (sendbuffer(12*buffsize(1)))
3178 0 : DO j = 1, buffsize(1)
3179 : sendbuffer(12*(j - 1) + 1:12*(j - 1) + 12) = (/mixed_cdft%dlb_control%target_list(offset_special + 2*(j - 1), i), &
3180 : mixed_cdft%dlb_control%target_list(offset_special + 2*j - 1, i), &
3181 : bo_conf(1, 2), bo_conf(2, 2), bo(1, 3), bo(2, 3), &
3182 : bo_conf(1, 3), bo_conf(2, 3), tags(1), tags(2), &
3183 : mixed_cdft%dest_list(j + offset_proc), &
3184 0 : mixed_cdft%dest_list(j + offset_proc) + force_env%para_env%num_pe/2/)
3185 : END DO
3186 : CALL force_env%para_env%isend(msgin=sendbuffer, &
3187 : dest=mixed_cdft%dlb_control%target_list(1, i), &
3188 0 : request=req(1))
3189 0 : CALL req(1)%wait()
3190 0 : DEALLOCATE (sendbuffer)
3191 0 : DO j = 1, buffsize(1)
3192 : ALLOCATE (cavity(mixed_cdft%dlb_control%target_list(offset_special + 2*(j - 1), i): &
3193 : mixed_cdft%dlb_control%target_list(offset_special + 2*j - 1, i), &
3194 0 : bo_conf(1, 2):bo_conf(2, 2), bo_conf(1, 3):bo_conf(2, 3)))
3195 : cavity = cdft_control%becke_control%cavity%array(LBOUND(cavity, 1):UBOUND(cavity, 1), &
3196 : bo_conf(1, 2):bo_conf(2, 2), &
3197 0 : bo_conf(1, 3):bo_conf(2, 3))
3198 : CALL force_env%para_env%isend(msgin=cavity, &
3199 : dest=mixed_cdft%dlb_control%target_list(1, i), &
3200 0 : request=req(1))
3201 0 : CALL req(1)%wait()
3202 0 : DEALLOCATE (cavity)
3203 : END DO
3204 : END DO
3205 0 : DEALLOCATE (buffsize)
3206 : END IF
3207 : END IF
3208 8 : DEALLOCATE (expected_work, work_size, load_imbalance, work_index, targets)
3209 : ! Once calculated, data defined on the distributed grid points is sent directly to the processors that own the
3210 : ! grid points after the constraint is copied onto the two processor groups, instead of sending the data back to
3211 : ! the original owner
3212 8 : IF (mixed_cdft%is_special) THEN
3213 0 : my_special_work = 2
3214 0 : ALLOCATE (mask_send(SIZE(mixed_cdft%dest_list)), mask_recv(SIZE(mixed_cdft%source_list)))
3215 0 : ALLOCATE (nsend_proc(SIZE(mixed_cdft%dest_list)), nrecv(SIZE(mixed_cdft%source_list)))
3216 0 : nrecv = 0
3217 0 : nsend_proc = 0
3218 0 : mask_recv = .FALSE.
3219 0 : mask_send = .FALSE.
3220 : ELSE
3221 : my_special_work = 1
3222 : END IF
3223 40 : ALLOCATE (recvbuffer(SIZE(mixed_cdft%source_list)), sbuff(SIZE(mixed_cdft%dest_list)))
3224 56 : ALLOCATE (req_total(my_special_work*SIZE(mixed_cdft%source_list) + (my_special_work**2)*SIZE(mixed_cdft%dest_list)))
3225 24 : ALLOCATE (mixed_cdft%dlb_control%recv_work_repl(SIZE(mixed_cdft%source_list)))
3226 24 : DO i = 1, SIZE(mixed_cdft%source_list)
3227 16 : NULLIFY (recvbuffer(i)%bv, recvbuffer(i)%iv)
3228 16 : ALLOCATE (recvbuffer(i)%bv(1), recvbuffer(i)%iv(3))
3229 : CALL force_env%para_env%irecv(msgout=recvbuffer(i)%bv, &
3230 : source=mixed_cdft%source_list(i), &
3231 16 : request=req_total(i), tag=1)
3232 16 : IF (mixed_cdft%is_special) &
3233 : CALL force_env%para_env%irecv(msgout=recvbuffer(i)%iv, &
3234 : source=mixed_cdft%source_list(i), &
3235 : request=req_total(i + SIZE(mixed_cdft%source_list)), &
3236 8 : tag=2)
3237 : END DO
3238 16 : DO i = 1, my_special_work
3239 32 : DO j = 1, SIZE(mixed_cdft%dest_list)
3240 16 : IF (i == 1) THEN
3241 16 : NULLIFY (sbuff(j)%iv, sbuff(j)%bv)
3242 16 : ALLOCATE (sbuff(j)%bv(1))
3243 32 : sbuff(j)%bv = mixed_cdft%dlb_control%send_work
3244 16 : IF (mixed_cdft%is_special) THEN
3245 0 : ALLOCATE (sbuff(j)%iv(3))
3246 0 : sbuff(j)%iv(1:2) = mixed_cdft%dest_list_bo(1:2, j)
3247 0 : sbuff(j)%iv(3) = 0
3248 0 : IF (sbuff(j)%iv(1) .EQ. should_deallocate) mask_send(j) = .TRUE.
3249 0 : IF (mixed_cdft%dlb_control%send_work) THEN
3250 0 : sbuff(j)%bv = touched(j)
3251 0 : IF (touched(j)) THEN
3252 0 : nsend = 0
3253 0 : DO ispecial = 1, SIZE(mixed_cdft%dlb_control%target_list, 2)
3254 0 : IF (mixed_cdft%dlb_control%target_list(4 + 2*(j - 1), ispecial) .NE. uninitialized) &
3255 0 : nsend = nsend + 1
3256 : END DO
3257 0 : sbuff(j)%iv(3) = nsend
3258 0 : nsend_proc(j) = nsend
3259 : END IF
3260 : END IF
3261 : END IF
3262 : END IF
3263 16 : ind = j + (i - 1)*SIZE(mixed_cdft%dest_list) + my_special_work*SIZE(mixed_cdft%source_list)
3264 : CALL force_env%para_env%isend(msgin=sbuff(j)%bv, &
3265 : dest=mixed_cdft%dest_list(j) + (i - 1)*force_env%para_env%num_pe/2, &
3266 16 : request=req_total(ind), tag=1)
3267 16 : IF (mixed_cdft%is_special) &
3268 : CALL force_env%para_env%isend(msgin=sbuff(j)%iv, &
3269 : dest=mixed_cdft%dest_list(j) + (i - 1)*force_env%para_env%num_pe/2, &
3270 8 : request=req_total(ind + 2*SIZE(mixed_cdft%dest_list)), tag=2)
3271 : END DO
3272 : END DO
3273 8 : CALL mp_waitall(req_total)
3274 8 : DEALLOCATE (req_total)
3275 24 : DO i = 1, SIZE(mixed_cdft%source_list)
3276 16 : mixed_cdft%dlb_control%recv_work_repl(i) = recvbuffer(i)%bv(1)
3277 16 : IF (mixed_cdft%is_special .AND. mixed_cdft%dlb_control%recv_work_repl(i)) THEN
3278 0 : mixed_cdft%source_list_bo(1:2, i) = recvbuffer(i)%iv(1:2)
3279 0 : nrecv(i) = recvbuffer(i)%iv(3)
3280 0 : IF (recvbuffer(i)%iv(1) .EQ. should_deallocate) mask_recv(i) = .TRUE.
3281 : END IF
3282 16 : DEALLOCATE (recvbuffer(i)%bv)
3283 24 : IF (ASSOCIATED(recvbuffer(i)%iv)) DEALLOCATE (recvbuffer(i)%iv)
3284 : END DO
3285 24 : DO j = 1, SIZE(mixed_cdft%dest_list)
3286 16 : DEALLOCATE (sbuff(j)%bv)
3287 24 : IF (ASSOCIATED(sbuff(j)%iv)) DEALLOCATE (sbuff(j)%iv)
3288 : END DO
3289 8 : DEALLOCATE (recvbuffer)
3290 : ! For some reason if debug_this_module is true and is_special is false, the deallocate statement
3291 : ! on line 3433 gets executed no matter what (gfortran 5.3.0 bug?). Printing out the variable seems to fix it...
3292 : IF (debug_this_module) THEN
3293 : WRITE (dummy, *) mixed_cdft%is_special
3294 : END IF
3295 :
3296 8 : IF (.NOT. mixed_cdft%is_special) THEN
3297 8 : IF (mixed_cdft%dlb_control%send_work) THEN
3298 32 : ALLOCATE (req_total(COUNT(mixed_cdft%dlb_control%recv_work_repl) + 2))
3299 4 : ALLOCATE (sendbuffer(6))
3300 4 : IF (mixed_cdft%is_pencil) THEN
3301 : sendbuffer = (/SIZE(mixed_cdft%dlb_control%target_list, 2), bo_conf(1, 3), bo_conf(2, 3), &
3302 0 : bo_conf(1, 1), bo_conf(1, 2), bo_conf(2, 2)/)
3303 : ELSE
3304 : sendbuffer = (/SIZE(mixed_cdft%dlb_control%target_list, 2), bo_conf(1, 3), bo_conf(2, 3), &
3305 28 : bo_conf(1, 2), bo_conf(1, 1), bo_conf(2, 1)/)
3306 : END IF
3307 8 : ELSE IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
3308 24 : ALLOCATE (req_total(COUNT(mixed_cdft%dlb_control%recv_work_repl)))
3309 : END IF
3310 16 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
3311 24 : ALLOCATE (mixed_cdft%dlb_control%recv_info(2))
3312 8 : NULLIFY (mixed_cdft%dlb_control%recv_info(1)%target_list, mixed_cdft%dlb_control%recv_info(2)%target_list)
3313 24 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(2))
3314 8 : NULLIFY (mixed_cdft%dlb_control%recvbuff(1)%buffs, mixed_cdft%dlb_control%recvbuff(2)%buffs)
3315 : END IF
3316 : ! First communicate which grid points were distributed
3317 8 : IF (mixed_cdft%dlb_control%send_work) THEN
3318 12 : ind = COUNT(mixed_cdft%dlb_control%recv_work_repl) + 1
3319 12 : DO i = 1, 2
3320 : CALL force_env%para_env%isend(msgin=sendbuffer, &
3321 : dest=mixed_cdft%dest_list(i), &
3322 8 : request=req_total(ind))
3323 12 : ind = ind + 1
3324 : END DO
3325 : END IF
3326 16 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
3327 8 : ind = 1
3328 24 : DO i = 1, 2
3329 24 : IF (mixed_cdft%dlb_control%recv_work_repl(i)) THEN
3330 8 : ALLOCATE (mixed_cdft%dlb_control%recv_info(i)%matrix_info(6))
3331 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%recv_info(i)%matrix_info, &
3332 : source=mixed_cdft%source_list(i), &
3333 8 : request=req_total(ind))
3334 8 : ind = ind + 1
3335 : END IF
3336 : END DO
3337 : END IF
3338 8 : IF (ASSOCIATED(req_total)) THEN
3339 8 : CALL mp_waitall(req_total)
3340 : END IF
3341 : ! Now communicate which processor handles which grid points
3342 8 : IF (mixed_cdft%dlb_control%send_work) THEN
3343 12 : ind = COUNT(mixed_cdft%dlb_control%recv_work_repl) + 1
3344 12 : DO i = 1, 2
3345 8 : IF (i == 2) &
3346 8 : mixed_cdft%dlb_control%target_list(3, :) = mixed_cdft%dlb_control%target_list(3, :) + 3*max_targets
3347 : CALL force_env%para_env%isend(msgin=mixed_cdft%dlb_control%target_list, &
3348 : dest=mixed_cdft%dest_list(i), &
3349 8 : request=req_total(ind))
3350 12 : ind = ind + 1
3351 : END DO
3352 : END IF
3353 16 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
3354 8 : ind = 1
3355 24 : DO i = 1, 2
3356 24 : IF (mixed_cdft%dlb_control%recv_work_repl(i)) THEN
3357 : ALLOCATE (mixed_cdft%dlb_control%recv_info(i)% &
3358 24 : target_list(3, mixed_cdft%dlb_control%recv_info(i)%matrix_info(1)))
3359 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%recv_info(i)%target_list, &
3360 : source=mixed_cdft%source_list(i), &
3361 8 : request=req_total(ind))
3362 8 : ind = ind + 1
3363 : END IF
3364 : END DO
3365 : END IF
3366 8 : IF (ASSOCIATED(req_total)) THEN
3367 8 : CALL mp_waitall(req_total)
3368 8 : DEALLOCATE (req_total)
3369 : END IF
3370 8 : IF (ASSOCIATED(sendbuffer)) DEALLOCATE (sendbuffer)
3371 : ELSE
3372 0 : IF (mixed_cdft%dlb_control%send_work) THEN
3373 0 : ALLOCATE (req_total(COUNT(mixed_cdft%dlb_control%recv_work_repl) + 2*COUNT(touched)))
3374 0 : ELSE IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
3375 0 : ALLOCATE (req_total(COUNT(mixed_cdft%dlb_control%recv_work_repl)))
3376 : END IF
3377 0 : IF (mixed_cdft%dlb_control%send_work) THEN
3378 0 : ind = COUNT(mixed_cdft%dlb_control%recv_work_repl)
3379 0 : DO j = 1, SIZE(mixed_cdft%dest_list)
3380 0 : IF (touched(j)) THEN
3381 0 : ALLOCATE (sbuff(j)%iv(4 + 3*nsend_proc(j)))
3382 0 : sbuff(j)%iv(1:4) = (/bo_conf(1, 2), bo_conf(2, 2), bo_conf(1, 3), bo_conf(2, 3)/)
3383 0 : offset = 5
3384 0 : DO i = 1, SIZE(mixed_cdft%dlb_control%target_list, 2)
3385 0 : IF (mixed_cdft%dlb_control%target_list(4 + 2*(j - 1), i) .NE. uninitialized) THEN
3386 : sbuff(j)%iv(offset:offset + 2) = (/mixed_cdft%dlb_control%target_list(1, i), &
3387 : mixed_cdft%dlb_control%target_list(4 + 2*(j - 1), i), &
3388 0 : mixed_cdft%dlb_control%target_list(4 + 2*j - 1, i)/)
3389 0 : offset = offset + 3
3390 : END IF
3391 : END DO
3392 0 : DO ispecial = 1, my_special_work
3393 : CALL force_env%para_env%isend(msgin=sbuff(j)%iv, &
3394 : dest=mixed_cdft%dest_list(j) + (ispecial - 1)*force_env%para_env%num_pe/2, &
3395 0 : request=req_total(ind + ispecial))
3396 : END DO
3397 0 : ind = ind + my_special_work
3398 : END IF
3399 : END DO
3400 : END IF
3401 0 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
3402 0 : ALLOCATE (mixed_cdft%dlb_control%recv_info(SIZE(mixed_cdft%source_list)))
3403 0 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(SIZE(mixed_cdft%source_list)))
3404 0 : ind = 1
3405 0 : DO j = 1, SIZE(mixed_cdft%source_list)
3406 : NULLIFY (mixed_cdft%dlb_control%recv_info(j)%target_list, &
3407 0 : mixed_cdft%dlb_control%recvbuff(j)%buffs)
3408 0 : IF (mixed_cdft%dlb_control%recv_work_repl(j)) THEN
3409 0 : ALLOCATE (mixed_cdft%dlb_control%recv_info(j)%matrix_info(4 + 3*nrecv(j)))
3410 : CALL force_env%para_env%irecv(mixed_cdft%dlb_control%recv_info(j)%matrix_info, &
3411 : source=mixed_cdft%source_list(j), &
3412 0 : request=req_total(ind))
3413 0 : ind = ind + 1
3414 : END IF
3415 : END DO
3416 : END IF
3417 0 : IF (ASSOCIATED(req_total)) THEN
3418 0 : CALL mp_waitall(req_total)
3419 0 : DEALLOCATE (req_total)
3420 : END IF
3421 0 : IF (ANY(mask_send)) THEN
3422 : ALLOCATE (tmp(SIZE(mixed_cdft%dest_list) - COUNT(mask_send)), &
3423 0 : tmp_bo(2, SIZE(mixed_cdft%dest_list) - COUNT(mask_send)))
3424 0 : i = 1
3425 0 : DO j = 1, SIZE(mixed_cdft%dest_list)
3426 0 : IF (.NOT. mask_send(j)) THEN
3427 0 : tmp(i) = mixed_cdft%dest_list(j)
3428 0 : tmp_bo(1:2, i) = mixed_cdft%dest_list_bo(1:2, j)
3429 0 : i = i + 1
3430 : END IF
3431 : END DO
3432 0 : DEALLOCATE (mixed_cdft%dest_list, mixed_cdft%dest_list_bo)
3433 0 : ALLOCATE (mixed_cdft%dest_list(SIZE(tmp)), mixed_cdft%dest_list_bo(2, SIZE(tmp)))
3434 0 : mixed_cdft%dest_list = tmp
3435 0 : mixed_cdft%dest_list_bo = tmp_bo
3436 0 : DEALLOCATE (tmp, tmp_bo)
3437 : END IF
3438 0 : IF (ANY(mask_recv)) THEN
3439 : ALLOCATE (tmp(SIZE(mixed_cdft%source_list) - COUNT(mask_recv)), &
3440 0 : tmp_bo(4, SIZE(mixed_cdft%source_list) - COUNT(mask_recv)))
3441 0 : i = 1
3442 0 : DO j = 1, SIZE(mixed_cdft%source_list)
3443 0 : IF (.NOT. mask_recv(j)) THEN
3444 0 : tmp(i) = mixed_cdft%source_list(j)
3445 0 : tmp_bo(1:4, i) = mixed_cdft%source_list_bo(1:4, j)
3446 0 : i = i + 1
3447 : END IF
3448 : END DO
3449 0 : DEALLOCATE (mixed_cdft%source_list, mixed_cdft%source_list_bo)
3450 0 : ALLOCATE (mixed_cdft%source_list(SIZE(tmp)), mixed_cdft%source_list_bo(4, SIZE(tmp)))
3451 0 : mixed_cdft%source_list = tmp
3452 0 : mixed_cdft%source_list_bo = tmp_bo
3453 0 : DEALLOCATE (tmp, tmp_bo)
3454 : END IF
3455 0 : DEALLOCATE (mask_recv, mask_send)
3456 0 : DEALLOCATE (nsend_proc, nrecv)
3457 0 : IF (mixed_cdft%dlb_control%send_work) THEN
3458 0 : DO j = 1, SIZE(mixed_cdft%dest_list)
3459 0 : IF (touched(j)) DEALLOCATE (sbuff(j)%iv)
3460 : END DO
3461 0 : IF (ASSOCIATED(touched)) DEALLOCATE (touched)
3462 : END IF
3463 : END IF
3464 8 : DEALLOCATE (sbuff)
3465 : CALL cp_print_key_finished_output(iounit, logger, force_env_section, &
3466 8 : "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
3467 8 : CALL timestop(handle)
3468 :
3469 16 : END SUBROUTINE mixed_becke_constraint_dlb
3470 :
3471 : ! **************************************************************************************************
3472 : !> \brief Low level routine to build mixed Becke constraint and gradients
3473 : !> \param force_env the force_env that holds the CDFT states
3474 : !> \param mixed_cdft container for structures needed to build the mixed CDFT constraint
3475 : !> \param in_memory decides whether to build the weight function gradients in parallel before solving
3476 : !> the CDFT states or later during the SCF procedure of the individual states
3477 : !> \param is_constraint a list used to determine which atoms in the system define the constraint
3478 : !> \param store_vectors should temporary arrays be stored in memory to accelerate the calculation
3479 : !> \param R12 temporary array holding the pairwise atomic distances
3480 : !> \param position_vecs temporary array holding the pbc corrected atomic position vectors
3481 : !> \param pair_dist_vecs temporary array holding the pairwise displament vectors
3482 : !> \param coefficients array that determines how atoms should be summed to form the constraint
3483 : !> \param catom temporary array to map the global index of constraint atoms to their position
3484 : !> in a list that holds only constraint atoms
3485 : !> \par History
3486 : !> 03.2016 created [Nico Holmberg]
3487 : ! **************************************************************************************************
3488 34 : SUBROUTINE mixed_becke_constraint_low(force_env, mixed_cdft, in_memory, &
3489 : is_constraint, store_vectors, R12, position_vecs, &
3490 : pair_dist_vecs, coefficients, catom)
3491 : TYPE(force_env_type), POINTER :: force_env
3492 : TYPE(mixed_cdft_type), POINTER :: mixed_cdft
3493 : LOGICAL, INTENT(IN) :: in_memory
3494 : LOGICAL, ALLOCATABLE, DIMENSION(:), INTENT(INOUT) :: is_constraint
3495 : LOGICAL, INTENT(IN) :: store_vectors
3496 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :), &
3497 : INTENT(INOUT) :: R12, position_vecs
3498 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :), &
3499 : INTENT(INOUT) :: pair_dist_vecs
3500 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:), &
3501 : INTENT(INOUT) :: coefficients
3502 : INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(INOUT) :: catom
3503 :
3504 : CHARACTER(len=*), PARAMETER :: routineN = 'mixed_becke_constraint_low'
3505 :
3506 : INTEGER :: handle, i, iatom, icomm, iforce_eval, index, iounit, ip, ispecial, iwork, j, &
3507 : jatom, jcomm, k, my_special_work, my_work, natom, nbuffs, nforce_eval, np(3), &
3508 : nsent_total, nskipped, nwork, offset, offset_repl
3509 34 : INTEGER, DIMENSION(:), POINTER :: work, work_dlb
3510 34 : INTEGER, DIMENSION(:, :), POINTER :: nsent
3511 : LOGICAL :: completed_recv, should_communicate
3512 34 : LOGICAL, ALLOCATABLE, DIMENSION(:) :: skip_me
3513 34 : LOGICAL, ALLOCATABLE, DIMENSION(:, :) :: completed
3514 : REAL(kind=dp) :: dist1, dist2, dmyexp, my1, my1_homo, &
3515 : myexp, sum_cell_f_all, &
3516 : sum_cell_f_constr, th, tmp_const
3517 34 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: cell_functions, distances, ds_dR_i, &
3518 34 : ds_dR_j
3519 34 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: d_sum_const_dR, d_sum_Pm_dR, &
3520 34 : distance_vecs, dP_i_dRi
3521 34 : REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :, :) :: dP_i_dRj
3522 : REAL(kind=dp), DIMENSION(3) :: cell_v, dist_vec, dmy_dR_i, dmy_dR_j, &
3523 : dr, dr1_r2, dr_i_dR, dr_ij_dR, &
3524 : dr_j_dR, grid_p, r, r1, shift
3525 34 : REAL(kind=dp), DIMENSION(:), POINTER :: cutoffs
3526 34 : REAL(KIND=dp), DIMENSION(:, :, :), POINTER :: cavity, weight
3527 34 : REAL(KIND=dp), DIMENSION(:, :, :, :), POINTER :: gradients
3528 : TYPE(cdft_control_type), POINTER :: cdft_control
3529 : TYPE(cell_type), POINTER :: cell
3530 : TYPE(cp_logger_type), POINTER :: logger
3531 : TYPE(cp_subsys_type), POINTER :: subsys_mix
3532 : TYPE(force_env_type), POINTER :: force_env_qs
3533 34 : TYPE(mp_request_type), DIMENSION(:), POINTER :: req_recv, req_total
3534 34 : TYPE(mp_request_type), DIMENSION(:, :), POINTER :: req_send
3535 : TYPE(particle_list_type), POINTER :: particles
3536 34 : TYPE(particle_type), DIMENSION(:), POINTER :: particle_set
3537 : TYPE(pw_pool_type), POINTER :: auxbas_pw_pool
3538 : TYPE(section_vals_type), POINTER :: force_env_section, print_section
3539 :
3540 68 : logger => cp_get_default_logger()
3541 34 : NULLIFY (work, req_recv, req_send, work_dlb, nsent, cutoffs, cavity, &
3542 34 : weight, gradients, cell, subsys_mix, force_env_qs, &
3543 34 : particle_set, particles, auxbas_pw_pool, force_env_section, &
3544 34 : print_section, cdft_control)
3545 34 : CALL timeset(routineN, handle)
3546 34 : nforce_eval = SIZE(force_env%sub_force_env)
3547 34 : CALL force_env_get(force_env=force_env, force_env_section=force_env_section)
3548 34 : print_section => section_vals_get_subs_vals(force_env_section, "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
3549 34 : iounit = cp_print_key_unit_nr(logger, print_section, '', extension='.mixedLog')
3550 34 : IF (.NOT. force_env%mixed_env%do_mixed_qmmm_cdft) THEN
3551 : CALL force_env_get(force_env=force_env, &
3552 : subsys=subsys_mix, &
3553 26 : cell=cell)
3554 : CALL cp_subsys_get(subsys=subsys_mix, &
3555 : particles=particles, &
3556 26 : particle_set=particle_set)
3557 : ELSE
3558 24 : DO iforce_eval = 1, nforce_eval
3559 16 : IF (.NOT. ASSOCIATED(force_env%sub_force_env(iforce_eval)%force_env)) CYCLE
3560 24 : force_env_qs => force_env%sub_force_env(iforce_eval)%force_env
3561 : END DO
3562 : CALL get_qs_env(force_env_qs%qmmm_env%qs_env, &
3563 : cp_subsys=subsys_mix, &
3564 8 : cell=cell)
3565 : CALL cp_subsys_get(subsys=subsys_mix, &
3566 : particles=particles, &
3567 8 : particle_set=particle_set)
3568 : END IF
3569 34 : natom = SIZE(particles%els)
3570 34 : cdft_control => mixed_cdft%cdft_control
3571 34 : CALL pw_env_get(pw_env=mixed_cdft%pw_env, auxbas_pw_pool=auxbas_pw_pool)
3572 136 : np = auxbas_pw_pool%pw_grid%npts
3573 136 : dr = auxbas_pw_pool%pw_grid%dr
3574 136 : shift = -REAL(MODULO(np, 2), dp)*dr/2.0_dp
3575 170 : ALLOCATE (cell_functions(natom), skip_me(natom))
3576 34 : IF (store_vectors) THEN
3577 68 : ALLOCATE (distances(natom))
3578 102 : ALLOCATE (distance_vecs(3, natom))
3579 : END IF
3580 34 : IF (in_memory) THEN
3581 24 : ALLOCATE (ds_dR_j(3))
3582 24 : ALLOCATE (ds_dR_i(3))
3583 72 : ALLOCATE (d_sum_Pm_dR(3, natom))
3584 48 : ALLOCATE (d_sum_const_dR(3, natom))
3585 96 : ALLOCATE (dP_i_dRj(3, natom, natom))
3586 48 : ALLOCATE (dP_i_dRi(3, natom))
3587 24 : th = 1.0e-8_dp
3588 : END IF
3589 34 : IF (mixed_cdft%dlb) THEN
3590 32 : ALLOCATE (work(force_env%para_env%num_pe), work_dlb(force_env%para_env%num_pe))
3591 24 : work = 0
3592 24 : work_dlb = 0
3593 : END IF
3594 34 : my_work = 1
3595 34 : my_special_work = 1
3596 : ! Load balancing: allocate storage for receiving buffers and post recv requests
3597 34 : IF (mixed_cdft%dlb) THEN
3598 8 : IF (mixed_cdft%dlb_control%recv_work) THEN
3599 4 : my_work = 2
3600 4 : IF (.NOT. mixed_cdft%is_special) THEN
3601 40 : ALLOCATE (req_send(2, 3))
3602 : ELSE
3603 0 : ALLOCATE (req_send(2, 3*SIZE(mixed_cdft%dlb_control%sendbuff)))
3604 : END IF
3605 : END IF
3606 16 : IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
3607 8 : IF (.NOT. mixed_cdft%is_special) THEN
3608 8 : offset_repl = 0
3609 8 : IF (mixed_cdft%dlb_control%recv_work_repl(1) .AND. mixed_cdft%dlb_control%recv_work_repl(2)) THEN
3610 : ALLOCATE (req_recv(3*(SIZE(mixed_cdft%dlb_control%recv_info(1)%target_list, 2) + &
3611 0 : SIZE(mixed_cdft%dlb_control%recv_info(2)%target_list, 2))))
3612 0 : offset_repl = 3*SIZE(mixed_cdft%dlb_control%recv_info(1)%target_list, 2)
3613 8 : ELSE IF (mixed_cdft%dlb_control%recv_work_repl(1)) THEN
3614 0 : ALLOCATE (req_recv(3*(SIZE(mixed_cdft%dlb_control%recv_info(1)%target_list, 2))))
3615 : ELSE
3616 48 : ALLOCATE (req_recv(3*(SIZE(mixed_cdft%dlb_control%recv_info(2)%target_list, 2))))
3617 : END IF
3618 : ELSE
3619 0 : nbuffs = 0
3620 0 : offset_repl = 1
3621 0 : DO j = 1, SIZE(mixed_cdft%dlb_control%recv_work_repl)
3622 0 : IF (mixed_cdft%dlb_control%recv_work_repl(j)) THEN
3623 0 : nbuffs = nbuffs + (SIZE(mixed_cdft%dlb_control%recv_info(j)%matrix_info) - 4)/3
3624 : END IF
3625 : END DO
3626 0 : ALLOCATE (req_recv(3*nbuffs))
3627 : END IF
3628 24 : DO j = 1, SIZE(mixed_cdft%dlb_control%recv_work_repl)
3629 24 : IF (mixed_cdft%dlb_control%recv_work_repl(j)) THEN
3630 8 : IF (.NOT. mixed_cdft%is_special) THEN
3631 8 : offset = 0
3632 8 : index = j + (j/2)
3633 64 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(SIZE(mixed_cdft%dlb_control%recv_info(j)%target_list, 2)))
3634 16 : DO i = 1, SIZE(mixed_cdft%dlb_control%recv_info(j)%target_list, 2)
3635 8 : IF (mixed_cdft%is_pencil) THEN
3636 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)% &
3637 : weight(mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset: &
3638 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset + &
3639 : (mixed_cdft%dlb_control%recv_info(j)%target_list(2, i) - 1), &
3640 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(5): &
3641 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(6), &
3642 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(2): &
3643 0 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(3)))
3644 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)% &
3645 : cavity(mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset: &
3646 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset + &
3647 : (mixed_cdft%dlb_control%recv_info(j)%target_list(2, i) - 1), &
3648 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(5): &
3649 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(6), &
3650 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(2): &
3651 0 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(3)))
3652 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)% &
3653 : gradients(3*natom, &
3654 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset: &
3655 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset + &
3656 : (mixed_cdft%dlb_control%recv_info(j)%target_list(2, i) - 1), &
3657 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(5): &
3658 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(6), &
3659 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(2): &
3660 0 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(3)))
3661 : ELSE
3662 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)% &
3663 : weight(mixed_cdft%dlb_control%recv_info(j)%matrix_info(5): &
3664 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(6), &
3665 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset: &
3666 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset + &
3667 : (mixed_cdft%dlb_control%recv_info(j)%target_list(2, i) - 1), &
3668 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(2): &
3669 40 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(3)))
3670 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)% &
3671 : cavity(mixed_cdft%dlb_control%recv_info(j)%matrix_info(5): &
3672 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(6), &
3673 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset: &
3674 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset + &
3675 : (mixed_cdft%dlb_control%recv_info(j)%target_list(2, i) - 1), &
3676 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(2): &
3677 40 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(3)))
3678 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)% &
3679 : gradients(3*natom, &
3680 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(5): &
3681 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(6), &
3682 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset: &
3683 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4) + offset + &
3684 : (mixed_cdft%dlb_control%recv_info(j)%target_list(2, i) - 1), &
3685 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(2): &
3686 48 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(3)))
3687 : END IF
3688 :
3689 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity, &
3690 : source=mixed_cdft%dlb_control%recv_info(j)%target_list(1, i), &
3691 : request=req_recv(3*(i - 1) + (j - 1)*offset_repl + 1), &
3692 8 : tag=mixed_cdft%dlb_control%recv_info(j)%target_list(3, i))
3693 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, &
3694 : source=mixed_cdft%dlb_control%recv_info(j)%target_list(1, i), &
3695 : request=req_recv(3*(i - 1) + (j - 1)*offset_repl + 2), &
3696 8 : tag=mixed_cdft%dlb_control%recv_info(j)%target_list(3, i) + 1)
3697 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients, &
3698 : source=mixed_cdft%dlb_control%recv_info(j)%target_list(1, i), &
3699 : request=req_recv(3*(i - 1) + (j - 1)*offset_repl + 3), &
3700 8 : tag=mixed_cdft%dlb_control%recv_info(j)%target_list(3, i) + 2)
3701 16 : offset = offset + mixed_cdft%dlb_control%recv_info(j)%target_list(2, i)
3702 : END DO
3703 8 : DEALLOCATE (mixed_cdft%dlb_control%recv_info(j)%matrix_info)
3704 : ELSE
3705 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)% &
3706 0 : buffs((SIZE(mixed_cdft%dlb_control%recv_info(j)%matrix_info) - 4)/3))
3707 0 : index = 6
3708 0 : DO i = 1, SIZE(mixed_cdft%dlb_control%recvbuff(j)%buffs)
3709 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)% &
3710 : weight(mixed_cdft%dlb_control%recv_info(j)%matrix_info(index): &
3711 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(index + 1), &
3712 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(1): &
3713 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(2), &
3714 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(3): &
3715 0 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4)))
3716 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)% &
3717 : cavity(mixed_cdft%dlb_control%recv_info(j)%matrix_info(index): &
3718 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(index + 1), &
3719 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(1): &
3720 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(2), &
3721 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(3): &
3722 0 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4)))
3723 : ALLOCATE (mixed_cdft%dlb_control%recvbuff(j)%buffs(i)% &
3724 : gradients(3*natom, mixed_cdft%dlb_control%recv_info(j)%matrix_info(index): &
3725 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(index + 1), &
3726 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(1): &
3727 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(2), &
3728 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(3): &
3729 0 : mixed_cdft%dlb_control%recv_info(j)%matrix_info(4)))
3730 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%cavity, &
3731 : source=mixed_cdft%dlb_control%recv_info(j)%matrix_info(index - 1), &
3732 0 : request=req_recv(offset_repl), tag=1)
3733 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%weight, &
3734 : source=mixed_cdft%dlb_control%recv_info(j)%matrix_info(index - 1), &
3735 0 : request=req_recv(offset_repl + 1), tag=2)
3736 : CALL force_env%para_env%irecv(msgout=mixed_cdft%dlb_control%recvbuff(j)%buffs(i)%gradients, &
3737 : source=mixed_cdft%dlb_control%recv_info(j)%matrix_info(index - 1), &
3738 0 : request=req_recv(offset_repl + 2), tag=3)
3739 0 : index = index + 3
3740 0 : offset_repl = offset_repl + 3
3741 : END DO
3742 0 : DEALLOCATE (mixed_cdft%dlb_control%recv_info(j)%matrix_info)
3743 : END IF
3744 : END IF
3745 : END DO
3746 : END IF
3747 : END IF
3748 34 : cutoffs => cdft_control%becke_control%cutoffs
3749 34 : should_communicate = .FALSE.
3750 136 : DO i = 1, 3
3751 136 : cell_v(i) = cell%hmat(i, i)
3752 : END DO
3753 72 : DO iwork = my_work, 1, -1
3754 38 : IF (iwork == 2) THEN
3755 4 : IF (.NOT. mixed_cdft%is_special) THEN
3756 4 : cavity => mixed_cdft%dlb_control%cavity
3757 4 : weight => mixed_cdft%dlb_control%weight
3758 4 : gradients => mixed_cdft%dlb_control%gradients
3759 4 : ALLOCATE (completed(2, 3), nsent(2, 3))
3760 : ELSE
3761 0 : my_special_work = SIZE(mixed_cdft%dlb_control%sendbuff)
3762 0 : ALLOCATE (completed(2, 3*my_special_work), nsent(2, 3*my_special_work))
3763 : END IF
3764 40 : completed = .FALSE.
3765 40 : nsent = 0
3766 : ELSE
3767 34 : IF (.NOT. mixed_cdft%is_special) THEN
3768 34 : weight => mixed_cdft%weight
3769 34 : cavity => mixed_cdft%cavity
3770 34 : gradients => cdft_control%group(1)%gradients
3771 : ELSE
3772 0 : my_special_work = SIZE(mixed_cdft%dest_list)
3773 : END IF
3774 : END IF
3775 110 : DO ispecial = 1, my_special_work
3776 38 : nwork = 0
3777 38 : IF (mixed_cdft%is_special) THEN
3778 0 : IF (iwork == 1) THEN
3779 0 : weight => mixed_cdft%sendbuff(ispecial)%weight
3780 0 : cavity => mixed_cdft%sendbuff(ispecial)%cavity
3781 0 : gradients => mixed_cdft%sendbuff(ispecial)%gradients
3782 : ELSE
3783 0 : weight => mixed_cdft%dlb_control%sendbuff(ispecial)%weight
3784 0 : cavity => mixed_cdft%dlb_control%sendbuff(ispecial)%cavity
3785 0 : gradients => mixed_cdft%dlb_control%sendbuff(ispecial)%gradients
3786 : END IF
3787 : END IF
3788 970 : DO k = LBOUND(weight, 1), UBOUND(weight, 1)
3789 856 : IF (mixed_cdft%dlb .AND. mixed_cdft%is_pencil .AND. .NOT. mixed_cdft%is_special) THEN
3790 0 : IF (mixed_cdft%dlb_control%send_work) THEN
3791 0 : IF (k .GE. mixed_cdft%dlb_control%distributed(1) .AND. &
3792 : k .LE. mixed_cdft%dlb_control%distributed(2)) THEN
3793 : CYCLE
3794 : END IF
3795 : END IF
3796 : END IF
3797 39790 : DO j = LBOUND(weight, 2), UBOUND(weight, 2)
3798 37184 : IF (mixed_cdft%dlb .AND. .NOT. mixed_cdft%is_pencil .AND. .NOT. mixed_cdft%is_special) THEN
3799 6400 : IF (mixed_cdft%dlb_control%send_work) THEN
3800 3120 : IF (j .GE. mixed_cdft%dlb_control%distributed(1) .AND. &
3801 : j .LE. mixed_cdft%dlb_control%distributed(2)) THEN
3802 : CYCLE
3803 : END IF
3804 : END IF
3805 : END IF
3806 : ! Check if any of the buffers have become available for deallocation
3807 37184 : IF (should_communicate) THEN
3808 16 : DO icomm = 1, SIZE(nsent, 2)
3809 40 : DO jcomm = 1, SIZE(nsent, 1)
3810 24 : IF (nsent(jcomm, icomm) == 1) CYCLE
3811 24 : completed(jcomm, icomm) = req_send(jcomm, icomm)%test()
3812 24 : IF (completed(jcomm, icomm)) THEN
3813 24 : nsent(jcomm, icomm) = nsent(jcomm, icomm) + 1
3814 24 : nsent_total = nsent_total + 1
3815 24 : IF (nsent_total == SIZE(nsent, 1)*SIZE(nsent, 2)) should_communicate = .FALSE.
3816 : END IF
3817 72 : IF (ALL(completed(:, icomm))) THEN
3818 12 : IF (MODULO(icomm, 3) == 1) THEN
3819 4 : IF (.NOT. mixed_cdft%is_special) THEN
3820 4 : DEALLOCATE (mixed_cdft%dlb_control%cavity)
3821 : ELSE
3822 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff((icomm - 1)/3 + 1)%cavity)
3823 : END IF
3824 8 : ELSE IF (MODULO(icomm, 3) == 2) THEN
3825 4 : IF (.NOT. mixed_cdft%is_special) THEN
3826 4 : DEALLOCATE (mixed_cdft%dlb_control%weight)
3827 : ELSE
3828 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff((icomm - 1)/3 + 1)%weight)
3829 : END IF
3830 : ELSE
3831 4 : IF (.NOT. mixed_cdft%is_special) THEN
3832 4 : DEALLOCATE (mixed_cdft%dlb_control%gradients)
3833 : ELSE
3834 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff((icomm - 1)/3 + 1)%gradients)
3835 : END IF
3836 : END IF
3837 : END IF
3838 : END DO
3839 : END DO
3840 : END IF
3841 : ! Poll to prevent starvation
3842 37184 : IF (ASSOCIATED(req_recv)) &
3843 6400 : completed_recv = mp_testall(req_recv)
3844 : !
3845 1829144 : DO i = LBOUND(weight, 3), UBOUND(weight, 3)
3846 1716736 : IF (cdft_control%becke_control%cavity_confine) THEN
3847 1716736 : IF (cavity(k, j, i) < cdft_control%becke_control%eps_cavity) CYCLE
3848 : END IF
3849 897276 : grid_p(1) = k*dr(1) + shift(1)
3850 897276 : grid_p(2) = j*dr(2) + shift(2)
3851 897276 : grid_p(3) = i*dr(3) + shift(3)
3852 897276 : nskipped = 0
3853 2691828 : cell_functions = 1.0_dp
3854 2691828 : skip_me = .FALSE.
3855 2691828 : IF (store_vectors) distances = 0.0_dp
3856 897276 : IF (in_memory) THEN
3857 6076044 : d_sum_Pm_dR = 0.0_dp
3858 6076044 : d_sum_const_dR = 0.0_dp
3859 6076044 : dP_i_dRi = 0.0_dp
3860 : END IF
3861 2281193 : DO iatom = 1, natom
3862 1794552 : IF (skip_me(iatom)) THEN
3863 57685 : cell_functions(iatom) = 0.0_dp
3864 57685 : IF (cdft_control%becke_control%should_skip) THEN
3865 37642 : IF (is_constraint(iatom)) nskipped = nskipped + 1
3866 37642 : IF (nskipped == cdft_control%natoms) THEN
3867 0 : IF (in_memory) THEN
3868 0 : IF (cdft_control%becke_control%cavity_confine) THEN
3869 0 : cavity(k, j, i) = 0.0_dp
3870 : END IF
3871 : END IF
3872 : EXIT
3873 : END IF
3874 : END IF
3875 : CYCLE
3876 : END IF
3877 1736867 : IF (store_vectors) THEN
3878 1736867 : IF (distances(iatom) .EQ. 0.0_dp) THEN
3879 6387184 : r = position_vecs(:, iatom)
3880 6387184 : dist_vec = (r - grid_p) - ANINT((r - grid_p)/cell_v)*cell_v
3881 6387184 : dist1 = SQRT(DOT_PRODUCT(dist_vec, dist_vec))
3882 6387184 : distance_vecs(:, iatom) = dist_vec
3883 1596796 : distances(iatom) = dist1
3884 : ELSE
3885 560284 : dist_vec = distance_vecs(:, iatom)
3886 : dist1 = distances(iatom)
3887 : END IF
3888 : ELSE
3889 0 : r = particle_set(iatom)%r
3890 0 : DO ip = 1, 3
3891 0 : r(ip) = MODULO(r(ip), cell%hmat(ip, ip)) - cell%hmat(ip, ip)/2._dp
3892 : END DO
3893 0 : dist_vec = (r - grid_p) - ANINT((r - grid_p)/cell_v)*cell_v
3894 0 : dist1 = SQRT(DOT_PRODUCT(dist_vec, dist_vec))
3895 : END IF
3896 2223508 : IF (dist1 .LE. cutoffs(iatom)) THEN
3897 395525 : IF (in_memory) THEN
3898 296175 : IF (dist1 .LE. th) dist1 = th
3899 1184700 : dr_i_dR(:) = dist_vec(:)/dist1
3900 : END IF
3901 1186575 : DO jatom = 1, natom
3902 1186575 : IF (jatom .NE. iatom) THEN
3903 395525 : IF (jatom < iatom) THEN
3904 197769 : IF (.NOT. skip_me(jatom)) CYCLE
3905 : END IF
3906 255454 : IF (store_vectors) THEN
3907 255454 : IF (distances(jatom) .EQ. 0.0_dp) THEN
3908 791024 : r1 = position_vecs(:, jatom)
3909 791024 : dist_vec = (r1 - grid_p) - ANINT((r1 - grid_p)/cell_v)*cell_v
3910 791024 : dist2 = SQRT(DOT_PRODUCT(dist_vec, dist_vec))
3911 791024 : distance_vecs(:, jatom) = dist_vec
3912 197756 : distances(jatom) = dist2
3913 : ELSE
3914 230792 : dist_vec = distance_vecs(:, jatom)
3915 : dist2 = distances(jatom)
3916 : END IF
3917 : ELSE
3918 0 : r1 = particle_set(jatom)%r
3919 0 : DO ip = 1, 3
3920 0 : r1(ip) = MODULO(r1(ip), cell%hmat(ip, ip)) - cell%hmat(ip, ip)/2._dp
3921 : END DO
3922 0 : dist_vec = (r1 - grid_p) - ANINT((r1 - grid_p)/cell_v)*cell_v
3923 0 : dist2 = SQRT(DOT_PRODUCT(dist_vec, dist_vec))
3924 : END IF
3925 255454 : IF (in_memory) THEN
3926 191129 : IF (store_vectors) THEN
3927 764516 : dr1_r2 = pair_dist_vecs(:, iatom, jatom)
3928 : ELSE
3929 0 : dr1_r2 = (r - r1) - ANINT((r - r1)/cell_v)*cell_v
3930 : END IF
3931 191129 : IF (dist2 .LE. th) dist2 = th
3932 191129 : tmp_const = (R12(iatom, jatom)**3)
3933 764516 : dr_ij_dR(:) = dr1_r2(:)/tmp_const
3934 : !derivativ w.r.t. Rj
3935 764516 : dr_j_dR = dist_vec(:)/dist2
3936 764516 : dmy_dR_j(:) = -(dr_j_dR(:)/R12(iatom, jatom) - (dist1 - dist2)*dr_ij_dR(:))
3937 : !derivativ w.r.t. Ri
3938 764516 : dmy_dR_i(:) = dr_i_dR(:)/R12(iatom, jatom) - (dist1 - dist2)*dr_ij_dR(:)
3939 : END IF
3940 255454 : my1 = (dist1 - dist2)/R12(iatom, jatom)
3941 255454 : IF (cdft_control%becke_control%adjust) THEN
3942 166839 : my1_homo = my1
3943 : my1 = my1 + &
3944 166839 : cdft_control%becke_control%aij(iatom, jatom)*(1.0_dp - my1**2)
3945 : END IF
3946 255454 : myexp = 1.5_dp*my1 - 0.5_dp*my1**3
3947 255454 : IF (in_memory) THEN
3948 191129 : dmyexp = 1.5_dp - 1.5_dp*my1**2
3949 : tmp_const = (1.5_dp**2)*dmyexp*(1 - myexp**2)* &
3950 191129 : (1.0_dp - ((1.5_dp*myexp - 0.5_dp*(myexp**3))**2))
3951 :
3952 764516 : ds_dR_i(:) = -0.5_dp*tmp_const*dmy_dR_i(:)
3953 764516 : ds_dR_j(:) = -0.5_dp*tmp_const*dmy_dR_j(:)
3954 191129 : IF (cdft_control%becke_control%adjust) THEN
3955 102514 : tmp_const = 1.0_dp - 2.0_dp*my1_homo*cdft_control%becke_control%aij(iatom, jatom)
3956 410056 : ds_dR_i(:) = ds_dR_i(:)*tmp_const
3957 410056 : ds_dR_j(:) = ds_dR_j(:)*tmp_const
3958 : END IF
3959 : END IF
3960 255454 : myexp = 1.5_dp*myexp - 0.5_dp*myexp**3
3961 255454 : myexp = 1.5_dp*myexp - 0.5_dp*myexp**3
3962 255454 : tmp_const = 0.5_dp*(1.0_dp - myexp)
3963 255454 : cell_functions(iatom) = cell_functions(iatom)*tmp_const
3964 255454 : IF (in_memory) THEN
3965 191129 : IF (ABS(tmp_const) .LE. th) tmp_const = tmp_const + th
3966 764516 : dP_i_dRi(:, iatom) = dP_i_dRi(:, iatom) + ds_dR_i(:)/tmp_const
3967 764516 : dP_i_dRj(:, iatom, jatom) = ds_dR_j(:)/tmp_const
3968 : END IF
3969 :
3970 255454 : IF (dist2 .LE. cutoffs(jatom)) THEN
3971 140071 : tmp_const = 0.5_dp*(1.0_dp + myexp)
3972 140071 : cell_functions(jatom) = cell_functions(jatom)*tmp_const
3973 140071 : IF (in_memory) THEN
3974 105046 : IF (ABS(tmp_const) .LE. th) tmp_const = tmp_const + th
3975 420184 : dP_i_dRj(:, jatom, iatom) = -ds_dR_i(:)/tmp_const
3976 420184 : dP_i_dRi(:, jatom) = dP_i_dRi(:, jatom) - ds_dR_j(:)/tmp_const
3977 : END IF
3978 : ELSE
3979 115383 : skip_me(jatom) = .TRUE.
3980 : END IF
3981 : END IF
3982 : END DO
3983 395525 : IF (in_memory) THEN
3984 1184700 : dP_i_dRi(:, iatom) = cell_functions(iatom)*dP_i_dRi(:, iatom)
3985 1184700 : d_sum_Pm_dR(:, iatom) = d_sum_Pm_dR(:, iatom) + dP_i_dRi(:, iatom)
3986 296175 : IF (is_constraint(iatom)) &
3987 : d_sum_const_dR(:, iatom) = d_sum_const_dR(:, iatom) + dP_i_dRi(:, iatom)* &
3988 1184700 : coefficients(iatom)
3989 888525 : DO jatom = 1, natom
3990 888525 : IF (jatom .NE. iatom) THEN
3991 296175 : IF (jatom < iatom) THEN
3992 148094 : IF (.NOT. skip_me(jatom)) THEN
3993 420184 : dP_i_dRj(:, iatom, jatom) = cell_functions(iatom)*dP_i_dRj(:, iatom, jatom)
3994 420184 : d_sum_Pm_dR(:, jatom) = d_sum_Pm_dR(:, jatom) + dP_i_dRj(:, iatom, jatom)
3995 105046 : IF (is_constraint(iatom)) &
3996 : d_sum_const_dR(:, jatom) = d_sum_const_dR(:, jatom) + &
3997 : dP_i_dRj(:, iatom, jatom)* &
3998 420184 : coefficients(iatom)
3999 : CYCLE
4000 : END IF
4001 : END IF
4002 764516 : dP_i_dRj(:, iatom, jatom) = cell_functions(iatom)*dP_i_dRj(:, iatom, jatom)
4003 764516 : d_sum_Pm_dR(:, jatom) = d_sum_Pm_dR(:, jatom) + dP_i_dRj(:, iatom, jatom)
4004 191129 : IF (is_constraint(iatom)) &
4005 : d_sum_const_dR(:, jatom) = d_sum_const_dR(:, jatom) + dP_i_dRj(:, iatom, jatom)* &
4006 764516 : coefficients(iatom)
4007 : END IF
4008 : END DO
4009 : END IF
4010 : ELSE
4011 1341342 : cell_functions(iatom) = 0.0_dp
4012 1341342 : skip_me(iatom) = .TRUE.
4013 1341342 : IF (cdft_control%becke_control%should_skip) THEN
4014 858928 : IF (is_constraint(iatom)) nskipped = nskipped + 1
4015 858928 : IF (nskipped == cdft_control%natoms) THEN
4016 410635 : IF (in_memory) THEN
4017 252800 : IF (cdft_control%becke_control%cavity_confine) THEN
4018 252800 : cavity(k, j, i) = 0.0_dp
4019 : END IF
4020 : END IF
4021 : EXIT
4022 : END IF
4023 : END IF
4024 : END IF
4025 : END DO
4026 897276 : IF (nskipped == cdft_control%natoms) CYCLE
4027 : sum_cell_f_constr = 0.0_dp
4028 1459923 : DO ip = 1, cdft_control%natoms
4029 : sum_cell_f_constr = sum_cell_f_constr + cell_functions(catom(ip))* &
4030 1459923 : cdft_control%group(1)%coeff(ip)
4031 : END DO
4032 486641 : sum_cell_f_all = 0.0_dp
4033 486641 : nwork = nwork + 1
4034 1459923 : DO ip = 1, natom
4035 1459923 : sum_cell_f_all = sum_cell_f_all + cell_functions(ip)
4036 : END DO
4037 486641 : IF (in_memory) THEN
4038 1266948 : DO iatom = 1, natom
4039 1266948 : IF (ABS(sum_cell_f_all) .GT. 0.0_dp) THEN
4040 : gradients(3*(iatom - 1) + 1:3*(iatom - 1) + 3, k, j, i) = &
4041 : d_sum_const_dR(:, iatom)/sum_cell_f_all - sum_cell_f_constr* &
4042 1529032 : d_sum_Pm_dR(:, iatom)/(sum_cell_f_all**2)
4043 : END IF
4044 : END DO
4045 : END IF
4046 486641 : IF (ABS(sum_cell_f_all) .GT. 0.000001) &
4047 292638 : weight(k, j, i) = sum_cell_f_constr/sum_cell_f_all
4048 : END DO ! i
4049 : END DO ! j
4050 : END DO ! k
4051 : ! Load balancing: post send requests
4052 76 : IF (iwork == 2) THEN
4053 4 : IF (.NOT. mixed_cdft%is_special) THEN
4054 12 : DO i = 1, SIZE(req_send, 1)
4055 : CALL force_env%para_env%isend(msgin=mixed_cdft%dlb_control%cavity, &
4056 : dest=mixed_cdft%dlb_control%my_dest_repl(i), &
4057 : request=req_send(i, 1), &
4058 8 : tag=mixed_cdft%dlb_control%dest_tags_repl(i))
4059 : CALL force_env%para_env%isend(msgin=mixed_cdft%dlb_control%weight, &
4060 : dest=mixed_cdft%dlb_control%my_dest_repl(i), &
4061 : request=req_send(i, 2), &
4062 8 : tag=mixed_cdft%dlb_control%dest_tags_repl(i) + 1)
4063 : CALL force_env%para_env%isend(msgin=mixed_cdft%dlb_control%gradients, &
4064 : dest=mixed_cdft%dlb_control%my_dest_repl(i), &
4065 : request=req_send(i, 3), &
4066 12 : tag=mixed_cdft%dlb_control%dest_tags_repl(i) + 2)
4067 : END DO
4068 : should_communicate = .TRUE.
4069 : nsent_total = 0
4070 : ELSE
4071 0 : DO i = 1, SIZE(req_send, 1)
4072 : CALL force_env%para_env%isend(msgin=mixed_cdft%dlb_control%sendbuff(ispecial)%cavity, &
4073 : dest=mixed_cdft%dlb_control%sendbuff(ispecial)%rank(i), &
4074 0 : request=req_send(i, 3*(ispecial - 1) + 1), tag=1)
4075 : CALL force_env%para_env%isend(msgin=mixed_cdft%dlb_control%sendbuff(ispecial)%weight, &
4076 : dest=mixed_cdft%dlb_control%sendbuff(ispecial)%rank(i), &
4077 0 : request=req_send(i, 3*(ispecial - 1) + 2), tag=2)
4078 : CALL force_env%para_env%isend(msgin=mixed_cdft%dlb_control%sendbuff(ispecial)%gradients, &
4079 : dest=mixed_cdft%dlb_control%sendbuff(ispecial)%rank(i), &
4080 0 : request=req_send(i, 3*(ispecial - 1) + 3), tag=3)
4081 : END DO
4082 0 : IF (ispecial .EQ. my_special_work) THEN
4083 0 : should_communicate = .TRUE.
4084 0 : nsent_total = 0
4085 : END IF
4086 : END IF
4087 4 : work(mixed_cdft%dlb_control%my_source + 1) = work(mixed_cdft%dlb_control%my_source + 1) + nwork
4088 4 : work_dlb(force_env%para_env%mepos + 1) = work_dlb(force_env%para_env%mepos + 1) + nwork
4089 : ELSE
4090 34 : IF (mixed_cdft%dlb) work(force_env%para_env%mepos + 1) = work(force_env%para_env%mepos + 1) + nwork
4091 34 : IF (mixed_cdft%dlb) work_dlb(force_env%para_env%mepos + 1) = work_dlb(force_env%para_env%mepos + 1) + nwork
4092 : END IF
4093 : END DO ! ispecial
4094 : END DO ! iwork
4095 : ! Load balancing: wait for communication and deallocate sending buffers
4096 34 : IF (mixed_cdft%dlb) THEN
4097 16 : IF (mixed_cdft%dlb_control%recv_work .AND. &
4098 : ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
4099 48 : ALLOCATE (req_total(SIZE(req_recv) + SIZE(req_send, 1)*SIZE(req_send, 2)))
4100 4 : index = SIZE(req_recv)
4101 28 : req_total(1:index) = req_recv
4102 16 : DO i = 1, SIZE(req_send, 2)
4103 40 : DO j = 1, SIZE(req_send, 1)
4104 24 : index = index + 1
4105 36 : req_total(index) = req_send(j, i)
4106 : END DO
4107 : END DO
4108 4 : CALL mp_waitall(req_total)
4109 4 : DEALLOCATE (req_total)
4110 4 : IF (ASSOCIATED(mixed_cdft%dlb_control%cavity)) &
4111 0 : DEALLOCATE (mixed_cdft%dlb_control%cavity)
4112 4 : IF (ASSOCIATED(mixed_cdft%dlb_control%weight)) &
4113 0 : DEALLOCATE (mixed_cdft%dlb_control%weight)
4114 4 : IF (ASSOCIATED(mixed_cdft%dlb_control%gradients)) &
4115 0 : DEALLOCATE (mixed_cdft%dlb_control%gradients)
4116 4 : IF (mixed_cdft%is_special) THEN
4117 0 : DO j = 1, SIZE(mixed_cdft%dlb_control%sendbuff)
4118 0 : IF (ASSOCIATED(mixed_cdft%dlb_control%sendbuff(j)%cavity)) &
4119 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff(j)%cavity)
4120 0 : IF (ASSOCIATED(mixed_cdft%dlb_control%sendbuff(j)%weight)) &
4121 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff(j)%weight)
4122 0 : IF (ASSOCIATED(mixed_cdft%dlb_control%sendbuff(j)%gradients)) &
4123 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff(j)%gradients)
4124 : END DO
4125 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff)
4126 : END IF
4127 4 : DEALLOCATE (req_send, req_recv)
4128 4 : ELSE IF (mixed_cdft%dlb_control%recv_work) THEN
4129 0 : IF (should_communicate) THEN
4130 0 : CALL mp_waitall(req_send)
4131 : END IF
4132 0 : IF (ASSOCIATED(mixed_cdft%dlb_control%cavity)) &
4133 0 : DEALLOCATE (mixed_cdft%dlb_control%cavity)
4134 0 : IF (ASSOCIATED(mixed_cdft%dlb_control%weight)) &
4135 0 : DEALLOCATE (mixed_cdft%dlb_control%weight)
4136 0 : IF (ASSOCIATED(mixed_cdft%dlb_control%gradients)) &
4137 0 : DEALLOCATE (mixed_cdft%dlb_control%gradients)
4138 0 : IF (mixed_cdft%is_special) THEN
4139 0 : DO j = 1, SIZE(mixed_cdft%dlb_control%sendbuff)
4140 0 : IF (ASSOCIATED(mixed_cdft%dlb_control%sendbuff(j)%cavity)) &
4141 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff(j)%cavity)
4142 0 : IF (ASSOCIATED(mixed_cdft%dlb_control%sendbuff(j)%weight)) &
4143 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff(j)%weight)
4144 0 : IF (ASSOCIATED(mixed_cdft%dlb_control%sendbuff(j)%gradients)) &
4145 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff(j)%gradients)
4146 : END DO
4147 0 : DEALLOCATE (mixed_cdft%dlb_control%sendbuff)
4148 : END IF
4149 0 : DEALLOCATE (req_send)
4150 8 : ELSE IF (ANY(mixed_cdft%dlb_control%recv_work_repl)) THEN
4151 4 : CALL mp_waitall(req_recv)
4152 4 : DEALLOCATE (req_recv)
4153 : END IF
4154 : END IF
4155 34 : IF (mixed_cdft%dlb) THEN
4156 40 : CALL force_env%para_env%sum(work)
4157 40 : CALL force_env%para_env%sum(work_dlb)
4158 8 : IF (.NOT. ASSOCIATED(mixed_cdft%dlb_control%prediction_error)) &
4159 12 : ALLOCATE (mixed_cdft%dlb_control%prediction_error(force_env%para_env%num_pe))
4160 40 : mixed_cdft%dlb_control%prediction_error = mixed_cdft%dlb_control%expected_work - work
4161 : IF (debug_this_module .AND. iounit > 0) THEN
4162 : DO i = 1, SIZE(work, 1)
4163 : WRITE (iounit, '(A,I10,I10,I10)') &
4164 : 'Work', work(i), work_dlb(i), mixed_cdft%dlb_control%expected_work(i)
4165 : END DO
4166 : END IF
4167 8 : DEALLOCATE (work, work_dlb, mixed_cdft%dlb_control%expected_work)
4168 : END IF
4169 34 : NULLIFY (gradients, weight, cavity)
4170 34 : IF (ALLOCATED(coefficients)) &
4171 34 : DEALLOCATE (coefficients)
4172 34 : IF (in_memory) THEN
4173 24 : DEALLOCATE (ds_dR_j)
4174 24 : DEALLOCATE (ds_dR_i)
4175 24 : DEALLOCATE (d_sum_Pm_dR)
4176 24 : DEALLOCATE (d_sum_const_dR)
4177 24 : DEALLOCATE (dP_i_dRj)
4178 24 : DEALLOCATE (dP_i_dRi)
4179 24 : NULLIFY (gradients)
4180 24 : IF (store_vectors) THEN
4181 24 : DEALLOCATE (pair_dist_vecs)
4182 : END IF
4183 : END IF
4184 34 : NULLIFY (cutoffs)
4185 34 : IF (ALLOCATED(is_constraint)) &
4186 34 : DEALLOCATE (is_constraint)
4187 34 : DEALLOCATE (catom)
4188 34 : DEALLOCATE (R12)
4189 34 : DEALLOCATE (cell_functions)
4190 34 : DEALLOCATE (skip_me)
4191 34 : IF (ALLOCATED(completed)) &
4192 4 : DEALLOCATE (completed)
4193 34 : IF (ASSOCIATED(nsent)) &
4194 4 : DEALLOCATE (nsent)
4195 34 : IF (store_vectors) THEN
4196 34 : DEALLOCATE (distances)
4197 34 : DEALLOCATE (distance_vecs)
4198 34 : DEALLOCATE (position_vecs)
4199 : END IF
4200 34 : IF (ASSOCIATED(req_send)) &
4201 0 : DEALLOCATE (req_send)
4202 34 : IF (ASSOCIATED(req_recv)) &
4203 0 : DEALLOCATE (req_recv)
4204 : CALL cp_print_key_finished_output(iounit, logger, force_env_section, &
4205 34 : "MIXED%MIXED_CDFT%PRINT%PROGRAM_RUN_INFO")
4206 34 : CALL timestop(handle)
4207 :
4208 68 : END SUBROUTINE mixed_becke_constraint_low
4209 :
4210 : END MODULE mixed_cdft_methods
|