Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Handles the MPI communication of the swarm framework.
10 : !> \author Ole Schuett
11 : ! **************************************************************************************************
12 : MODULE swarm_mpi
13 : USE cp_files, ONLY: close_file,&
14 : open_file
15 : USE cp_iter_types, ONLY: cp_iteration_info_create,&
16 : cp_iteration_info_release,&
17 : cp_iteration_info_type
18 : USE cp_log_handling, ONLY: cp_add_default_logger,&
19 : cp_get_default_logger,&
20 : cp_logger_create,&
21 : cp_logger_release,&
22 : cp_logger_type,&
23 : cp_rm_default_logger
24 : USE input_section_types, ONLY: section_vals_type,&
25 : section_vals_val_set
26 : USE kinds, ONLY: default_path_length,&
27 : default_string_length
28 : USE machine, ONLY: default_output_unit
29 : USE message_passing, ONLY: mp_any_source,&
30 : mp_comm_type,&
31 : mp_para_env_release,&
32 : mp_para_env_type
33 : USE swarm_message, ONLY: swarm_message_get,&
34 : swarm_message_mpi_bcast,&
35 : swarm_message_mpi_recv,&
36 : swarm_message_mpi_send,&
37 : swarm_message_type
38 : #include "../base/base_uses.f90"
39 :
40 : IMPLICIT NONE
41 : PRIVATE
42 :
43 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'swarm_mpi'
44 :
45 : PUBLIC :: swarm_mpi_type, swarm_mpi_init, swarm_mpi_finalize
46 : PUBLIC :: swarm_mpi_send_report, swarm_mpi_recv_report
47 : PUBLIC :: swarm_mpi_send_command, swarm_mpi_recv_command
48 :
49 : TYPE swarm_mpi_type
50 : TYPE(mp_para_env_type), POINTER :: world => Null()
51 : TYPE(mp_para_env_type), POINTER :: worker => Null()
52 : TYPE(mp_para_env_type), POINTER :: master => Null()
53 : INTEGER, DIMENSION(:), ALLOCATABLE :: wid2group
54 : CHARACTER(LEN=default_path_length) :: master_output_path = ""
55 : END TYPE swarm_mpi_type
56 :
57 : CONTAINS
58 :
59 : ! **************************************************************************************************
60 : !> \brief Initialize MPI communicators for a swarm run.
61 : !> \param swarm_mpi ...
62 : !> \param world_para_env ...
63 : !> \param root_section ...
64 : !> \param n_workers ...
65 : !> \param worker_id ...
66 : !> \param iw ...
67 : !> \author Ole Schuett
68 : ! **************************************************************************************************
69 6 : SUBROUTINE swarm_mpi_init(swarm_mpi, world_para_env, root_section, n_workers, worker_id, iw)
70 : TYPE(swarm_mpi_type) :: swarm_mpi
71 : TYPE(mp_para_env_type), POINTER :: world_para_env
72 : TYPE(section_vals_type), POINTER :: root_section
73 : INTEGER, INTENT(IN) :: n_workers
74 : INTEGER, INTENT(OUT) :: worker_id
75 : INTEGER, INTENT(IN) :: iw
76 :
77 : INTEGER :: n_groups_created, pe_per_worker, &
78 : subgroup_rank, subgroup_size
79 : TYPE(mp_comm_type) :: subgroup
80 : LOGICAL :: im_the_master
81 6 : INTEGER, DIMENSION(:), POINTER :: group_distribution_p
82 : INTEGER, DIMENSION(0:world_para_env%num_pe-2), &
83 12 : TARGET :: group_distribution
84 :
85 : ! ====== Setup of MPI-Groups ======
86 :
87 6 : worker_id = -1
88 6 : swarm_mpi%world => world_para_env
89 :
90 6 : IF (MOD(swarm_mpi%world%num_pe - 1, n_workers) /= 0) THEN
91 0 : CPABORT("number of processors-1 is not divisible by n_workers.")
92 : END IF
93 6 : IF (swarm_mpi%world%num_pe < n_workers + 1) THEN
94 0 : CPABORT("There are not enough processes for n_workers + 1. Aborting.")
95 : END IF
96 :
97 6 : pe_per_worker = (swarm_mpi%world%num_pe - 1)/n_workers
98 :
99 6 : IF (iw > 0) THEN
100 3 : WRITE (iw, '(A,45X,I8)') " SWARM| Number of mpi ranks", swarm_mpi%world%num_pe
101 3 : WRITE (iw, '(A,47X,I8)') " SWARM| Number of workers", n_workers
102 : END IF
103 :
104 : ! the last task becomes the master. Preseves node-alignment of other tasks.
105 6 : im_the_master = (swarm_mpi%world%mepos == swarm_mpi%world%num_pe - 1)
106 :
107 : ! First split split para_env into a master- and a workers-groups...
108 6 : IF (im_the_master) THEN
109 3 : ALLOCATE (swarm_mpi%master)
110 3 : CALL swarm_mpi%master%from_split(swarm_mpi%world, 1)
111 3 : subgroup_size = swarm_mpi%master%num_pe
112 3 : subgroup_rank = swarm_mpi%master%mepos
113 3 : IF (swarm_mpi%master%num_pe /= 1) CPABORT("mp_comm_split_direct failed (master)")
114 : ELSE
115 3 : CALL subgroup%from_split(swarm_mpi%world, 2)
116 3 : subgroup_size = subgroup%num_pe
117 3 : subgroup_rank = subgroup%mepos
118 3 : IF (subgroup_size /= swarm_mpi%world%num_pe - 1) CPABORT("mp_comm_split_direct failed (worker)")
119 : END IF
120 :
121 18 : ALLOCATE (swarm_mpi%wid2group(n_workers))
122 12 : swarm_mpi%wid2group = 0
123 :
124 6 : IF (.NOT. im_the_master) THEN
125 : ! ...then split workers-group into n_workers groups - one for each worker.
126 3 : group_distribution_p => group_distribution
127 3 : ALLOCATE (swarm_mpi%worker)
128 3 : CALL swarm_mpi%worker%from_split(subgroup, n_groups_created, group_distribution_p, n_subgroups=n_workers)
129 3 : worker_id = group_distribution(subgroup_rank) + 1 ! shall start by 1
130 3 : IF (n_groups_created /= n_workers) CPABORT("mp_comm_split failed.")
131 3 : CALL subgroup%free()
132 :
133 : !WRITE (*,*) "this is worker ", worker_id, swarm_mpi%worker%mepos, swarm_mpi%worker%num_pe
134 :
135 : ! collect world-ranks of each worker groups rank-0 node
136 3 : IF (swarm_mpi%worker%mepos == 0) &
137 3 : swarm_mpi%wid2group(worker_id) = swarm_mpi%world%mepos
138 :
139 : END IF
140 :
141 6 : CALL swarm_mpi%world%sum(swarm_mpi%wid2group)
142 : !WRITE (*,*), "wid2group table: ",swarm_mpi%wid2group
143 :
144 6 : CALL logger_init_master(swarm_mpi)
145 6 : CALL logger_init_worker(swarm_mpi, root_section, worker_id)
146 6 : END SUBROUTINE swarm_mpi_init
147 :
148 : ! **************************************************************************************************
149 : !> \brief Helper routine for swarm_mpi_init, configures the master's logger.
150 : !> \param swarm_mpi ...
151 : !> \author Ole Schuett
152 : ! **************************************************************************************************
153 6 : SUBROUTINE logger_init_master(swarm_mpi)
154 : TYPE(swarm_mpi_type) :: swarm_mpi
155 :
156 : INTEGER :: output_unit
157 : TYPE(cp_logger_type), POINTER :: logger
158 :
159 : ! broadcast master_output_path to all ranks
160 :
161 6 : IF (swarm_mpi%world%is_source()) THEN
162 3 : logger => cp_get_default_logger()
163 3 : output_unit = logger%default_local_unit_nr
164 3 : swarm_mpi%master_output_path = output_unit2path(output_unit)
165 3 : IF (output_unit /= default_output_unit) &
166 0 : CLOSE (output_unit)
167 : END IF
168 :
169 6 : CALL swarm_mpi%world%bcast(swarm_mpi%master_output_path)
170 :
171 6 : IF (ASSOCIATED(swarm_mpi%master)) &
172 3 : CALL error_add_new_logger(swarm_mpi%master, swarm_mpi%master_output_path)
173 6 : END SUBROUTINE logger_init_master
174 :
175 : ! **************************************************************************************************
176 : !> \brief Helper routine for logger_init_master, inquires filename for given unit.
177 : !> \param output_unit ...
178 : !> \return ...
179 : !> \author Ole Schuett
180 : ! **************************************************************************************************
181 3 : FUNCTION output_unit2path(output_unit) RESULT(output_path)
182 : INTEGER, INTENT(IN) :: output_unit
183 : CHARACTER(LEN=default_path_length) :: output_path
184 :
185 3 : output_path = "__STD_OUT__"
186 3 : IF (output_unit /= default_output_unit) &
187 0 : INQUIRE (unit=output_unit, name=output_path)
188 3 : END FUNCTION output_unit2path
189 :
190 : ! **************************************************************************************************
191 : !> \brief Helper routine for swarm_mpi_init, configures the workers's logger.
192 : !> \param swarm_mpi ...
193 : !> \param root_section ...
194 : !> \param worker_id ...
195 : !> \author Ole Schuett
196 : ! **************************************************************************************************
197 6 : SUBROUTINE logger_init_worker(swarm_mpi, root_section, worker_id)
198 : TYPE(swarm_mpi_type) :: swarm_mpi
199 : TYPE(section_vals_type), POINTER :: root_section
200 : INTEGER :: worker_id
201 :
202 : CHARACTER(LEN=default_path_length) :: output_path
203 : CHARACTER(len=default_string_length) :: new_project_name, project_name, &
204 : worker_name
205 : TYPE(cp_iteration_info_type), POINTER :: new_iter_info
206 : TYPE(cp_logger_type), POINTER :: old_logger
207 :
208 6 : NULLIFY (old_logger, new_iter_info)
209 6 : IF (ASSOCIATED(swarm_mpi%worker)) THEN
210 3 : old_logger => cp_get_default_logger()
211 3 : project_name = old_logger%iter_info%project_name
212 3 : IF (worker_id > 99999) THEN
213 0 : CPABORT("Did not expect so many workers.")
214 : END IF
215 3 : WRITE (worker_name, "(A,I5.5)") 'WORKER', worker_id
216 3 : IF (LEN_TRIM(project_name) + 1 + LEN_TRIM(worker_name) > default_string_length) THEN
217 0 : CPABORT("project name too long")
218 : END IF
219 3 : output_path = TRIM(project_name)//"-"//TRIM(worker_name)//".out"
220 3 : new_project_name = TRIM(project_name)//"-"//TRIM(worker_name)
221 3 : CALL section_vals_val_set(root_section, "GLOBAL%PROJECT_NAME", c_val=new_project_name)
222 3 : CALL cp_iteration_info_create(new_iter_info, new_project_name)
223 3 : CALL error_add_new_logger(swarm_mpi%worker, output_path, new_iter_info)
224 3 : CALL cp_iteration_info_release(new_iter_info)
225 : END IF
226 6 : END SUBROUTINE logger_init_worker
227 :
228 : ! **************************************************************************************************
229 : !> \brief Helper routine for logger_init_master and logger_init_worker
230 : !> \param para_env ...
231 : !> \param output_path ...
232 : !> \param iter_info ...
233 : !> \author Ole Schuett
234 : ! **************************************************************************************************
235 6 : SUBROUTINE error_add_new_logger(para_env, output_path, iter_info)
236 : TYPE(mp_para_env_type), POINTER :: para_env
237 : CHARACTER(LEN=default_path_length) :: output_path
238 : TYPE(cp_iteration_info_type), OPTIONAL, POINTER :: iter_info
239 :
240 : INTEGER :: output_unit
241 : TYPE(cp_logger_type), POINTER :: new_logger, old_logger
242 :
243 6 : NULLIFY (new_logger, old_logger)
244 6 : output_unit = -1
245 6 : IF (para_env%is_source()) THEN
246 : ! open output_unit according to output_path
247 6 : output_unit = default_output_unit
248 6 : IF (output_path /= "__STD_OUT__") &
249 : CALL open_file(file_name=output_path, file_status="UNKNOWN", &
250 3 : file_action="WRITE", file_position="APPEND", unit_number=output_unit)
251 : END IF
252 :
253 6 : old_logger => cp_get_default_logger()
254 : CALL cp_logger_create(new_logger, para_env=para_env, &
255 : default_global_unit_nr=output_unit, close_global_unit_on_dealloc=.FALSE., &
256 6 : template_logger=old_logger, iter_info=iter_info)
257 :
258 6 : CALL cp_add_default_logger(new_logger)
259 6 : CALL cp_logger_release(new_logger)
260 6 : END SUBROUTINE error_add_new_logger
261 :
262 : ! **************************************************************************************************
263 : !> \brief Finalizes the MPI communicators of a swarm run.
264 : !> \param swarm_mpi ...
265 : !> \param root_section ...
266 : !> \author Ole Schuett
267 : ! **************************************************************************************************
268 6 : SUBROUTINE swarm_mpi_finalize(swarm_mpi, root_section)
269 : TYPE(swarm_mpi_type) :: swarm_mpi
270 : TYPE(section_vals_type), POINTER :: root_section
271 :
272 6 : CALL swarm_mpi%world%sync()
273 6 : CALL logger_finalize(swarm_mpi, root_section)
274 :
275 6 : IF (ASSOCIATED(swarm_mpi%worker)) CALL mp_para_env_release(swarm_mpi%worker)
276 6 : IF (ASSOCIATED(swarm_mpi%master)) CALL mp_para_env_release(swarm_mpi%master)
277 6 : NULLIFY (swarm_mpi%worker, swarm_mpi%master)
278 6 : DEALLOCATE (swarm_mpi%wid2group)
279 6 : END SUBROUTINE swarm_mpi_finalize
280 :
281 : ! **************************************************************************************************
282 : !> \brief Helper routine for swarm_mpi_finalize, restores the original loggers
283 : !> \param swarm_mpi ...
284 : !> \param root_section ...
285 : !> \author Ole Schuett
286 : ! **************************************************************************************************
287 6 : SUBROUTINE logger_finalize(swarm_mpi, root_section)
288 : TYPE(swarm_mpi_type) :: swarm_mpi
289 : TYPE(section_vals_type), POINTER :: root_section
290 :
291 : INTEGER :: output_unit
292 : TYPE(cp_logger_type), POINTER :: logger, old_logger
293 :
294 6 : NULLIFY (logger, old_logger)
295 6 : logger => cp_get_default_logger()
296 6 : output_unit = logger%default_local_unit_nr
297 6 : IF (output_unit > 0 .AND. output_unit /= default_output_unit) &
298 0 : CALL close_file(output_unit)
299 :
300 6 : CALL cp_rm_default_logger() !pops the top-most logger
301 6 : old_logger => cp_get_default_logger()
302 :
303 : ! restore GLOBAL%PROJECT_NAME
304 : CALL section_vals_val_set(root_section, "GLOBAL%PROJECT_NAME", &
305 6 : c_val=old_logger%iter_info%project_name)
306 :
307 6 : CALL swarm_mpi%world%sync()
308 :
309 : ! do this only on master's rank 0
310 6 : IF (swarm_mpi%world%is_source() .AND. output_unit /= default_output_unit) THEN
311 0 : output_unit = old_logger%default_local_unit_nr
312 : OPEN (unit=output_unit, file=swarm_mpi%master_output_path, &
313 0 : status="UNKNOWN", action="WRITE", position="APPEND")
314 : END IF
315 6 : END SUBROUTINE logger_finalize
316 :
317 : ! **************************************************************************************************
318 : !> \brief Sends a report via MPI
319 : !> \param swarm_mpi ...
320 : !> \param report ...
321 : !> \author Ole Schuett
322 : ! **************************************************************************************************
323 51 : SUBROUTINE swarm_mpi_send_report(swarm_mpi, report)
324 : TYPE(swarm_mpi_type) :: swarm_mpi
325 : TYPE(swarm_message_type) :: report
326 :
327 : INTEGER :: dest, tag
328 :
329 : ! Only rank-0 of worker group sends its report
330 :
331 51 : IF (swarm_mpi%worker%is_source()) THEN
332 51 : dest = swarm_mpi%world%num_pe - 1
333 51 : tag = 42
334 51 : CALL swarm_message_mpi_send(report, group=swarm_mpi%world, dest=dest, tag=tag)
335 : END IF
336 :
337 51 : END SUBROUTINE swarm_mpi_send_report
338 :
339 : ! **************************************************************************************************
340 : !> \brief Receives a report via MPI
341 : !> \param swarm_mpi ...
342 : !> \param report ...
343 : !> \author Ole Schuett
344 : ! **************************************************************************************************
345 51 : SUBROUTINE swarm_mpi_recv_report(swarm_mpi, report)
346 : TYPE(swarm_mpi_type) :: swarm_mpi
347 : TYPE(swarm_message_type), INTENT(OUT) :: report
348 :
349 : INTEGER :: src, tag
350 :
351 51 : tag = 42
352 51 : src = mp_any_source
353 :
354 51 : CALL swarm_message_mpi_recv(report, group=swarm_mpi%world, src=src, tag=tag)
355 :
356 51 : END SUBROUTINE swarm_mpi_recv_report
357 :
358 : ! **************************************************************************************************
359 : !> \brief Sends a command via MPI
360 : !> \param swarm_mpi ...
361 : !> \param cmd ...
362 : !> \author Ole Schuett
363 : ! **************************************************************************************************
364 51 : SUBROUTINE swarm_mpi_send_command(swarm_mpi, cmd)
365 : TYPE(swarm_mpi_type) :: swarm_mpi
366 : TYPE(swarm_message_type) :: cmd
367 :
368 : INTEGER :: dest, tag, worker_id
369 :
370 51 : CALL swarm_message_get(cmd, "worker_id", worker_id)
371 51 : tag = 42
372 51 : dest = swarm_mpi%wid2group(worker_id)
373 :
374 51 : CALL swarm_message_mpi_send(cmd, group=swarm_mpi%world, dest=dest, tag=tag)
375 :
376 51 : END SUBROUTINE swarm_mpi_send_command
377 :
378 : ! **************************************************************************************************
379 : !> \brief Receives a command via MPI and broadcasts it within a worker.
380 : !> \param swarm_mpi ...
381 : !> \param cmd ...
382 : !> \author Ole Schuett
383 : ! **************************************************************************************************
384 51 : SUBROUTINE swarm_mpi_recv_command(swarm_mpi, cmd)
385 : TYPE(swarm_mpi_type) :: swarm_mpi
386 : TYPE(swarm_message_type), INTENT(OUT) :: cmd
387 :
388 : INTEGER :: src, tag
389 :
390 : ! This is a two step communication schema.
391 : ! First: The rank-0 of the worker groups receives the command from the master.
392 :
393 51 : IF (swarm_mpi%worker%is_source()) THEN
394 51 : src = swarm_mpi%world%num_pe - 1 !
395 51 : tag = 42
396 51 : CALL swarm_message_mpi_recv(cmd, group=swarm_mpi%world, src=src, tag=tag)
397 :
398 : END IF
399 :
400 : ! ! Second: The command is broadcasted within the worker group.
401 51 : CALL swarm_message_mpi_bcast(cmd, src=swarm_mpi%worker%source, group=swarm_mpi%worker)
402 :
403 51 : END SUBROUTINE swarm_mpi_recv_command
404 :
405 0 : END MODULE swarm_mpi
406 :
|