Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : MODULE farming_methods
10 : USE cp_files, ONLY: get_unit_number
11 : USE cp_log_handling, ONLY: cp_get_default_logger,&
12 : cp_logger_type
13 : USE cp_output_handling, ONLY: cp_print_key_finished_output,&
14 : cp_print_key_generate_filename,&
15 : cp_print_key_unit_nr
16 : USE farming_types, ONLY: farming_env_type,&
17 : init_job_type,&
18 : job_finished,&
19 : job_pending,&
20 : job_running
21 : USE input_section_types, ONLY: section_vals_get,&
22 : section_vals_get_subs_vals,&
23 : section_vals_type,&
24 : section_vals_val_get
25 : USE message_passing, ONLY: mp_para_env_type
26 : #include "./base/base_uses.f90"
27 :
28 : IMPLICIT NONE
29 : PRIVATE
30 : PUBLIC :: farming_parse_input, get_next_job
31 :
32 : ! must be negative in order to avoid confusion with job numbers
33 : INTEGER, PARAMETER, PUBLIC :: do_nothing = -1, &
34 : do_wait = -2, &
35 : do_deadlock = -3
36 :
37 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'farming_methods'
38 :
39 : CONTAINS
40 :
! **************************************************************************************************
!> \brief Selects what a minion should do next: run a job, idle, wait, or report a deadlock.
!>
!>        Cyclic mode (farming_env%cycle): `current` is moved to `start` if it lies below it and
!>        is incremented otherwise; once it passes `END` the result is do_nothing, else the
!>        counter is wrapped onto the range 1..farming_env%njobs.
!>
!>        Non-cyclic mode: the jobs start..END are scanned in order and the index of the first
!>        pending job with no unresolved dependency is returned. A dependency counts as
!>        unresolved when a job in start..END carrying that id has not finished yet. If pending
!>        jobs exist but all of them are blocked, do_wait is returned, or do_deadlock when in
!>        addition no job in the range is currently running.
!> \param farming_env farming environment holding the job list and the cycle flag
!> \param start first job index to be considered
!> \param END last job index to be considered
!> \param current running counter, only read and updated in cyclic mode
!> \param todo job index to execute, or one of do_nothing, do_wait, do_deadlock
! **************************************************************************************************
   SUBROUTINE get_next_job(farming_env, start, END, current, todo)
      TYPE(farming_env_type), POINTER                    :: farming_env
      INTEGER, INTENT(IN)                                :: start, END
      INTEGER, INTENT(INOUT)                             :: current
      INTEGER, INTENT(OUT)                               :: todo

      INTEGER                                            :: ijob, jdep, kjob
      LOGICAL                                            :: any_running, blocked

      IF (farming_env%cycle) THEN
         ! below start -> jump to start, otherwise advance by one
         current = MAX(current + 1, start)
         IF (current <= END) THEN
            todo = MODULO(current - 1, farming_env%njobs) + 1
         ELSE
            todo = do_nothing
         END IF
         RETURN
      END IF

      ! look for the first pending job that is free to run
      todo = do_nothing
      scan: DO ijob = start, END
         IF (farming_env%job(ijob)%status /= job_pending) CYCLE scan

         ! a dependency is unresolved as long as a job with that id is not finished
         blocked = .FALSE.
         deps: DO jdep = 1, SIZE(farming_env%job(ijob)%dependencies)
            DO kjob = start, END
               IF (farming_env%job(kjob)%status == job_finished) CYCLE
               IF (farming_env%job(kjob)%id == farming_env%job(ijob)%dependencies(jdep)) THEN
                  blocked = .TRUE.
                  EXIT deps
               END IF
            END DO
         END DO deps

         IF (.NOT. blocked) THEN
            todo = ijob
            EXIT scan
         END IF

         ! pending work exists, so the minion must not be told to stop;
         ! keep scanning, a later job might be runnable
         todo = do_wait
      END DO scan

      ! waiting only makes sense while something is still running, otherwise signal a deadlock
      IF (todo == do_wait) THEN
         any_running = .FALSE.
         DO kjob = start, END
            IF (farming_env%job(kjob)%status == job_running) THEN
               any_running = .TRUE.
               EXIT
            END IF
         END DO
         IF (.NOT. any_running) todo = do_deadlock
      END IF
   END SUBROUTINE get_next_job
111 :
! **************************************************************************************************
!> \brief Reads the FARMING section of the input into the farming environment and, on the rank
!>        that owns the PROGRAM_RUN_INFO output unit, prints a summary of the setup.
!>
!>        Group layout is taken from the first keyword present in the order
!>        GROUP_PARTITION, NGROUP, GROUP_SIZE. A GROUP_PARTITION whose entries do not sum up to
!>        the number of minions is reported and discarded on every rank.
!>        One job entry is created per JOB subsection; a missing JOB_ID defaults to the section
!>        index. Jobs with dependencies require CAPTAIN_MINION and exclude CYCLE.
!>        If DO_RESTART is set, the restart counter is read from the restart file on the
!>        printing rank and then broadcast through para_env.
!> \param farming_env farming environment to be filled
!> \param root_section root input section containing the FARMING subsection
!> \param para_env parallel environment (number of ranks, broadcast of the restart counter)
! **************************************************************************************************
   SUBROUTINE farming_parse_input(farming_env, root_section, para_env)
      TYPE(farming_env_type), POINTER                    :: farming_env
      TYPE(section_vals_type), POINTER                   :: root_section
      TYPE(mp_para_env_type), POINTER                    :: para_env

      INTEGER                                            :: i, iunit, n_rep_val, num_minions, &
                                                            output_unit, partition_sum, stat
      INTEGER, DIMENSION(:), POINTER                     :: dependencies, i_vals
      LOGICAL                                            :: bad_partition, explicit, has_dep
      TYPE(cp_logger_type), POINTER                      :: logger
      TYPE(section_vals_type), POINTER                   :: farming_section, jobs_section, print_key

      NULLIFY (farming_section, jobs_section, print_key, logger, dependencies, i_vals)
      logger => cp_get_default_logger()
      farming_env%group_size_wish_set = .FALSE.
      farming_env%ngroup_wish_set = .FALSE.
      farming_section => section_vals_get_subs_vals(root_section, "FARMING")

      IF (ASSOCIATED(farming_env%group_partition)) THEN
         DEALLOCATE (farming_env%group_partition)
      END IF

      ! The following input order is used
      ! 1) GROUP_PARTITION
      ! 2) NGROUP
      ! 3) GROUP_SIZE (default 8)
      CALL section_vals_val_get(farming_section, "GROUP_PARTITION", &
                                n_rep_val=n_rep_val)
      IF (n_rep_val > 0) THEN
         CALL section_vals_val_get(farming_section, "GROUP_PARTITION", &
                                   i_vals=i_vals)
         ALLOCATE (farming_env%group_partition(0:SIZE(i_vals) - 1))
         farming_env%group_partition(:) = i_vals
         farming_env%ngroup_wish_set = .TRUE.
         farming_env%ngroup_wish = SIZE(i_vals)
      ELSE
         CALL section_vals_val_get(farming_section, "NGROUP", &
                                   n_rep_val=n_rep_val)
         IF (n_rep_val > 0) THEN
            CALL section_vals_val_get(farming_section, "NGROUP", &
                                      i_val=farming_env%ngroup_wish)
            farming_env%ngroup_wish_set = .TRUE.
         ELSE
            CALL section_vals_val_get(farming_section, "GROUP_SIZE", &
                                      i_val=farming_env%group_size_wish)
            farming_env%group_size_wish_set = .TRUE.
         END IF
      END IF
      CALL section_vals_val_get(farming_section, "STRIDE", &
                                i_val=farming_env%stride)

      ! an explicit RESTART_FILE_NAME wins, otherwise the name is derived from the RESTART print key
      CALL section_vals_val_get(farming_section, "RESTART_FILE_NAME", &
                                explicit=explicit)
      IF (explicit) THEN
         CALL section_vals_val_get(farming_section, "RESTART_FILE_NAME", &
                                   c_val=farming_env%restart_file_name)
      ELSE
         print_key => section_vals_get_subs_vals(farming_section, "RESTART")
         farming_env%restart_file_name = cp_print_key_generate_filename(logger, print_key, &
                                                                        extension=".restart", &
                                                                        my_local=.FALSE.)
      END IF

      CALL section_vals_val_get(farming_section, "DO_RESTART", &
                                l_val=farming_env%restart)
      CALL section_vals_val_get(farming_section, "MAX_JOBS_PER_GROUP", &
                                i_val=farming_env%max_steps)
      CALL section_vals_val_get(farming_section, "CYCLE", &
                                l_val=farming_env%cycle)
      CALL section_vals_val_get(farming_section, "WAIT_TIME", &
                                r_val=farming_env%wait_time)

      CALL section_vals_val_get(farming_section, "CAPTAIN_MINION", &
                                l_val=farming_env%captain_minion)

      jobs_section => section_vals_get_subs_vals(farming_section, "JOB")
      CALL section_vals_get(jobs_section, n_repetition=farming_env%njobs)

      ALLOCATE (farming_env%Job(farming_env%njobs))
      CALL init_job_type(farming_env%job)

      has_dep = .FALSE.
      DO i = 1, farming_env%njobs
         CALL section_vals_val_get(jobs_section, i_rep_section=i, &
                                   keyword_name="DIRECTORY", c_val=farming_env%Job(i)%cwd)
         CALL section_vals_val_get(jobs_section, i_rep_section=i, &
                                   keyword_name="INPUT_FILE_NAME", c_val=farming_env%Job(i)%input)
         CALL section_vals_val_get(jobs_section, i_rep_section=i, &
                                   keyword_name="OUTPUT_FILE_NAME", c_val=farming_env%Job(i)%output)

         ! if job id is not specified the job id is the index
         CALL section_vals_val_get(jobs_section, i_rep_section=i, &
                                   keyword_name="JOB_ID", n_rep_val=n_rep_val)
         IF (n_rep_val == 0) THEN
            farming_env%Job(i)%id = i
         ELSE
            CALL section_vals_val_get(jobs_section, i_rep_section=i, &
                                      keyword_name="JOB_ID", i_val=farming_env%Job(i)%id)
         END IF

         ! get dependencies (always allocated, possibly with zero length)
         CALL section_vals_val_get(jobs_section, i_rep_section=i, &
                                   keyword_name="DEPENDENCIES", n_rep_val=n_rep_val)
         IF (n_rep_val == 0) THEN
            ALLOCATE (farming_env%Job(i)%dependencies(0))
         ELSE
            CALL section_vals_val_get(jobs_section, i_rep_section=i, &
                                      keyword_name="DEPENDENCIES", i_vals=dependencies)
            ALLOCATE (farming_env%Job(i)%dependencies(SIZE(dependencies, 1)))
            farming_env%Job(i)%dependencies = dependencies
            IF (SIZE(dependencies, 1) /= 0) has_dep = .TRUE.
         END IF
      END DO

      ! dependencies are only handled in captain/minion mode and are incompatible with cycling
      IF (has_dep) THEN
         CPASSERT(farming_env%captain_minion)
         CPASSERT(.NOT. farming_env%cycle)
      END IF

      output_unit = cp_print_key_unit_nr(logger, farming_section, "PROGRAM_RUN_INFO", &
                                         extension=".log")

      ! Captain/Minion not supported
      IF (para_env%num_pe == 1) THEN
         farming_env%captain_minion = .FALSE.
         ! only write if this rank really owns a valid output unit
         IF (output_unit > 0) THEN
            WRITE (output_unit, FMT="(T2,A)") "FARMING| Captain-Minion setup not supported for serial runs"
         END IF
      END IF
      IF (farming_env%captain_minion) THEN
         num_minions = para_env%num_pe - 1
      ELSE
         num_minions = para_env%num_pe
      END IF

      ! Validate a user partition on every rank, so that all ranks agree on whether it is kept
      bad_partition = .FALSE.
      partition_sum = 0
      IF (ASSOCIATED(farming_env%group_partition)) THEN
         partition_sum = SUM(farming_env%group_partition)
         bad_partition = (partition_sum /= num_minions)
      END IF

      ! default restart counter, defined even if no rank prints or restarts
      farming_env%restart_n = 1

      IF (output_unit > 0) THEN
         WRITE (output_unit, FMT="(T2,A,T71,I10)") "FARMING| Number of jobs found", farming_env%njobs
         IF (farming_env%ngroup_wish_set) THEN
            WRITE (output_unit, FMT="(T2,A,T71,I10)") "FARMING| Ngroup wish:", farming_env%ngroup_wish
            IF (ASSOCIATED(farming_env%group_partition)) THEN
               WRITE (output_unit, FMT="(T2,A)", ADVANCE="NO") "FARMING| User partition:"
               ! four entries per output line
               DO i = 0, SIZE(farming_env%group_partition) - 1
                  IF (MODULO(i, 4) == 0) WRITE (output_unit, *)
                  WRITE (output_unit, FMT='(I4)', ADVANCE="NO") farming_env%group_partition(i)
               END DO
               WRITE (output_unit, *)
               IF (bad_partition) THEN
                  WRITE (output_unit, FMT="(T2,A,T61,I10,T71,I10)") &
                     "FARMING| WARNING : group partition CPUs not equal to the available number (ignoring Captain) ", &
                     num_minions, partition_sum
                  WRITE (output_unit, FMT="(T2,A)") "FARMING| partition data ignored" ! any better idea ??
               END IF
            END IF
         END IF
         IF (farming_env%group_size_wish_set) THEN
            WRITE (output_unit, FMT="(T2,A,T71,I10)") "FARMING| Group size wish:", &
               farming_env%group_size_wish
         END IF
         WRITE (output_unit, FMT="(T2,A,T71,I10)") "FARMING| Max steps :", farming_env%max_steps
         WRITE (output_unit, FMT="(T2,A,T78,A3)") "FARMING| Cyclic jobs execution:", &
            MERGE("YES", " NO", farming_env%cycle)
         WRITE (output_unit, FMT="(T2,A,T78,A3)") "FARMING| Restarting farm:", &
            MERGE("YES", " NO", farming_env%restart)

         ! NOTE(review): the restart file is only read on the rank owning the output unit;
         ! with PROGRAM_RUN_INFO silenced a requested restart is therefore not performed.
         IF (farming_env%restart) THEN
            iunit = get_unit_number()
            OPEN (UNIT=iunit, FILE=farming_env%restart_file_name, IOSTAT=stat)
            IF (stat == 0) THEN
               READ (UNIT=iunit, FMT=*, IOSTAT=stat) farming_env%restart_n
               IF (stat /= 0) THEN
                  WRITE (output_unit, "(T2,A)") &
                     "FARMING| ---- WARNING ---- failed to read from ("// &
                     TRIM(farming_env%restart_file_name)//") starting at 1"
               ELSE
                  WRITE (output_unit, "(T2,A)") &
                     "FARMING| restarting from ("//TRIM(farming_env%restart_file_name)//")"
                  WRITE (output_unit, "(T2,A,T71,I10)") &
                     "FARMING| restarting at ", farming_env%restart_n
               END IF
            ELSE
               WRITE (output_unit, "(T2,A)") &
                  "FARMING| ---- WARNING ---- failed to open ("// &
                  TRIM(farming_env%restart_file_name)//"), starting at 1"
            END IF
            CLOSE (iunit, IOSTAT=stat)
         END IF

         CALL cp_print_key_finished_output(output_unit, logger, farming_section, &
                                           "PROGRAM_RUN_INFO")
      END IF

      ! drop an inconsistent user partition on all ranks, not only on the printing one
      IF (bad_partition) THEN
         DEALLOCATE (farming_env%group_partition)
      END IF

      CALL para_env%bcast(farming_env%restart_n)

   END SUBROUTINE farming_parse_input
316 :
317 : END MODULE farming_methods
|