Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : MODULE cp2k_runs
10 : USE atom, ONLY: atom_code
11 : USE bibliography, ONLY: Iannuzzi2026,&
12 : cite_reference,&
13 : cp2kqs2020
14 : USE bsse, ONLY: do_bsse_calculation
15 : USE cell_opt, ONLY: cp_cell_opt
16 : USE cp2k_debug, ONLY: cp2k_debug_energy_and_forces
17 : USE cp2k_info, ONLY: compile_date,&
18 : compile_revision,&
19 : cp2k_version,&
20 : cp2k_year
21 : USE cp_control_types, ONLY: dft_control_type
22 : USE cp_dbcsr_api, ONLY: dbcsr_finalize_lib,&
23 : dbcsr_init_lib,&
24 : dbcsr_print_config,&
25 : dbcsr_print_statistics
26 : USE cp_dbcsr_cp2k_link, ONLY: cp_dbcsr_config
27 : USE cp_files, ONLY: close_file,&
28 : open_file
29 : USE cp_log_handling, ONLY: cp_get_default_logger,&
30 : cp_logger_type,&
31 : cp_logger_would_log,&
32 : cp_note_level
33 : USE cp_output_handling, ONLY: cp_add_iter_level,&
34 : cp_print_key_finished_output,&
35 : cp_print_key_unit_nr,&
36 : cp_rm_iter_level
37 : USE cp_parser_methods, ONLY: parser_search_string
38 : USE cp_parser_types, ONLY: cp_parser_type,&
39 : parser_create,&
40 : parser_release
41 : USE cp_units, ONLY: cp_unit_set_create,&
42 : cp_unit_set_release,&
43 : cp_unit_set_type,&
44 : export_units_as_xml
45 : USE dbm_api, ONLY: dbm_library_print_stats
46 : USE environment, ONLY: cp2k_finalize,&
47 : cp2k_init,&
48 : cp2k_read,&
49 : cp2k_setup
50 : USE f77_interface, ONLY: create_force_env,&
51 : destroy_force_env,&
52 : f77_default_para_env => default_para_env,&
53 : f_env_add_defaults,&
54 : f_env_rm_defaults,&
55 : f_env_type
56 : USE farming_methods, ONLY: do_deadlock,&
57 : do_nothing,&
58 : do_wait,&
59 : farming_parse_input,&
60 : get_next_job
61 : USE farming_types, ONLY: deallocate_farming_env,&
62 : farming_env_type,&
63 : init_farming_env,&
64 : job_finished,&
65 : job_running
66 : USE force_env_methods, ONLY: force_env_calc_energy_force
67 : USE force_env_types, ONLY: force_env_get,&
68 : force_env_type
69 : USE geo_opt, ONLY: cp_geo_opt
70 : USE global_types, ONLY: global_environment_type,&
71 : globenv_create,&
72 : globenv_release
73 : USE grid_api, ONLY: grid_library_print_stats,&
74 : grid_library_set_config
75 : USE input_constants, ONLY: &
76 : bsse_run, cell_opt_run, debug_run, do_atom, do_band, do_cp2k, do_embed, do_farming, &
77 : do_fist, do_ipi, do_mixed, do_nnp, do_opt_basis, do_optimize_input, do_qmmm, do_qs, &
78 : do_sirius, do_swarm, do_tamc, do_test, do_tree_mc, do_tree_mc_ana, driver_run, ehrenfest, &
79 : energy_force_run, energy_run, geo_opt_run, linear_response_run, mimic_run, mol_dyn_run, &
80 : mon_car_run, negf_run, none_run, pint_run, real_time_propagation, rtp_method_bse, &
81 : tree_mc_run, vib_anal
82 : USE input_cp2k, ONLY: create_cp2k_root_section
83 : USE input_cp2k_check, ONLY: check_cp2k_input
84 : USE input_cp2k_global, ONLY: create_global_section
85 : USE input_cp2k_read, ONLY: read_input
86 : USE input_keyword_types, ONLY: keyword_release
87 : USE input_parsing, ONLY: section_vals_parse
88 : USE input_section_types, ONLY: &
89 : section_release, section_type, section_vals_create, section_vals_get_subs_vals, &
90 : section_vals_release, section_vals_retain, section_vals_type, section_vals_val_get, &
91 : section_vals_write, write_section_xml
92 : USE ipi_driver, ONLY: run_driver
93 : USE kinds, ONLY: default_path_length,&
94 : default_string_length,&
95 : dp,&
96 : int_8
97 : USE library_tests, ONLY: lib_test
98 : USE machine, ONLY: default_output_unit,&
99 : m_chdir,&
100 : m_flush,&
101 : m_getcwd,&
102 : m_memory,&
103 : m_memory_max,&
104 : m_walltime
105 : USE mc_run, ONLY: do_mon_car
106 : USE md_run, ONLY: qs_mol_dyn
107 : USE message_passing, ONLY: mp_any_source,&
108 : mp_comm_type,&
109 : mp_para_env_release,&
110 : mp_para_env_type
111 : USE mimic_loop, ONLY: do_mimic_loop
112 : USE mscfg_methods, ONLY: do_mol_loop,&
113 : loop_over_molecules
114 : USE neb_methods, ONLY: neb
115 : USE negf_methods, ONLY: do_negf
116 : USE offload_api, ONLY: offload_get_chosen_device,&
117 : offload_get_device_count,&
118 : offload_mempool_stats_print
119 : USE optimize_basis, ONLY: run_optimize_basis
120 : USE optimize_input, ONLY: run_optimize_input
121 : USE pint_methods, ONLY: do_pint_run
122 : USE qs_environment_types, ONLY: get_qs_env
123 : USE qs_linres_module, ONLY: linres_calculation
124 : USE reference_manager, ONLY: export_references_as_xml
125 : USE rt_bse, ONLY: run_propagation_bse
126 : USE rt_propagation, ONLY: rt_prop_setup
127 : USE swarm, ONLY: run_swarm
128 : USE tamc_run, ONLY: qs_tamc
129 : USE tmc_setup, ONLY: do_analyze_files,&
130 : do_tmc
131 : USE vibrational_analysis, ONLY: vb_anal
132 : #include "../base/base_uses.f90"
133 :
134 : IMPLICIT NONE
135 :
136 : PRIVATE
137 :
138 : PUBLIC :: write_xml_file, run_input
139 :
140 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp2k_runs'
141 :
142 : CONTAINS
143 :
144 : ! **************************************************************************************************
145 : !> \brief performs an instance of a cp2k run
146 : !> \param input_declaration ...
147 : !> \param input_file_name name of the file to be opened for input
148 : !> \param output_unit unit to which output should be written
149 : !> \param mpi_comm ...
150 : !> \param initial_variables key-value list of initial preprocessor variables
151 : !> \author Joost VandeVondele
152 : !> \note
153 : !> para_env should be a valid communicator
154 : !> output_unit should be writeable by at least the lowest rank of the mpi group
155 : !>
156 : !> recursive because a given run_type might need to be able to perform
157 : !> another cp2k_run as part of its job (e.g. farming, classical equilibration, ...)
158 : !>
159 : !> the idea is that a cp2k instance should be able to run with just three
160 : !> arguments, i.e. a given input file, output unit, mpi communicator.
161 : !> giving these three to cp2k_run should produce a valid run.
162 : !> the only task of the PROGRAM cp2k is to create valid instances of the
163 : !> above arguments. Ideally, anything that is called afterwards should be
164 : !> able to run simultaneously / multithreaded / sequential / parallel / ...
165 : !> and able to fail safe
166 : ! **************************************************************************************************
167 9941 : RECURSIVE SUBROUTINE cp2k_run(input_declaration, input_file_name, output_unit, mpi_comm, initial_variables)
168 : TYPE(section_type), POINTER :: input_declaration
169 : CHARACTER(LEN=*), INTENT(IN) :: input_file_name
170 : INTEGER, INTENT(IN) :: output_unit
171 :
172 : CLASS(mp_comm_type) :: mpi_comm
173 : CHARACTER(len=default_path_length), &
174 : DIMENSION(:, :), INTENT(IN) :: initial_variables
175 :
176 : INTEGER :: f_env_handle, grid_backend, ierr, &
177 : iter_level, method_name_id, &
178 : new_env_id, prog_name_id, run_type_id
179 : #if defined(__DBCSR_ACC)
180 : INTEGER, TARGET :: offload_chosen_device
181 : #endif
182 : INTEGER, POINTER :: active_device_id
183 : INTEGER(KIND=int_8) :: m_memory_max_mpi
184 : LOGICAL :: echo_input, grid_apply_cutoff, &
185 : grid_validate, I_was_ionode
186 : TYPE(cp_logger_type), POINTER :: logger, sublogger
187 : TYPE(mp_para_env_type), POINTER :: para_env
188 : TYPE(dft_control_type), POINTER :: dft_control
189 : TYPE(f_env_type), POINTER :: f_env
190 : TYPE(force_env_type), POINTER :: force_env
191 : TYPE(global_environment_type), POINTER :: globenv
192 : TYPE(section_vals_type), POINTER :: glob_section, input_file, root_section
193 :
194 9941 : NULLIFY (para_env, f_env, dft_control, active_device_id)
195 9941 : ALLOCATE (para_env)
196 9941 : para_env = mpi_comm
197 :
198 : #if defined(__DBCSR_ACC)
199 : IF (offload_get_device_count() > 0) THEN
200 : offload_chosen_device = offload_get_chosen_device()
201 : active_device_id => offload_chosen_device
202 : END IF
203 : #endif
204 : CALL dbcsr_init_lib(mpi_comm%get_handle(), io_unit=output_unit, &
205 9941 : accdrv_active_device_id=active_device_id)
206 :
207 9941 : NULLIFY (globenv, force_env)
208 :
209 9941 : CALL cite_reference(cp2kqs2020)
210 9941 : CALL cite_reference(Iannuzzi2026)
211 :
212 : ! Parse the input
213 : input_file => read_input(input_declaration, input_file_name, &
214 : initial_variables=initial_variables, &
215 9941 : para_env=para_env)
216 :
217 9941 : CALL para_env%sync()
218 :
219 9941 : logger => cp_get_default_logger()
220 :
221 9941 : glob_section => section_vals_get_subs_vals(input_file, "GLOBAL")
222 9941 : CALL section_vals_val_get(glob_section, "ECHO_INPUT", l_val=echo_input)
223 9941 : IF (echo_input .AND. (output_unit > 0)) THEN
224 : CALL section_vals_write(input_file, &
225 : unit_nr=output_unit, &
226 : hide_root=.TRUE., &
227 16 : hide_defaults=.FALSE.)
228 : END IF
229 :
230 9941 : CALL check_cp2k_input(input_declaration, input_file, para_env=para_env, output_unit=output_unit)
231 9941 : root_section => input_file
232 : CALL section_vals_val_get(input_file, "GLOBAL%PROGRAM_NAME", &
233 9941 : i_val=prog_name_id)
234 : CALL section_vals_val_get(input_file, "GLOBAL%RUN_TYPE", &
235 9941 : i_val=run_type_id)
236 9941 : CALL section_vals_val_get(root_section, "FORCE_EVAL%METHOD", i_val=method_name_id)
237 :
238 9941 : IF (prog_name_id /= do_cp2k) THEN
239 : ! initial setup (cp2k does in in the creation of the force_env)
240 524 : CALL globenv_create(globenv)
241 524 : CALL section_vals_retain(input_file)
242 524 : CALL cp2k_init(para_env, output_unit, globenv, input_file_name=input_file_name)
243 524 : CALL cp2k_read(root_section, para_env, globenv)
244 524 : CALL cp2k_setup(root_section, para_env, globenv)
245 : END IF
246 :
247 9941 : CALL cp_dbcsr_config(root_section)
248 9941 : IF (output_unit > 0 .AND. &
249 : cp_logger_would_log(logger, cp_note_level)) THEN
250 4999 : CALL dbcsr_print_config(unit_nr=output_unit)
251 4999 : WRITE (UNIT=output_unit, FMT='()')
252 : END IF
253 :
254 : ! Configure the grid library.
255 9941 : CALL section_vals_val_get(root_section, "GLOBAL%GRID%BACKEND", i_val=grid_backend)
256 9941 : CALL section_vals_val_get(root_section, "GLOBAL%GRID%VALIDATE", l_val=grid_validate)
257 9941 : CALL section_vals_val_get(root_section, "GLOBAL%GRID%APPLY_CUTOFF", l_val=grid_apply_cutoff)
258 :
259 : CALL grid_library_set_config(backend=grid_backend, &
260 : validate=grid_validate, &
261 9941 : apply_cutoff=grid_apply_cutoff)
262 :
263 364 : SELECT CASE (prog_name_id)
264 : CASE (do_atom)
265 364 : globenv%run_type_id = none_run
266 364 : CALL atom_code(root_section)
267 : CASE (do_optimize_input)
268 6 : CALL run_optimize_input(input_declaration, root_section, para_env)
269 : CASE (do_swarm)
270 6 : CALL run_swarm(input_declaration, root_section, para_env, globenv, input_file_name)
271 : CASE (do_farming) ! TODO: refactor cp2k's startup code
272 24 : CALL dbcsr_finalize_lib()
273 24 : CALL farming_run(input_declaration, root_section, para_env, initial_variables)
274 : CALL dbcsr_init_lib(mpi_comm%get_handle(), io_unit=output_unit, &
275 24 : accdrv_active_device_id=active_device_id)
276 : CASE (do_opt_basis)
277 4 : CALL run_optimize_basis(input_declaration, root_section, para_env)
278 4 : globenv%run_type_id = none_run
279 : CASE (do_cp2k)
280 : CALL create_force_env(new_env_id, &
281 : input_declaration=input_declaration, &
282 : input_path=input_file_name, &
283 : output_path="__STD_OUT__", mpi_comm=para_env, &
284 : output_unit=output_unit, &
285 : owns_out_unit=.FALSE., &
286 9417 : input=input_file, ierr=ierr)
287 9417 : CPASSERT(ierr == 0)
288 9417 : CALL f_env_add_defaults(new_env_id, f_env, handle=f_env_handle)
289 9417 : force_env => f_env%force_env
290 9417 : CALL force_env_get(force_env, globenv=globenv)
291 : CASE (do_test)
292 80 : CALL lib_test(root_section, para_env, globenv)
293 : CASE (do_tree_mc) ! TMC entry point
294 28 : CALL do_tmc(input_declaration, root_section, para_env, globenv)
295 : CASE (do_tree_mc_ana)
296 12 : CALL do_analyze_files(input_declaration, root_section, para_env)
297 : CASE default
298 19358 : CPABORT("")
299 : END SELECT
300 9941 : CALL section_vals_release(input_file)
301 :
302 10007 : SELECT CASE (globenv%run_type_id)
303 : CASE (pint_run)
304 66 : CALL do_pint_run(para_env, root_section, input_declaration, globenv)
305 : CASE (none_run, tree_mc_run)
306 : ! do nothing
307 : CASE (driver_run)
308 0 : CALL run_driver(force_env, globenv)
309 : CASE (energy_run, energy_force_run)
310 : IF (method_name_id /= do_qs .AND. &
311 : method_name_id /= do_sirius .AND. &
312 : method_name_id /= do_qmmm .AND. &
313 : method_name_id /= do_mixed .AND. &
314 : method_name_id /= do_nnp .AND. &
315 : method_name_id /= do_embed .AND. &
316 5451 : method_name_id /= do_fist .AND. &
317 : method_name_id /= do_ipi) &
318 0 : CPABORT("Energy/Force run not available for all methods ")
319 :
320 5451 : sublogger => cp_get_default_logger()
321 : CALL cp_add_iter_level(sublogger%iter_info, "JUST_ENERGY", &
322 5451 : n_rlevel_new=iter_level)
323 :
324 : ! loop over molecules to generate a molecular guess
325 : ! this procedure is initiated here to avoid passing globenv deep down
326 : ! the subroutine stack
327 5451 : IF (do_mol_loop(force_env=force_env)) &
328 10 : CALL loop_over_molecules(globenv, force_env)
329 :
330 9729 : SELECT CASE (globenv%run_type_id)
331 : CASE (energy_run)
332 4278 : CALL force_env_calc_energy_force(force_env, calc_force=.FALSE.)
333 : CASE (energy_force_run)
334 1173 : CALL force_env_calc_energy_force(force_env, calc_force=.TRUE.)
335 : CASE default
336 5451 : CPABORT("")
337 : END SELECT
338 5450 : CALL cp_rm_iter_level(sublogger%iter_info, level_name="JUST_ENERGY", n_rlevel_att=iter_level)
339 : CASE (mol_dyn_run)
340 1636 : CALL qs_mol_dyn(force_env, globenv)
341 : CASE (geo_opt_run)
342 776 : CALL cp_geo_opt(force_env, globenv)
343 : CASE (cell_opt_run)
344 224 : CALL cp_cell_opt(force_env, globenv)
345 : CASE (mon_car_run)
346 20 : CALL do_mon_car(force_env, globenv, input_declaration, input_file_name)
347 : CASE (do_tamc)
348 2 : CALL qs_tamc(force_env, globenv)
349 : CASE (real_time_propagation)
350 140 : IF (method_name_id /= do_qs) &
351 0 : CPABORT("Real time propagation needs METHOD QS. ")
352 140 : CALL get_qs_env(force_env%qs_env, dft_control=dft_control)
353 140 : dft_control%rtp_control%fixed_ions = .TRUE.
354 228 : SELECT CASE (dft_control%rtp_control%rtp_method)
355 : CASE (rtp_method_bse)
356 : ! Run the TD-BSE method
357 14 : CALL run_propagation_bse(force_env%qs_env, force_env)
358 : CASE default
359 : ! Run the TDDFT method
360 140 : CALL rt_prop_setup(force_env)
361 : END SELECT
362 : CASE (ehrenfest)
363 74 : IF (method_name_id /= do_qs) &
364 0 : CPABORT("Ehrenfest dynamics needs METHOD QS ")
365 74 : CALL get_qs_env(force_env%qs_env, dft_control=dft_control)
366 74 : dft_control%rtp_control%fixed_ions = .FALSE.
367 74 : CALL qs_mol_dyn(force_env, globenv)
368 : CASE (bsse_run)
369 12 : CALL do_bsse_calculation(force_env, globenv)
370 : CASE (linear_response_run)
371 188 : IF (method_name_id /= do_qs .AND. &
372 : method_name_id /= do_qmmm) &
373 0 : CPABORT("Property calculations by Linear Response only within the QS or QMMM program ")
374 : ! The Ground State is needed, it can be read from Restart
375 188 : CALL force_env_calc_energy_force(force_env, calc_force=.FALSE., linres=.TRUE.)
376 188 : CALL linres_calculation(force_env)
377 : CASE (debug_run)
378 790 : SELECT CASE (method_name_id)
379 : CASE (do_qs, do_qmmm, do_fist)
380 736 : CALL cp2k_debug_energy_and_forces(force_env)
381 : CASE DEFAULT
382 736 : CPABORT("Debug run available only with QS, FIST, and QMMM program ")
383 : END SELECT
384 : CASE (vib_anal)
385 54 : CALL vb_anal(root_section, input_declaration, para_env, globenv)
386 : CASE (do_band)
387 34 : CALL neb(root_section, input_declaration, para_env, globenv)
388 : CASE (negf_run)
389 4 : CALL do_negf(force_env)
390 : CASE (mimic_run)
391 0 : CALL do_mimic_loop(force_env)
392 : CASE default
393 15392 : CPABORT("")
394 : END SELECT
395 :
396 : ! Sample peak memory
397 9940 : CALL m_memory()
398 :
399 9940 : CALL dbcsr_print_statistics()
400 9940 : CALL dbm_library_print_stats(mpi_comm=mpi_comm, output_unit=output_unit)
401 9940 : CALL grid_library_print_stats(mpi_comm=mpi_comm, output_unit=output_unit)
402 9940 : CALL offload_mempool_stats_print(mpi_comm=mpi_comm, output_unit=output_unit)
403 :
404 9940 : m_memory_max_mpi = m_memory_max
405 9940 : CALL mpi_comm%max(m_memory_max_mpi)
406 9940 : IF (output_unit > 0) THEN
407 4998 : WRITE (output_unit, *)
408 : WRITE (output_unit, '(T2,"MEMORY| Estimated peak process memory [MiB]",T73,I8)') &
409 4998 : (m_memory_max_mpi + (1024*1024) - 1)/(1024*1024)
410 : END IF
411 :
412 9940 : IF (prog_name_id == do_cp2k) THEN
413 9416 : f_env%force_env => force_env ! for mc
414 9416 : IF (ASSOCIATED(force_env%globenv)) THEN
415 9416 : IF (.NOT. ASSOCIATED(force_env%globenv, globenv)) THEN
416 0 : CALL globenv_release(force_env%globenv) !mc
417 : END IF
418 : END IF
419 9416 : force_env%globenv => globenv !mc
420 : CALL f_env_rm_defaults(f_env, ierr=ierr, &
421 9416 : handle=f_env_handle)
422 9416 : CPASSERT(ierr == 0)
423 9416 : CALL destroy_force_env(new_env_id, ierr=ierr)
424 9416 : CPASSERT(ierr == 0)
425 : ELSE
426 : I_was_ionode = para_env%is_source()
427 524 : CALL cp2k_finalize(root_section, para_env, globenv)
428 524 : CPASSERT(globenv%ref_count == 1)
429 524 : CALL section_vals_release(root_section)
430 524 : CALL globenv_release(globenv)
431 : END IF
432 :
433 9940 : CALL dbcsr_finalize_lib()
434 :
435 9940 : CALL mp_para_env_release(para_env)
436 :
437 9940 : END SUBROUTINE cp2k_run
438 :
439 : ! **************************************************************************************************
440 : !> \brief performs a farming run that performs several independent cp2k_runs
441 : !> \param input_declaration ...
442 : !> \param root_section ...
443 : !> \param para_env ...
444 : !> \param initial_variables ...
445 : !> \author Joost VandeVondele
446 : !> \note
447 : !> needs to be part of this module as the cp2k_run -> farming_run -> cp2k_run
448 : !> calling style creates a hard circular dependency
449 : ! **************************************************************************************************
450 24 : RECURSIVE SUBROUTINE farming_run(input_declaration, root_section, para_env, initial_variables)
451 : TYPE(section_type), POINTER :: input_declaration
452 : TYPE(section_vals_type), POINTER :: root_section
453 : TYPE(mp_para_env_type), POINTER :: para_env
454 : CHARACTER(len=default_path_length), DIMENSION(:, :), INTENT(IN) :: initial_variables
455 :
456 : CHARACTER(len=*), PARAMETER :: routineN = 'farming_run'
457 : INTEGER, PARAMETER :: minion_status_done = -3, &
458 : minion_status_wait = -4
459 :
460 : CHARACTER(len=7) :: label
461 : CHARACTER(LEN=default_path_length) :: output_file
462 : CHARACTER(LEN=default_string_length) :: str
463 : INTEGER :: dest, handle, i, i_job_to_restart, ierr, ijob, ijob_current, &
464 : ijob_end, ijob_start, iunit, n_jobs_to_run, new_output_unit, &
465 : new_rank, ngroups, num_minions, output_unit, primus_minion, &
466 : minion_rank, source, tag, todo
467 24 : INTEGER, DIMENSION(:), POINTER :: group_distribution, &
468 24 : captain_minion_partition, &
469 24 : minion_distribution, &
470 24 : minion_status
471 : LOGICAL :: found, captain, minion
472 : REAL(KIND=dp) :: t1, t2
473 24 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: waittime
474 : TYPE(cp_logger_type), POINTER :: logger
475 : TYPE(cp_parser_type), POINTER :: my_parser
476 : TYPE(cp_unit_set_type) :: default_units
477 : TYPE(farming_env_type), POINTER :: farming_env
478 : TYPE(section_type), POINTER :: g_section
479 : TYPE(section_vals_type), POINTER :: g_data
480 : TYPE(mp_comm_type) :: minion_group, new_group
481 :
482 : ! the primus of all minions, talks to the captain on topics concerning all minions
483 24 : CALL timeset(routineN, handle)
484 24 : NULLIFY (my_parser, g_section, g_data)
485 :
486 24 : logger => cp_get_default_logger()
487 : output_unit = cp_print_key_unit_nr(logger, root_section, "FARMING%PROGRAM_RUN_INFO", &
488 24 : extension=".log")
489 :
490 24 : IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A)") "FARMING| Hi, welcome on this farm!"
491 :
492 24 : ALLOCATE (farming_env)
493 24 : CALL init_farming_env(farming_env)
494 : ! remember where we started
495 24 : CALL m_getcwd(farming_env%cwd)
496 24 : CALL farming_parse_input(farming_env, root_section, para_env)
497 :
498 : ! the full mpi group is first split in a minion group and a captain group, the latter being at most 1 process
499 24 : minion = .TRUE.
500 24 : captain = .FALSE.
501 24 : IF (farming_env%captain_minion) THEN
502 4 : IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A)") "FARMING| Using a Captain-Minion setup"
503 :
504 4 : ALLOCATE (captain_minion_partition(0:1))
505 12 : captain_minion_partition = [1, para_env%num_pe - 1]
506 12 : ALLOCATE (group_distribution(0:para_env%num_pe - 1))
507 :
508 : CALL minion_group%from_split(para_env, ngroups, group_distribution, &
509 4 : n_subgroups=2, group_partition=captain_minion_partition)
510 4 : DEALLOCATE (captain_minion_partition)
511 4 : DEALLOCATE (group_distribution)
512 4 : num_minions = minion_group%num_pe
513 4 : minion_rank = minion_group%mepos
514 :
515 4 : IF (para_env%mepos == 0) THEN
516 2 : minion = .FALSE.
517 2 : captain = .TRUE.
518 : ! on the captain node, num_minions corresponds to the size of the captain group
519 2 : CPASSERT(num_minions == 1)
520 2 : num_minions = para_env%num_pe - 1
521 2 : minion_rank = -1
522 : END IF
523 4 : CPASSERT(num_minions == para_env%num_pe - 1)
524 : ELSE
525 : ! all processes are minions
526 20 : IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A)") "FARMING| Using a Minion-only setup"
527 20 : CALL minion_group%from_dup(para_env)
528 20 : num_minions = minion_group%num_pe
529 20 : minion_rank = minion_group%mepos
530 : END IF
531 24 : IF (output_unit > 0) WRITE (output_unit, FMT="(T2,A,I0)") "FARMING| Number of Minions ", num_minions
532 :
533 : ! keep track of which para_env rank is which minion/captain
534 72 : ALLOCATE (minion_distribution(0:para_env%num_pe - 1))
535 72 : minion_distribution = 0
536 24 : minion_distribution(para_env%mepos) = minion_rank
537 120 : CALL para_env%sum(minion_distribution)
538 : ! we do have a primus inter pares
539 24 : primus_minion = 0
540 48 : DO i = 1, para_env%num_pe - 1
541 48 : IF (minion_distribution(i) == 0) primus_minion = i
542 : END DO
543 :
544 : ! split the current communicator for the minions
545 : ! in a new_group, new_size and new_rank according to the number of groups required according to the input
546 72 : ALLOCATE (group_distribution(0:num_minions - 1))
547 68 : group_distribution = -1
548 24 : IF (minion) THEN
549 22 : IF (farming_env%group_size_wish_set) THEN
550 4 : farming_env%group_size_wish = MIN(farming_env%group_size_wish, para_env%num_pe)
551 : CALL new_group%from_split(minion_group, ngroups, group_distribution, &
552 4 : subgroup_min_size=farming_env%group_size_wish, stride=farming_env%stride)
553 18 : ELSE IF (farming_env%ngroup_wish_set) THEN
554 18 : IF (ASSOCIATED(farming_env%group_partition)) THEN
555 : CALL new_group%from_split(minion_group, ngroups, group_distribution, &
556 : n_subgroups=farming_env%ngroup_wish, &
557 0 : group_partition=farming_env%group_partition, stride=farming_env%stride)
558 : ELSE
559 : CALL new_group%from_split(minion_group, ngroups, group_distribution, &
560 18 : n_subgroups=farming_env%ngroup_wish, stride=farming_env%stride)
561 : END IF
562 : ELSE
563 0 : CPABORT("must set either group_size_wish or ngroup_wish")
564 : END IF
565 22 : new_rank = new_group%mepos
566 : END IF
567 :
568 : ! transfer the info about the minion group distribution to the captain
569 24 : IF (farming_env%captain_minion) THEN
570 4 : IF (para_env%mepos == primus_minion) THEN
571 2 : tag = 1
572 4 : CALL para_env%send(group_distribution, 0, tag)
573 2 : tag = 2
574 2 : CALL para_env%send(ngroups, 0, tag)
575 : END IF
576 4 : IF (para_env%mepos == 0) THEN
577 2 : tag = 1
578 6 : CALL para_env%recv(group_distribution, primus_minion, tag)
579 2 : tag = 2
580 2 : CALL para_env%recv(ngroups, primus_minion, tag)
581 : END IF
582 : END IF
583 :
584 : ! write info on group distribution
585 24 : IF (output_unit > 0) THEN
586 12 : WRITE (output_unit, FMT="(T2,A,T71,I10)") "FARMING| Number of created MPI (Minion) groups:", ngroups
587 12 : WRITE (output_unit, FMT="(T2,A)", ADVANCE="NO") "FARMING| MPI (Minion) process to group correspondence:"
588 34 : DO i = 0, num_minions - 1
589 22 : IF (MODULO(i, 4) == 0) WRITE (output_unit, *)
590 : WRITE (output_unit, FMT='(A3,I6,A3,I6,A1)', ADVANCE="NO") &
591 34 : " (", i, " : ", group_distribution(i), ")"
592 : END DO
593 12 : WRITE (output_unit, *)
594 12 : CALL m_flush(output_unit)
595 : END IF
596 :
597 : ! protect about too many jobs being run in single go. Not more jobs are allowed than the number in the input file
598 : ! and determine the future restart point
599 24 : IF (farming_env%cycle) THEN
600 2 : n_jobs_to_run = farming_env%max_steps*ngroups
601 2 : i_job_to_restart = MODULO(farming_env%restart_n + n_jobs_to_run - 1, farming_env%njobs) + 1
602 : ELSE
603 22 : n_jobs_to_run = MIN(farming_env%njobs, farming_env%max_steps*ngroups)
604 22 : n_jobs_to_run = MIN(n_jobs_to_run, farming_env%njobs - farming_env%restart_n + 1)
605 22 : i_job_to_restart = n_jobs_to_run + farming_env%restart_n
606 : END IF
607 :
608 : ! and write the restart now, that's the point where the next job starts, even if this one is running
609 : iunit = cp_print_key_unit_nr(logger, root_section, "FARMING%RESTART", &
610 24 : extension=".restart")
611 24 : IF (iunit > 0) THEN
612 12 : WRITE (iunit, *) i_job_to_restart
613 : END IF
614 24 : CALL cp_print_key_finished_output(iunit, logger, root_section, "FARMING%RESTART")
615 :
616 : ! this is the job range to be executed.
617 24 : ijob_start = farming_env%restart_n
618 24 : ijob_end = ijob_start + n_jobs_to_run - 1
619 24 : IF (output_unit > 0 .AND. ijob_end - ijob_start < 0) THEN
620 0 : WRITE (output_unit, FMT="(T2,A)") "FARMING| --- WARNING --- NO JOBS NEED EXECUTION ? "
621 0 : WRITE (output_unit, FMT="(T2,A)") "FARMING| is the cycle keyword required ?"
622 0 : WRITE (output_unit, FMT="(T2,A)") "FARMING| or is a stray RESTART file present ?"
623 0 : WRITE (output_unit, FMT="(T2,A)") "FARMING| or is the group_size requested smaller than the number of CPUs?"
624 : END IF
625 :
626 : ! actual executions of the jobs in two different modes
627 24 : IF (farming_env%captain_minion) THEN
628 4 : IF (minion) THEN
629 : ! keep on doing work until captain has decided otherwise
630 2 : todo = do_wait
631 : DO
632 20 : IF (new_rank == 0) THEN
633 : ! the head minion tells the captain he's done or ready to start
634 : ! the message tells what has been done lately
635 20 : tag = 1
636 20 : dest = 0
637 20 : CALL para_env%send(todo, dest, tag)
638 :
639 : ! gets the new todo item
640 20 : tag = 2
641 20 : source = 0
642 20 : CALL para_env%recv(todo, source, tag)
643 :
644 : ! and informs his peer minions
645 20 : CALL new_group%bcast(todo, 0)
646 : ELSE
647 0 : CALL new_group%bcast(todo, 0)
648 : END IF
649 :
650 : ! if the todo is do_nothing we are flagged to quit. Otherwise it is the job number
651 0 : SELECT CASE (todo)
652 : CASE (do_wait, do_deadlock)
653 : ! go for a next round, but we first wait a bit
654 0 : t1 = m_walltime()
655 : DO
656 0 : t2 = m_walltime()
657 0 : IF (t2 - t1 > farming_env%wait_time) EXIT
658 : END DO
659 : CASE (do_nothing)
660 18 : EXIT
661 : CASE (1:)
662 20 : CALL execute_job(todo)
663 : END SELECT
664 : END DO
665 : ELSE ! captain
666 6 : ALLOCATE (minion_status(0:ngroups - 1))
667 4 : minion_status = minion_status_wait
668 2 : ijob_current = ijob_start - 1
669 :
670 20 : DO
671 24 : IF (ALL(minion_status == minion_status_done)) EXIT
672 :
673 : ! who's the next minion waiting for work
674 20 : tag = 1
675 20 : source = mp_any_source
676 20 : CALL para_env%recv(todo, source, tag) ! updates source
677 20 : IF (todo > 0) THEN
678 18 : farming_env%Job(todo)%status = job_finished
679 18 : IF (output_unit > 0) THEN
680 18 : WRITE (output_unit, FMT=*) "Job finished: ", todo
681 18 : CALL m_flush(output_unit)
682 : END IF
683 : END IF
684 :
685 : ! get the next job in line, this could be do_nothing, if we're finished
686 20 : CALL get_next_job(farming_env, ijob_start, ijob_end, ijob_current, todo)
687 20 : dest = source
688 20 : tag = 2
689 20 : CALL para_env%send(todo, dest, tag)
690 :
691 22 : IF (todo > 0) THEN
692 18 : farming_env%Job(todo)%status = job_running
693 18 : IF (output_unit > 0) THEN
694 18 : WRITE (output_unit, FMT=*) "Job: ", todo, " Dir: ", TRIM(farming_env%Job(todo)%cwd), &
695 36 : " assigned to group ", group_distribution(minion_distribution(dest))
696 18 : CALL m_flush(output_unit)
697 : END IF
698 : ELSE
699 2 : IF (todo == do_nothing) THEN
700 2 : minion_status(group_distribution(minion_distribution(dest))) = minion_status_done
701 2 : IF (output_unit > 0) THEN
702 2 : WRITE (output_unit, FMT=*) "group done: ", group_distribution(minion_distribution(dest))
703 2 : CALL m_flush(output_unit)
704 : END IF
705 : END IF
706 2 : IF (todo == do_deadlock) THEN
707 0 : IF (output_unit > 0) THEN
708 0 : WRITE (output_unit, FMT=*) ""
709 0 : WRITE (output_unit, FMT=*) "FARMING JOB DEADLOCKED ... CIRCULAR DEPENDENCIES"
710 0 : WRITE (output_unit, FMT=*) ""
711 0 : CALL m_flush(output_unit)
712 : END IF
713 0 : CPASSERT(todo /= do_deadlock)
714 : END IF
715 : END IF
716 :
717 : END DO
718 :
719 2 : DEALLOCATE (minion_status)
720 :
721 : END IF
722 : ELSE
723 : ! this is the non-captain-minion mode way of executing the jobs
724 : ! the i-th job in the input is always executed by the MODULO(i-1,ngroups)-th group
725 : ! (needed for cyclic runs, we don't want two groups working on the same job)
726 20 : IF (output_unit > 0) THEN
727 10 : IF (ijob_end - ijob_start >= 0) THEN
728 10 : WRITE (output_unit, FMT="(T2,A)") "FARMING| List of jobs : "
729 81 : DO ijob = ijob_start, ijob_end
730 71 : i = MODULO(ijob - 1, farming_env%njobs) + 1
731 71 : WRITE (output_unit, FMT=*) "Job: ", i, " Dir: ", TRIM(farming_env%Job(i)%cwd), " Input: ", &
732 152 : TRIM(farming_env%Job(i)%input), " MPI group:", MODULO(i - 1, ngroups)
733 : END DO
734 : END IF
735 10 : CALL m_flush(output_unit)
736 : END IF
737 :
738 162 : DO ijob = ijob_start, ijob_end
739 142 : i = MODULO(ijob - 1, farming_env%njobs) + 1
740 : ! this farms out the jobs
741 162 : IF (MODULO(i - 1, ngroups) == group_distribution(minion_rank)) THEN
742 104 : IF (output_unit > 0) THEN
743 54 : WRITE (output_unit, FMT="(T2,A,I5.5,A)", ADVANCE="NO") " Running Job ", i, &
744 108 : " in "//TRIM(farming_env%Job(i)%cwd)//"."
745 54 : CALL m_flush(output_unit)
746 : END IF
747 104 : CALL execute_job(i)
748 104 : IF (output_unit > 0) THEN
749 54 : WRITE (output_unit, FMT="(A)") " Done, output in "//TRIM(output_file)
750 54 : CALL m_flush(output_unit)
751 : END IF
752 : END IF
753 : END DO
754 : END IF
755 :
756 : ! keep information about how long each process has to wait
757 : ! i.e. the load imbalance
758 24 : t1 = m_walltime()
759 24 : CALL para_env%sync()
760 24 : t2 = m_walltime()
761 72 : ALLOCATE (waittime(0:para_env%num_pe - 1))
762 72 : waittime = 0.0_dp
763 24 : waittime(para_env%mepos) = t2 - t1
764 24 : CALL para_env%sum(waittime)
765 24 : IF (output_unit > 0) THEN
766 12 : WRITE (output_unit, '(T2,A)') "Process idle times [s] at the end of the run"
767 36 : DO i = 0, para_env%num_pe - 1
768 : WRITE (output_unit, FMT='(A2,I6,A3,F8.3,A1)', ADVANCE="NO") &
769 24 : " (", i, " : ", waittime(i), ")"
770 36 : IF (MOD(i + 1, 4) == 0) WRITE (output_unit, '(A)') ""
771 : END DO
772 12 : CALL m_flush(output_unit)
773 : END IF
774 24 : DEALLOCATE (waittime)
775 :
776 : ! give back the communicators of the split groups
777 24 : IF (minion) CALL new_group%free()
778 24 : CALL minion_group%free()
779 :
780 : ! and message passing deallocate structures
781 24 : DEALLOCATE (group_distribution)
782 24 : DEALLOCATE (minion_distribution)
783 :
784 : ! clean the farming env
785 24 : CALL deallocate_farming_env(farming_env)
786 :
787 : CALL cp_print_key_finished_output(output_unit, logger, root_section, &
788 24 : "FARMING%PROGRAM_RUN_INFO")
789 :
790 288 : CALL timestop(handle)
791 :
792 : CONTAINS
793 : ! **************************************************************************************************
794 : !> \brief ...
795 : !> \param i ...
796 : ! **************************************************************************************************
797 122 : RECURSIVE SUBROUTINE execute_job(i)
798 : INTEGER :: i
799 :
800 : ! change to the new working directory
801 :
802 122 : CALL m_chdir(TRIM(farming_env%Job(i)%cwd), ierr)
803 122 : IF (ierr /= 0) &
804 0 : CPABORT("Failed to change dir to: "//TRIM(farming_env%Job(i)%cwd))
805 :
806 : ! generate a fresh call to cp2k_run
807 122 : IF (new_rank == 0) THEN
808 :
809 89 : IF (farming_env%Job(i)%output == "") THEN
810 : ! generate the output file
811 85 : WRITE (output_file, '(A12,I5.5)') "FARMING_OUT_", i
812 255 : ALLOCATE (my_parser)
813 85 : CALL parser_create(my_parser, file_name=TRIM(farming_env%Job(i)%input))
814 85 : label = "&GLOBAL"
815 85 : CALL parser_search_string(my_parser, label, ignore_case=.TRUE., found=found)
816 170 : IF (found) THEN
817 85 : CALL create_global_section(g_section)
818 85 : CALL section_vals_create(g_data, g_section)
819 : CALL cp_unit_set_create(default_units, "OUTPUT")
820 85 : CALL section_vals_parse(g_data, my_parser, default_units)
821 85 : CALL cp_unit_set_release(default_units)
822 : CALL section_vals_val_get(g_data, "PROJECT", &
823 85 : c_val=str)
824 85 : IF (str /= "") output_file = TRIM(str)//".out"
825 : CALL section_vals_val_get(g_data, "OUTPUT_FILE_NAME", &
826 85 : c_val=str)
827 85 : IF (str /= "") output_file = str
828 85 : CALL section_vals_release(g_data)
829 85 : CALL section_release(g_section)
830 : END IF
831 85 : CALL parser_release(my_parser)
832 85 : DEALLOCATE (my_parser)
833 : ELSE
834 4 : output_file = farming_env%Job(i)%output
835 : END IF
836 :
837 : CALL open_file(file_name=TRIM(output_file), &
838 : file_action="WRITE", &
839 : file_status="UNKNOWN", &
840 : file_position="APPEND", &
841 89 : unit_number=new_output_unit)
842 : ELSE
843 : ! this unit should be negative, otherwise all processors that get a default unit
844 : ! start writing output (to the same file, adding to confusion).
845 : ! error handling should be careful, asking for a local output unit if required
846 33 : new_output_unit = -1
847 : END IF
848 :
849 122 : CALL cp2k_run(input_declaration, TRIM(farming_env%Job(i)%input), new_output_unit, new_group, initial_variables)
850 :
851 122 : IF (new_rank == 0) CALL close_file(unit_number=new_output_unit)
852 :
853 : ! change to the original working directory
854 122 : CALL m_chdir(TRIM(farming_env%cwd), ierr)
855 122 : CPASSERT(ierr == 0)
856 :
857 122 : END SUBROUTINE execute_job
858 : END SUBROUTINE farming_run
859 :
860 : ! **************************************************************************************************
861 : !> \brief ...
862 : ! **************************************************************************************************
863 0 : SUBROUTINE write_xml_file()
864 :
865 : INTEGER :: i, unit_number
866 : TYPE(section_type), POINTER :: root_section
867 :
868 0 : NULLIFY (root_section)
869 0 : CALL create_cp2k_root_section(root_section)
870 0 : CALL keyword_release(root_section%keywords(0)%keyword)
871 : CALL open_file(unit_number=unit_number, &
872 : file_name="cp2k_input.xml", &
873 : file_action="WRITE", &
874 0 : file_status="REPLACE")
875 :
876 0 : WRITE (UNIT=unit_number, FMT="(A)") '<?xml version="1.0" encoding="utf-8"?>'
877 :
878 : !MK CP2K input structure
879 : WRITE (UNIT=unit_number, FMT="(A)") &
880 0 : "<CP2K_INPUT>", &
881 0 : " <CP2K_VERSION>"//TRIM(cp2k_version)//"</CP2K_VERSION>", &
882 0 : " <CP2K_YEAR>"//TRIM(cp2k_year)//"</CP2K_YEAR>", &
883 0 : " <COMPILE_DATE>"//TRIM(compile_date)//"</COMPILE_DATE>", &
884 0 : " <COMPILE_REVISION>"//TRIM(compile_revision)//"</COMPILE_REVISION>"
885 :
886 0 : CALL export_references_as_xml(unit_number)
887 0 : CALL export_units_as_xml(unit_number)
888 :
889 0 : DO i = 1, root_section%n_subsections
890 0 : CALL write_section_xml(root_section%subsections(i)%section, 1, unit_number)
891 : END DO
892 :
893 0 : WRITE (UNIT=unit_number, FMT="(A)") "</CP2K_INPUT>"
894 0 : CALL close_file(unit_number=unit_number)
895 0 : CALL section_release(root_section)
896 :
897 0 : END SUBROUTINE write_xml_file
898 :
899 : ! **************************************************************************************************
900 : !> \brief runs the given input
901 : !> \param input_declaration ...
902 : !> \param input_file_path the path of the input file
903 : !> \param output_file_path path of the output file (to which it is appended)
904 : !> if it is "__STD_OUT__" the default_output_unit is used
905 : !> \param initial_variables key-value list of initial preprocessor variables
906 : !> \param mpi_comm the mpi communicator to be used for this environment
907 : !> it will not be freed
908 : !> \author fawzi
909 : !> \note
910 : !> moved here because of circular dependencies
911 : ! **************************************************************************************************
912 9819 : SUBROUTINE run_input(input_declaration, input_file_path, output_file_path, initial_variables, mpi_comm)
913 : TYPE(section_type), POINTER :: input_declaration
914 : CHARACTER(len=*), INTENT(in) :: input_file_path, output_file_path
915 : CHARACTER(len=default_path_length), &
916 : DIMENSION(:, :), INTENT(IN) :: initial_variables
917 : TYPE(mp_comm_type), INTENT(in), OPTIONAL :: mpi_comm
918 :
919 : INTEGER :: unit_nr
920 : TYPE(mp_para_env_type), POINTER :: para_env
921 :
922 9819 : IF (PRESENT(mpi_comm)) THEN
923 0 : ALLOCATE (para_env)
924 0 : para_env = mpi_comm
925 : ELSE
926 9819 : para_env => f77_default_para_env
927 9819 : CALL para_env%retain()
928 : END IF
929 9819 : IF (para_env%is_source()) THEN
930 4910 : IF (output_file_path == "__STD_OUT__") THEN
931 4910 : unit_nr = default_output_unit
932 : ELSE
933 0 : INQUIRE (FILE=output_file_path, NUMBER=unit_nr)
934 : END IF
935 : ELSE
936 4909 : unit_nr = -1
937 : END IF
938 9819 : CALL cp2k_run(input_declaration, input_file_path, unit_nr, para_env, initial_variables)
939 9818 : CALL mp_para_env_release(para_env)
940 9818 : END SUBROUTINE run_input
941 :
942 : END MODULE cp2k_runs
|