Check which of the given Slurm jobs have already finished.
The function is based on the _jobs_finished
function from
clusterfutures (version 0.5).
Original Copyright: 2022 Adrian Sampson
(released under the MIT licence)
Source code in fractal_server/app/runner/executors/slurm/sudo/_check_jobs_status.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72 | def _jobs_finished(job_ids) -> set[str]:
"""
Check which ones of the given Slurm jobs already finished
The function is based on the `_jobs_finished` function from
clusterfutures (version 0.5).
Original Copyright: 2022 Adrian Sampson
(released under the MIT licence)
"""
# If there is no Slurm job to check, return right away
if not job_ids:
return set()
id_to_state = dict()
res = run_squeue(job_ids)
if res.returncode == 0:
id_to_state = {
out.split()[0]: out.split()[1] for out in res.stdout.splitlines()
}
else:
id_to_state = dict()
for j in job_ids:
res = run_squeue([j])
if res.returncode != 0:
logger.info(f"Job {j} not found. Marked it as completed")
id_to_state.update({str(j): "COMPLETED"})
else:
id_to_state.update(
{res.stdout.split()[0]: res.stdout.split()[1]}
)
# Finished jobs only stay in squeue for a few mins (configurable). If
# a job ID isn't there, we'll assume it's finished.
return {
j
for j in job_ids
if id_to_state.get(j, "COMPLETED") in STATES_FINISHED
}
|