Skip to content

_check_jobs_status

_jobs_finished(job_ids)

Check which of the given Slurm jobs have already finished

The function is based on the _jobs_finished function from clusterfutures (version 0.5). Original Copyright: 2022 Adrian Sampson (released under the MIT licence)

Source code in fractal_server/app/runner/executors/slurm/sudo/_check_jobs_status.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def _jobs_finished(job_ids) -> set[str]:
    """
    Check which ones of the given Slurm jobs already finished

    The function is based on the `_jobs_finished` function from
    clusterfutures (version 0.5).
    Original Copyright: 2022 Adrian Sampson
    (released under the MIT licence)
    """

    # If there is no Slurm job to check, return right away
    if not job_ids:
        return set()
    id_to_state = dict()

    res = run_squeue(job_ids)
    if res.returncode == 0:
        id_to_state = {
            out.split()[0]: out.split()[1] for out in res.stdout.splitlines()
        }
    else:
        id_to_state = dict()
        for j in job_ids:
            res = run_squeue([j])
            if res.returncode != 0:
                logger.info(f"Job {j} not found. Marked it as completed")
                id_to_state.update({str(j): "COMPLETED"})
            else:
                id_to_state.update(
                    {res.stdout.split()[0]: res.stdout.split()[1]}
                )

    # Finished jobs only stay in squeue for a few mins (configurable). If
    # a job ID isn't there, we'll assume it's finished.
    return {
        j
        for j in job_ids
        if id_to_state.get(j, "COMPLETED") in STATES_FINISHED
    }