Skip to content

zip_tools

_create_zip(folder, output)

Zip a folder into a zip-file or into a BytesIO.

PARAMETER DESCRIPTION
folder

Folder to be zipped.

TYPE: str

output

Either a string with the path of the zip file, or a BytesIO object.

TYPE: T

RETURNS DESCRIPTION
T

Either the zip-file path string, or the modified BytesIO object.

Source code in fractal_server/zip_tools.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
def _create_zip(folder: str, output: T) -> T:
    """
    Recursively compress the files of `folder` into `output`.

    Entries are stored under their path relative to `folder` and are
    compressed with DEFLATE. Only files are written: directories get no
    entry of their own.

    Args:
        folder: Folder to be zipped.
        output: Destination of the archive, either a string with the path of
            a zip file that does not exist yet, or an empty BytesIO object.

    Returns:
        The `output` argument itself: either the zip-file path string, or
        the BytesIO object that now holds the archive.

    Raises:
        FileExistsError: If `output` is a path that already exists.
        ValueError: If `output` is a BytesIO that already contains data.
    """
    # Refuse to clobber existing data, whichever kind of target was given.
    if isinstance(output, str):
        if os.path.exists(output):
            raise FileExistsError(f"Zip file '{output}' already exists")
    elif isinstance(output, BytesIO):
        if output.getbuffer().nbytes > 0:
            raise ValueError("BytesIO is not empty")

    with ZipFile(output, mode="w", compression=ZIP_DEFLATED) as archive:
        for current_dir, _subdirs, filenames in os.walk(folder):
            for filename in filenames:
                source = os.path.join(current_dir, filename)
                # Store the entry relative to `folder`, not as a full path.
                archive.write(source, arcname=os.path.relpath(source, folder))
    return output

_folder_can_be_deleted(folder)

Given the path of a folder as string, returns False if either: - the related zip file {folder}.zip does not exist, - the folder and the zip file have a different number of internal files, - the zip file has a very small size. Otherwise returns True.

Source code in fractal_server/zip_tools.py
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def _folder_can_be_deleted(folder: str) -> bool:
    """
    Tell whether `folder` can be removed because `{folder}.zip` looks like a
    valid archive of it.

    Returns `False` (and logs the reason) if either:
    - the related zip file `{folder}.zip` does not exist,
    - the folder and the zip file have a different number of internal files,
    - the zip file has a very small size (less than
      `THRESHOLD_ZIP_FILE_SIZE_MB` times 1024**2 bytes).
    Otherwise returns `True`.

    Args:
        folder: Path of the folder, as a string.

    Returns:
        `True` if all checks pass, `False` otherwise.
    """
    # CHECK 1: zip file exists
    zip_file = f"{folder}.zip"
    if not os.path.exists(zip_file):
        logger.info(
            f"Folder '{folder}' won't be deleted because file '{zip_file}' "
            "does not exist."
        )
        return False

    # CHECK 2: folder and zip file have the same number of files
    folder_files_count = sum(1 for f in Path(folder).rglob("*") if f.is_file())
    with ZipFile(zip_file, "r") as zip_ref:
        # Count only file entries: a zip may also hold explicit directory
        # entries, which must not compensate for files that are missing.
        zip_files_count = sum(
            1 for info in zip_ref.infolist() if not info.is_dir()
        )
    if folder_files_count != zip_files_count:
        logger.info(
            f"Folder '{folder}' won't be deleted because it contains "
            f"{folder_files_count} files while '{zip_file}' contains "
            f"{zip_files_count}."
        )
        return False

    # CHECK 3: zip file size is >= than `THRESHOLD_ZIP_FILE_SIZE_MB`
    zip_size = os.path.getsize(zip_file)
    if zip_size < THRESHOLD_ZIP_FILE_SIZE_MB * (1024**2):
        logger.info(
            f"Folder '{folder}' won't be deleted because '{zip_file}' is too "
            f"small ({zip_size / (1024**2):.5f} MB, whereas the minimum limit "
            f"for deletion is {THRESHOLD_ZIP_FILE_SIZE_MB})."
        )
        return False

    return True

_read_single_file_from_zip(*, file_path, archive_path)

Reads and returns the contents of a single file from a ZIP archive using unzip -p.

PARAMETER DESCRIPTION
file_path

relative to the archive

TYPE: str

archive_path

TYPE: str

RETURNS DESCRIPTION
str

The file content

RAISES DESCRIPTION
FileNotFoundError

if the file is not inside the archive

Source code in fractal_server/zip_tools.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
def _read_single_file_from_zip(*, file_path: str, archive_path: str) -> str:
    """
    Extract a single member of a ZIP archive and return it as text.

    The extraction is delegated to the external `unzip -p` command, whose
    standard output is decoded as UTF-8.

    Args:
        file_path:
            Path of the member to read, relative to the archive.
        archive_path:
            Path of the ZIP archive.

    Returns:
        The file content

    Raises:
        FileNotFoundError:
            if `unzip` exits with a non-zero return code, which is reported
            as the file not being inside the archive
    """
    command = ["unzip", "-p", archive_path, file_path]
    completed = subprocess.run(  # nosec
        command,
        capture_output=True,
        encoding="utf-8",
        check=False,
    )

    if completed.returncode == 0:
        return completed.stdout

    # The caller function should handle this error
    raise FileNotFoundError(
        f"File '{file_path}' not found inside archive '{archive_path}'."
    )

_zip_folder_to_byte_stream_iterator(folder)

Returns byte stream with the zipped log folder of a job.

PARAMETER DESCRIPTION
folder

the folder to zip

TYPE: str

Source code in fractal_server/zip_tools.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def _zip_folder_to_byte_stream_iterator(folder: str) -> Iterator:
    """
    Build an iterator over the bytes of the zip archive of `folder`.

    If `{folder}.zip` already exists, it is streamed from disk. Otherwise the
    folder is zipped in memory and the whole archive is produced as a single
    chunk.

    Args:
        folder: the folder to zip

    Returns:
        An iterator of `bytes` chunks which, once joined, form the archive.
    """
    archive_on_disk = Path(f"{folder}.zip")

    if not os.path.exists(archive_on_disk):
        # No archive on disk: zip the folder on the fly, fully in memory.
        in_memory_zip = _create_zip(folder, output=BytesIO())
        return iter([in_memory_zip.getvalue()])

    def _stream_archive():
        """
        https://fastapi.tiangolo.com/advanced/custom-response/#using-streamingresponse-with-file-like-objects
        """
        with open(archive_on_disk, mode="rb") as handle:
            for chunk in handle:
                yield chunk

    return _stream_archive()

_zip_folder_to_file_and_remove(folder)

Creates a ZIP archive of the specified folder and removes the original folder (if it can be deleted).

This function performs the following steps: 1. Creates a ZIP archive of the folder and names it with a temporary suffix _tmp.zip. 2. Renames the ZIP removing the suffix (this would possibly overwrite a file with the same name already present). 3. Checks if the folder can be safely deleted using the _folder_can_be_deleted function. If so, deletes the original folder.

Source code in fractal_server/zip_tools.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
def _zip_folder_to_file_and_remove(folder: str) -> None:
    """
    Archive `folder` as `{folder}.zip` and remove the original folder (if it
    can be deleted).

    Steps:
    1. Zip `folder` into a temporary file named `{folder}_tmp.zip`. If this
       fails, the error is logged, the temporary file is removed (if
       present) and the function returns without touching the folder.
    2. Move the temporary file to `{folder}.zip` (this would possibly
       overwrite a file with the same name already present).
    3. Delete the original folder, but only if `_folder_can_be_deleted`
       returns `True` for it.
    """
    staging_zip = f"{folder}_tmp.zip"
    final_zip = f"{folder}.zip"

    try:
        logger.info(f"Start creating temporary zip file at '{staging_zip}'.")
        _create_zip(folder, staging_zip)
        logger.info("Zip file created.")
    except Exception as e:
        logger.error(
            f"Error while creating temporary zip file. Original error: '{e}'."
        )
        # Do not leave a partial archive behind.
        Path(staging_zip).unlink(missing_ok=True)
        return
    else:
        logger.info(f"Moving temporary zip file to {final_zip}.")
        shutil.move(staging_zip, final_zip)
        logger.info("Zip file moved.")

    if not _folder_can_be_deleted(folder):
        return

    logger.info(f"Removing folder '{folder}'.")
    shutil.rmtree(folder)
    logger.info("Folder removed.")