
utils

Helper functions for operations on Zarr attributes and OME-NGFF metadata.

_find_omengff_acquisition(image_zarr_path)

Discover the acquisition index based on OME-NGFF metadata.

Given the path to a zarr image folder (e.g. /path/plate.zarr/B/03/0), extract the acquisition index from the .zattrs file of the parent folder (i.e. at the well level), or return None if acquisition is not specified.

Notes:

  1. For non-multiplexing datasets, acquisition is not required information in the metadata. If it is not there, this function returns None.
  2. This function fails if the image does not belong to an OME-NGFF well.
PARAMETER DESCRIPTION
image_zarr_path

Full path to an OME-NGFF image folder.

TYPE: Path

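A minimal usage sketch (the plate path is illustrative; the result depends on the well metadata stored on disk):

from pathlib import Path

# For a multiplexing plate whose well metadata assigns acquisition 0 to image "0",
# this returns 0; for a non-multiplexing plate it returns None.
acquisition = _find_omengff_acquisition(Path("/path/plate.zarr/B/03/0"))
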
Source code in fractal_tasks_core/utils.py
def _find_omengff_acquisition(image_zarr_path: Path) -> Union[int, None]:
    """
    Discover the acquisition index based on OME-NGFF metadata.

    Given the path to a zarr image folder (e.g. `/path/plate.zarr/B/03/0`),
    extract the acquisition index from the `.zattrs` file of the parent
    folder (i.e. at the well level), or return `None` if acquisition is not
    specified.

    Notes:

    1. For non-multiplexing datasets, acquisition is not required
       information in the metadata. If it is not there, this function
       returns `None`.
    2. This function fails if the image does not belong to an OME-NGFF
       well.

    Args:
        image_zarr_path: Full path to an OME-NGFF image folder.
    """

    # Identify well path and attrs
    well_zarr_path = image_zarr_path.parent
    if not (well_zarr_path / ".zattrs").exists():
        raise ValueError(
            f"{str(well_zarr_path)} must be an OME-NGFF well "
            "folder, but it does not include a .zattrs file."
        )
    well_group = zarr.open_group(str(well_zarr_path))
    attrs_images = well_group.attrs["well"]["images"]

    # Look for the acquisition of the current image (if any)
    acquisition = None
    for img_dict in attrs_images:
        if (
            img_dict["path"] == image_zarr_path.name
            and "acquisition" in img_dict.keys()
        ):
            acquisition = img_dict["acquisition"]
            break

    return acquisition

_get_table_path_dict(zarr_url)

Compile dictionary of (table name, table path) key/value pairs.

PARAMETER DESCRIPTION
zarr_url

Path or url to the individual OME-Zarr image to be processed.

TYPE: str

RETURNS DESCRIPTION
dict[str, str]

Dictionary with table names as keys and table paths as values. If the tables Zarr group is missing, or if it does not have a tables key, an empty dictionary is returned.

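A minimal usage sketch (the image path and the table name FOV_ROI_table are illustrative; the result depends on the tables group stored on disk):

# For a hypothetical image whose tables group lists a single "FOV_ROI_table"
table_path_dict = _get_table_path_dict("/path/plate.zarr/B/03/0")
# {'FOV_ROI_table': '/path/plate.zarr/B/03/0/tables/FOV_ROI_table'}
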
Source code in fractal_tasks_core/utils.py
def _get_table_path_dict(zarr_url: str) -> dict[str, str]:
    """
    Compile dictionary of (table name, table path) key/value pairs.


    Args:
        zarr_url:
            Path or url to the individual OME-Zarr image to be processed.

    Returns:
        Dictionary with table names as keys and table paths as values. If
            the `tables` Zarr group is missing, or if it does not have a
            `tables` key, an empty dictionary is returned.
    """

    try:
        tables_group = zarr.open_group(f"{zarr_url}/tables", "r")
        table_list = tables_group.attrs["tables"]
    except (zarr.errors.GroupNotFoundError, KeyError):
        table_list = []

    table_path_dict = {}
    for table in table_list:
        table_path_dict[table] = f"{zarr_url}/tables/{table}"

    return table_path_dict

_split_well_path_image_path(zarr_url)

Returns the well path and the image path for an HCS OME-Zarr zarr_url.

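A minimal usage sketch (the path is illustrative):

well_path, img_path = _split_well_path_image_path("/path/plate.zarr/B/03/0")
# well_path == "/path/plate.zarr/B/03", img_path == "0"
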
Source code in fractal_tasks_core/utils.py
def _split_well_path_image_path(zarr_url: str) -> tuple[str, str]:
    """
    Returns the well path and the image path for an HCS OME-Zarr `zarr_url`.
    """
    zarr_url = zarr_url.rstrip("/")
    well_path = "/".join(zarr_url.split("/")[:-1])
    img_path = zarr_url.split("/")[-1]
    return well_path, img_path

create_well_acquisition_dict(zarr_urls)

Parses zarr_urls and groups them by HCS well and acquisition.

Generates a dict whose keys uniquely describe the well (e.g. plate + well for HCS plates). The values are dictionaries: the keys of each secondary dictionary are the acquisitions, and its values are the zarr_url for a given acquisition.

PARAMETER DESCRIPTION
zarr_urls

List of zarr_urls

TYPE: list[str]

RETURNS DESCRIPTION
dict[str, dict[int, str]]

image_groups, a dictionary mapping each well path to a dictionary of {acquisition: zarr_url} pairs.

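A minimal usage sketch (the paths are illustrative, and it assumes the well metadata assigns acquisitions 0 and 1 to images "0" and "1"):

zarr_urls = [
    "/path/plate.zarr/B/03/0",
    "/path/plate.zarr/B/03/1",
]
image_groups = create_well_acquisition_dict(zarr_urls)
# {'/path/plate.zarr/B/03': {0: '/path/plate.zarr/B/03/0',
#                            1: '/path/plate.zarr/B/03/1'}}
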
Source code in fractal_tasks_core/utils.py
def create_well_acquisition_dict(
    zarr_urls: list[str],
) -> dict[str, dict[int, str]]:
    """
    Parses zarr_urls and groups them by HCS well and acquisition.

    Generates a dict whose keys uniquely describe the well (e.g. plate + well
    for HCS plates). The values are dictionaries: the keys of each secondary
    dictionary are the acquisitions, and its values are the `zarr_url` for a
    given acquisition.

    Args:
        zarr_urls: List of zarr_urls

    Returns:
        image_groups, a dictionary mapping each well path to a dictionary of
            `{acquisition: zarr_url}` pairs.
    """
    image_groups = dict()

    # Dict to cache well-level metadata
    well_metadata = dict()
    for zarr_url in zarr_urls:
        well_path, img_sub_path = _split_well_path_image_path(zarr_url)
        # For the first zarr_url of a well, load the well metadata and
        # initialize the image_groups dict
        if well_path not in image_groups:
            well_meta = load_NgffWellMeta(well_path)
            well_metadata[well_path] = well_meta.well
            image_groups[well_path] = {}

        # For every zarr_url, add it under the well_path & acquisition keys to
        # the image_groups dict
        for image in well_metadata[well_path].images:
            if image.path == img_sub_path:
                if image.acquisition in image_groups[well_path]:
                    raise ValueError(
                        "This task has not been built for OME-Zarr HCS plates"
                        "with multiple images of the same acquisition per well"
                        f". {image.acquisition} is the acquisition for "
                        f"multiple images in {well_path=}."
                    )

                image_groups[well_path][image.acquisition] = zarr_url
    return image_groups

get_parameters_from_metadata(*, keys, metadata, image_zarr_path)

Flexibly extract parameters from metadata dictionary

This covers both parameters which are acquisition-specific (if the image belongs to an OME-NGFF well and its acquisition is specified) and parameters which are simply available in the dictionary. The two cases are handled as:

metadata[acquisition]["some_parameter"]  # acquisition available
metadata["some_parameter"]               # acquisition not available

PARAMETER DESCRIPTION
keys

list of required parameters.

TYPE: Sequence[str]

metadata

metadata dictionary.

TYPE: dict[str, Any]

image_zarr_path

full path to image, e.g. /path/plate.zarr/B/03/0.

TYPE: Path

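A minimal usage sketch (the metadata keys coarsening_xy and num_levels and the image path are illustrative; the acquisition is looked up from the on-disk well metadata):

from pathlib import Path

metadata = {"coarsening_xy": 2, "num_levels": 5}
parameters = get_parameters_from_metadata(
    keys=["coarsening_xy", "num_levels"],
    metadata=metadata,
    image_zarr_path=Path("/path/plate.zarr/B/03/0"),
)
# For an image without an acquisition: {"coarsening_xy": 2, "num_levels": 5}
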
Source code in fractal_tasks_core/utils.py
def get_parameters_from_metadata(
    *,
    keys: Sequence[str],
    metadata: dict[str, Any],
    image_zarr_path: Path,
) -> dict[str, Any]:
    """
    Flexibly extract parameters from metadata dictionary

    This covers both parameters which are acquisition-specific (if the image
    belongs to an OME-NGFF well and its acquisition is specified) and
    parameters which are simply available in the dictionary.
    The two cases are handled as:
    ```
    metadata[acquisition]["some_parameter"]  # acquisition available
    metadata["some_parameter"]               # acquisition not available
    ```

    Args:
        keys: list of required parameters.
        metadata: metadata dictionary.
        image_zarr_path: full path to image, e.g. `/path/plate.zarr/B/03/0`.
    """

    parameters = {}
    acquisition = _find_omengff_acquisition(image_zarr_path)
    if acquisition is not None:
        parameters["acquisition"] = acquisition

    for key in keys:
        if acquisition is None:
            parameter = metadata[key]
        else:
            try:
                parameter = metadata[key][str(acquisition)]
            except TypeError:
                parameter = metadata[key]
            except KeyError:
                parameter = metadata[key]
        parameters[key] = parameter
    return parameters

rescale_datasets(*, datasets, coarsening_xy, reference_level, remove_channel_axis=False)

Given a set of datasets (as per OME-NGFF specs), update their "scale" transformations in the YX directions by including a prefactor (coarsening_xy**reference_level).

PARAMETER DESCRIPTION
datasets

list of datasets (as per OME-NGFF specs).

TYPE: list[dict]

coarsening_xy

linear coarsening factor between subsequent levels.

TYPE: int

reference_level

TBD

TYPE: int

remove_channel_axis

If True, remove the first item of all scale transformations.

TYPE: bool DEFAULT: False

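A minimal usage sketch (the dataset list is illustrative):

datasets = [
    {
        "path": "0",
        "coordinateTransformations": [
            {"type": "scale", "scale": [1.0, 0.2, 0.2]}
        ],
    }
]
rescaled = rescale_datasets(
    datasets=datasets, coarsening_xy=2, reference_level=2
)
# The YX scales are multiplied by 2**2 = 4:
# rescaled[0]["coordinateTransformations"][0]["scale"] == [1.0, 0.8, 0.8]
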
Source code in fractal_tasks_core/utils.py
def rescale_datasets(
    *,
    datasets: list[dict],
    coarsening_xy: int,
    reference_level: int,
    remove_channel_axis: bool = False,
) -> list[dict]:
    """
    Given a set of datasets (as per OME-NGFF specs), update their "scale"
    transformations in the YX directions by including a prefactor
    (coarsening_xy**reference_level).

    Args:
        datasets: list of datasets (as per OME-NGFF specs).
        coarsening_xy: linear coarsening factor between subsequent levels.
        reference_level: TBD
        remove_channel_axis: If `True`, remove the first item of all `scale`
            transformations.
    """

    # Construct rescaled datasets
    new_datasets = []
    for ds in datasets:
        new_ds = {}

        # Copy all keys that are not coordinateTransformations (e.g. path)
        for key in ds.keys():
            if key != "coordinateTransformations":
                new_ds[key] = ds[key]

        # Update coordinateTransformations
        old_transformations = ds["coordinateTransformations"]
        new_transformations = []
        for t in old_transformations:
            if t["type"] == "scale":
                new_t: dict[str, Any] = t.copy()
                # Rescale last two dimensions (that is, Y and X)
                prefactor = coarsening_xy**reference_level
                new_t["scale"][-2] = new_t["scale"][-2] * prefactor
                new_t["scale"][-1] = new_t["scale"][-1] * prefactor
                if remove_channel_axis:
                    new_t["scale"].pop(0)
                new_transformations.append(new_t)
            else:
                new_transformations.append(t)
        new_ds["coordinateTransformations"] = new_transformations
        new_datasets.append(new_ds)

    return new_datasets