Skip to content

copy_ome_zarr_hcs_plate

Task that copies the structure of an OME-NGFF zarr array to a new one.

_generate_plate_well_metadata(zarr_urls)

Generate metadata for OME-Zarr HCS plates & wells.

Based on the list of zarr_urls, generate metadata for all plates and all their wells.

PARAMETER DESCRIPTION
zarr_urls

List of paths or urls to the individual OME-Zarr image to be processed.

TYPE: list[str]

RETURNS DESCRIPTION
plate_metadata_dicts

Dictionary of plate metadata. The structure is: {"old_plate_name": NgffPlateMeta (as dict)}.

TYPE: dict[str, dict]

new_well_image_attrs

Dictionary of image lists for the new wells. The structure is: {"old_plate_name": {"old_well_name": [ImageInWell(as dict)]}}

TYPE: dict[str, dict[str, dict]]

well_image_attrs

Dictionary of Image attributes of the existing wells.

TYPE: dict[str, dict]

Source code in fractal_tasks_core/tasks/copy_ome_zarr_hcs_plate.py
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
def _generate_plate_well_metadata(
    zarr_urls: list[str],
) -> tuple[dict[str, dict], dict[str, dict[str, dict]], dict[str, dict]]:
    """
    Generate metadata for OME-Zarr HCS plates & wells.

    Based on the list of zarr_urls, generate metadata for all plates and all
    their wells.

    Args:
        zarr_urls: List of paths or urls to the individual OME-Zarr image to
            be processed.

    Returns:
        plate_metadata_dicts: Dictionary of plate plate metadata. The structure
            is: {"old_plate_name": NgffPlateMeta (as dict)}.
        new_well_image_attrs: Dictionary of image lists for the new wells.
            The structure is: {"old_plate_name": {"old_well_name":
            [ImageInWell(as dict)]}}
        well_image_attrs: Dictionary of Image attributes of the existing wells.
    """
    # TODO: Simplify this block. Currently complicated, because we need to loop
    # through all potential plates, all their wells & their images to build up
    # the metadata for the plate & well.
    plate_metadata_dicts = {}
    plate_wells = {}
    well_image_attrs = {}
    new_well_image_attrs = {}
    for zarr_url in zarr_urls:
        # Extract plate/well/image parts of `zarr_url`
        old_plate_url = _get_plate_url_from_image_url(zarr_url)
        well_sub_url = _get_well_sub_url(zarr_url)
        curr_img_sub_url = _get_image_sub_url(zarr_url)

        # The first time a plate is found, create its metadata
        if old_plate_url not in plate_metadata_dicts:
            logger.info(f"Reading plate metadata of {old_plate_url=}")
            old_plate_meta = load_NgffPlateMeta(old_plate_url)
            plate_metadata = dict(
                plate=dict(
                    acquisitions=old_plate_meta.plate.acquisitions,
                    field_count=old_plate_meta.plate.field_count,
                    name=old_plate_meta.plate.name,
                    # The new field count could be different from the old
                    # field count
                    version=old_plate_meta.plate.version,
                )
            )
            plate_metadata_dicts[old_plate_url] = plate_metadata
            plate_wells[old_plate_url] = []
            well_image_attrs[old_plate_url] = {}
            new_well_image_attrs[old_plate_url] = {}

        # The first time a plate/well pair is found, create the well metadata
        if well_sub_url not in plate_wells[old_plate_url]:
            plate_wells[old_plate_url].append(well_sub_url)
            old_well_url = f"{old_plate_url}/{well_sub_url}"
            logger.info(f"Reading well metadata of {old_well_url}")
            well_attrs = load_NgffWellMeta(old_well_url)
            well_image_attrs[old_plate_url][well_sub_url] = well_attrs.well
            new_well_image_attrs[old_plate_url][well_sub_url] = []

        # Find images of the current well with name matching the current image
        # TODO: clarify whether this list must always have length 1
        curr_well_image_list = [
            img
            for img in well_image_attrs[old_plate_url][well_sub_url].images
            if img.path == curr_img_sub_url
        ]
        new_well_image_attrs[old_plate_url][
            well_sub_url
        ] += curr_well_image_list

    # Fill in the plate metadata based on all available wells
    for old_plate_url in plate_metadata_dicts:
        well_list, row_list, column_list = _generate_wells_rows_columns(
            plate_wells[old_plate_url]
        )
        plate_metadata_dicts[old_plate_url]["plate"]["columns"] = []
        for column in column_list:
            plate_metadata_dicts[old_plate_url]["plate"]["columns"].append(
                {"name": column}
            )

        plate_metadata_dicts[old_plate_url]["plate"]["rows"] = []
        for row in row_list:
            plate_metadata_dicts[old_plate_url]["plate"]["rows"].append(
                {"name": row}
            )
        plate_metadata_dicts[old_plate_url]["plate"]["wells"] = well_list

        # Validate with NgffPlateMeta model
        plate_metadata_dicts[old_plate_url] = NgffPlateMeta(
            **plate_metadata_dicts[old_plate_url]
        ).model_dump(exclude_none=True)

    return plate_metadata_dicts, new_well_image_attrs, well_image_attrs

_generate_wells_rows_columns(well_list)

Generate the plate well metadata based on the list of wells.

Source code in fractal_tasks_core/tasks/copy_ome_zarr_hcs_plate.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def _generate_wells_rows_columns(
    well_list: list[str],
) -> tuple[list[WellInPlate], list[str], list[str]]:
    """
    Build the well entries, row names and column names of a plate.

    Args:
        well_list: Well sub-paths of the form `row/column`.

    Returns:
        A tuple `(wells, rows, columns)`: one `WellInPlate` per input well
        (in input order), followed by the unique row names and the unique
        column names, each sorted as plain strings. The `rowIndex` and
        `columnIndex` of a well are its positions in those sorted lists.
    """
    # Split every "row/column" path exactly once
    split_paths = [well_path.split("/") for well_path in well_list]

    rows = sorted({parts[0] for parts in split_paths})
    columns = sorted({parts[1] for parts in split_paths})

    # Position of each name within its sorted list
    row_position = {name: idx for idx, name in enumerate(rows)}
    column_position = {name: idx for idx, name in enumerate(columns)}

    wells = [
        WellInPlate(
            path=well_path,
            rowIndex=row_position[parts[0]],
            columnIndex=column_position[parts[1]],
        )
        for well_path, parts in zip(well_list, split_paths)
    ]

    return wells, rows, columns

_get_image_sub_url(zarr_url)

Given the absolute zarr_url for an OME-Zarr image, return the image zarr-group name.

Source code in fractal_tasks_core/tasks/copy_ome_zarr_hcs_plate.py
55
56
57
58
59
60
61
62
def _get_image_sub_url(zarr_url: str) -> str:
    """
    Given the absolute `zarr_url` for an OME-Zarr image, return the image
    zarr-group name.
    """
    zarr_url = zarr_url.rstrip("/")
    image_sub_url = zarr_url.split("/")[-1]
    return image_sub_url

_get_plate_url_from_image_url(zarr_url)

Given the absolute zarr_url for an OME-Zarr image within an HCS plate, return the path to the plate zarr group.

Source code in fractal_tasks_core/tasks/copy_ome_zarr_hcs_plate.py
35
36
37
38
39
40
41
42
def _get_plate_url_from_image_url(zarr_url: str) -> str:
    """
    Given the absolute `zarr_url` for an OME-Zarr image within an HCS plate,
    return the path to the plate zarr group.
    """
    zarr_url = zarr_url.rstrip("/")
    plate_path = "/".join(zarr_url.split("/")[:-3])
    return plate_path

_get_well_sub_url(zarr_url)

Given the absolute zarr_url for an OME-Zarr image within an HCS plate, return the sub-path of its well (row/column) relative to the plate zarr group.

Source code in fractal_tasks_core/tasks/copy_ome_zarr_hcs_plate.py
45
46
47
48
49
50
51
52
def _get_well_sub_url(zarr_url: str) -> str:
    """
    Given the absolute `zarr_url` for an OME-Zarr image within an HCS plate,
    return the path to the image zarr group.
    """
    zarr_url = zarr_url.rstrip("/")
    well_url = "/".join(zarr_url.split("/")[-3:-1])
    return well_url

copy_ome_zarr_hcs_plate(*, zarr_urls, zarr_dir, method=DaskProjectionMethod.MIP, overwrite=False)

Duplicate the OME-Zarr HCS structure for a set of zarr_urls.

This task only processes the zarr images in the zarr_urls, not all the images in the plate. It copies all the plate & well structure, but none of the image metadata or the actual image data:

  • For each plate, create a new OME-Zarr HCS plate with the attributes for all the images in zarr_urls
  • For each well (in each plate), create a new zarr subgroup with the same attributes as the original one.

Note: this task makes use of methods from the Attributes class, see https://zarr.readthedocs.io/en/stable/api/attrs.html.

PARAMETER DESCRIPTION
zarr_urls

List of paths or urls to the individual OME-Zarr image to be processed. (standard argument for Fractal tasks, managed by Fractal server).

TYPE: list[str]

zarr_dir

path of the directory where the new OME-Zarrs will be created. (standard argument for Fractal tasks, managed by Fractal server).

TYPE: str

method

Choose which method to use for intensity projection along the Z axis. mip is the default and performs a maximum intensity projection. minip performs a minimum intensity projection, meanip a mean intensity projection and sumip a sum intensity projection.

TYPE: DaskProjectionMethod DEFAULT: MIP

overwrite

If True, overwrite the task output.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
dict[str, Any]

A parallelization list to be used in a compute task to fill the wells with OME-Zarr images.

Source code in fractal_tasks_core/tasks/copy_ome_zarr_hcs_plate.py
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
@validate_call
def copy_ome_zarr_hcs_plate(
    *,
    # Fractal parameters
    zarr_urls: list[str],
    zarr_dir: str,
    method: DaskProjectionMethod = DaskProjectionMethod.MIP,
    # Advanced parameters
    overwrite: bool = False,
) -> dict[str, Any]:
    """
    Duplicate the OME-Zarr HCS structure for a set of zarr_urls.

    This task only processes the zarr images in the zarr_urls, not all the
    images in the plate. It copies all the  plate & well structure, but none
    of the image metadata or the actual image data:

    - For each plate, create a new OME-Zarr HCS plate with the attributes for
        all the images in zarr_urls
    - For each well (in each plate), create a new zarr subgroup with the
       same attributes as the original one.

    Note: this task makes use of methods from the `Attributes` class, see
    https://zarr.readthedocs.io/en/stable/api/attrs.html.

    Args:
        zarr_urls: List of paths or urls to the individual OME-Zarr image to
            be processed.
            (standard argument for Fractal tasks, managed by Fractal server).
        zarr_dir: path of the directory where the new OME-Zarrs will be
            created.
            (standard argument for Fractal tasks, managed by Fractal server).
        method: Choose which method to use for intensity projection along the
            Z axis. mip is the default and performs a maximum intensity
            projection. minip performs a minimum intensity projection, meanip
            a mean intensity projection and sumip a sum intensity projection.
        overwrite: If `True`, overwrite the task output.

    Returns:
        A parallelization list to be used in a compute task to fill the wells
        with OME-Zarr images.
    """

    # Step 1: one parallelization item per input image, pointing at the
    # corresponding image location inside the new plate
    parallelization_list = []
    for zarr_url in zarr_urls:
        src_plate_url = _get_plate_url_from_image_url(zarr_url)
        well_path = _get_well_sub_url(zarr_url)
        # Plate name: last path component in front of the ".zarr" suffix
        src_plate_name = src_plate_url.split(".zarr")[-2].split("/")[-1]
        target_plate_url = f"{zarr_dir}/{src_plate_name}_{method.value}.zarr"
        image_name = _get_image_sub_url(zarr_url)
        init_args = {
            "origin_url": zarr_url,
            "method": method.value,
            "overwrite": overwrite,
        }
        # Instantiated only to check the init arguments; the instance itself
        # is discarded
        InitArgsMIP(**init_args)
        parallelization_list.append(
            {
                "zarr_url": f"{target_plate_url}/{well_path}/{image_name}",
                "init_args": init_args,
            }
        )

    # Step 2: gather the plate & well metadata of the original plates
    metadata = _generate_plate_well_metadata(zarr_urls=zarr_urls)
    plate_attrs_dicts, new_well_image_attrs, well_image_attrs = metadata

    # Step 3: create each new OME-Zarr HCS plate and its well groups
    for old_plate_url, plate_attrs in plate_attrs_dicts.items():
        plate_stem = old_plate_url.split(".zarr")[-2].split("/")[-1]
        zarrurl_new = f"{zarr_dir}/{plate_stem}_{method.value}.zarr"
        logger.info(f"{old_plate_url=}")
        logger.info(f"{zarrurl_new=}")
        new_plate_group = open_zarr_group_with_overwrite(
            zarrurl_new, overwrite=overwrite
        )
        new_plate_group.attrs.put(plate_attrs)

        # One subgroup per well, listing only the selected images and
        # keeping the version of the original well
        original_wells = well_image_attrs[old_plate_url]
        selected_images = new_well_image_attrs[old_plate_url]
        for well_sub_url, images in selected_images.items():
            new_well_group = new_plate_group.create_group(well_sub_url)
            dumped_images = [
                img.model_dump(exclude_none=True) for img in images
            ]
            new_well_group.attrs.put(
                {
                    "well": {
                        "images": dumped_images,
                        "version": original_wells[well_sub_url].version,
                    }
                }
            )

    return {"parallelization_list": parallelization_list}