Skip to content

pyramids

Construct and write pyramid of lower-resolution levels.

build_pyramid(*, zarrurl, overwrite=False, num_levels=2, coarsening_xy=2, chunksize=None, aggregation_function=None, open_array_kwargs=None)

Starting from on-disk highest-resolution data, build and write to disk a pyramid with (num_levels - 1) coarsened levels. This function works for 2D, 3D or 4D arrays.

PARAMETER DESCRIPTION
zarrurl

Path of the image zarr group, not including the multiscale-level path (e.g. "some/path/plate.zarr/B/03/0").

TYPE: Union[str, Path]

overwrite

Whether to overwrite existing pyramid levels.

TYPE: bool DEFAULT: False

num_levels

Total number of pyramid levels (including 0).

TYPE: int DEFAULT: 2

coarsening_xy

Linear coarsening factor between subsequent levels.

TYPE: int DEFAULT: 2

chunksize

Shape of a single chunk. If None, the coarsened levels are not rechunked.

TYPE: Optional[Sequence[int]] DEFAULT: None

aggregation_function

Function to be used when downsampling. If None, np.mean is used.

TYPE: Optional[Callable] DEFAULT: None

open_array_kwargs

Additional arguments for zarr.open.

TYPE: Optional[Mapping] DEFAULT: None

Source code in fractal_tasks_core/pyramids.py
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def build_pyramid(
    *,
    zarrurl: Union[str, pathlib.Path],
    overwrite: bool = False,
    num_levels: int = 2,
    coarsening_xy: int = 2,
    chunksize: Optional[Sequence[int]] = None,
    aggregation_function: Optional[Callable] = None,
    open_array_kwargs: Optional[Mapping] = None,
) -> None:

    """
    Starting from on-disk highest-resolution data, build and write to disk a
    pyramid with `(num_levels - 1)` coarsened levels.
    This function works for 2D, 3D or 4D arrays.

    The last two axes are treated as Y and X; each new level is obtained by
    coarsening the previously stored level along those two axes only, and is
    cast back to the dtype of the highest-resolution data.

    Args:
        zarrurl: Path of the image zarr group, not including the
            multiscale-level path (e.g. `"some/path/plate.zarr/B/03/0"`).
        overwrite: Whether to overwrite existing pyramid levels.
        num_levels: Total number of pyramid levels (including 0).
        coarsening_xy: Linear coarsening factor between subsequent levels.
        chunksize: Shape of a single chunk. If `None`, the coarsened levels
            are not rechunked.
        aggregation_function: Function to be used when downsampling. If
            `None`, `np.mean` is used.
        open_array_kwargs: Additional arguments for zarr.open. If it does
            not include `dimension_separator`, `"/"` is used.

    Raises:
        ValueError: If the highest-resolution array is not 2D, 3D or 4D, if
            a level is too small along Y or X to be coarsened by
            `coarsening_xy`, or if `overwrite` is false and a pyramid level
            already exists.
    """

    # Clean up zarrurl
    # NOTE(review): presumably zarrurl is always a local path here; pathlib
    # normalization is not meant for URLs with a scheme -- confirm.
    zarrurl = str(pathlib.Path(zarrurl))  # FIXME

    # Select full-resolution multiscale level
    zarrurl_highres = f"{zarrurl}/0"
    logger.info(f"[build_pyramid] High-resolution path: {zarrurl_highres}")

    # Lazily load highest-resolution data
    data_highres = da.from_zarr(zarrurl_highres)
    logger.info(f"[build_pyramid] High-resolution data: {str(data_highres)}")

    # Check the number of axes and identify YX dimensions
    ndims = len(data_highres.shape)
    if ndims not in [2, 3, 4]:
        raise ValueError(f"{data_highres.shape=}, ndims not in [2,3,4]")
    y_axis = ndims - 2
    x_axis = ndims - 1

    # Set aggregation_function
    if aggregation_function is None:
        aggregation_function = np.mean

    # Merge the default dimension separator with the user-provided options
    # into a single mapping. Passing `dimension_separator` explicitly *and*
    # through `**open_array_kwargs` would raise a "multiple values for
    # keyword argument" TypeError whenever the caller sets it.
    zarr_open_kwargs = {
        "dimension_separator": "/",
        **(open_array_kwargs or {}),
    }
    dimension_separator = zarr_open_kwargs["dimension_separator"]

    # Compute and write lower-resolution levels
    previous_level = data_highres
    for ind_level in range(1, num_levels):
        # Verify that coarsening is doable
        if min(previous_level.shape[-2:]) < coarsening_xy:
            raise ValueError(
                f"ERROR: at {ind_level}-th level, "
                f"coarsening_xy={coarsening_xy} "
                f"but previous level has shape {previous_level.shape}"
            )
        # Apply coarsening (only along Y and X; excess pixels that do not
        # fill a whole coarsening window are trimmed)
        newlevel = da.coarsen(
            aggregation_function,
            previous_level,
            {y_axis: coarsening_xy, x_axis: coarsening_xy},
            trim_excess=True,
        ).astype(data_highres.dtype)

        # Apply rechunking
        if chunksize is None:
            newlevel_rechunked = newlevel
        else:
            newlevel_rechunked = newlevel.rechunk(chunksize)
        logger.info(
            f"[build_pyramid] Level {ind_level} data: "
            f"{str(newlevel_rechunked)}"
        )

        zarrurl_level = f"{zarrurl}/{ind_level}"

        # If overwrite is false, check that the array doesn't exist yet
        if not overwrite:
            try:
                zarr.open(zarrurl_level, mode="r")
            except zarr.errors.PathNotFoundError:
                # Expected case: the level is not on disk yet
                pass
            else:
                raise ValueError(
                    f"While building the pyramids, pyramid level {ind_level} "
                    "already existed, but `build_pyramid` was called with "
                    f"{overwrite=}."
                )

        zarrarr = zarr.open(
            zarrurl_level,
            shape=newlevel_rechunked.shape,
            chunks=newlevel_rechunked.chunksize,
            dtype=newlevel_rechunked.dtype,
            mode="w",
            **zarr_open_kwargs,
        )

        # Write zarr and store output (useful to construct next level)
        previous_level = newlevel_rechunked.to_zarr(
            zarrarr,
            overwrite=overwrite,
            compute=True,
            return_stored=True,
            write_empty_chunks=False,
            dimension_separator=dimension_separator,
        )