diff --git a/src/spatialdata_plot/pl/__init__.py b/src/spatialdata_plot/pl/__init__.py index 178b4914..77c74024 100644 --- a/src/spatialdata_plot/pl/__init__.py +++ b/src/spatialdata_plot/pl/__init__.py @@ -1,8 +1,10 @@ from ._palette import make_palette, make_palette_from_data from .basic import PlotAccessor +from .utils import measure_obs __all__ = [ "PlotAccessor", "make_palette", "make_palette_from_data", + "measure_obs", ] diff --git a/src/spatialdata_plot/pl/utils.py b/src/spatialdata_plot/pl/utils.py index 5ddefdb9..b6eab503 100644 --- a/src/spatialdata_plot/pl/utils.py +++ b/src/spatialdata_plot/pl/utils.py @@ -26,6 +26,7 @@ import spatialdata as sd from anndata import AnnData from cycler import Cycler, cycler +from dask.array.core import slices_from_chunks from datashader.core import Canvas from geopandas import GeoDataFrame from matplotlib import colors, patheffects, rcParams @@ -59,6 +60,7 @@ from skimage.util import map_array from spatialdata import ( SpatialData, + deepcopy as sd_deepcopy, get_element_annotators, get_extent, get_values, @@ -67,7 +69,14 @@ ) from spatialdata._core.query.relational_query import _locate_value from spatialdata._types import ArrayLike -from spatialdata.models import Image2DModel, Labels2DModel, SpatialElement, get_table_keys +from spatialdata.models import ( + Image2DModel, + Labels2DModel, + ShapesModel, + SpatialElement, + get_model, + get_table_keys, +) from spatialdata.transformations.operations import get_transformation from spatialdata.transformations.transformations import Scale, Translation from spatialdata.transformations.transformations import Sequence as TransformSequence @@ -4417,3 +4426,305 @@ def _convert_alpha_to_datashader_range(alpha: float) -> float: """Convert alpha from the range [0, 1] to the range [0, 255] used in datashader.""" # prevent a value of 255, bc that led to fully colored test plots instead of just colored points/shapes return min([254, alpha * 255]) + + +# --- Per-cell measurements into the annotating table (centroid / area / equivalent diameter) --- + +# Destination keys (measurements are stored intrinsic, coordinate-system independent). +# obsm["spatial"] is the squidpy/scanpy convention for per-cell coordinates (an N x 2 array). +_CENTROID_OBSM_KEY = "spatial" +_AREA_OBS_KEY = "area" +_DIAMETER_OBS_KEY = "equivalent_diameter" + + +def _pixel_to_coord(idx: ArrayLike, coord: ArrayLike) -> ArrayLike: + """Map fractional pixel indices to intrinsic coordinates along one axis (handles non-unit spacing).""" + spacing = (coord[1] - coord[0]) if len(coord) > 1 else 1.0 + return coord[0] + np.asarray(idx) * spacing + + +def _stream_label_centroid_stats(data: Any) -> tuple[ArrayLike, ArrayLike, ArrayLike, ArrayLike]: + """Per-label ``(labels, mean_x_index, mean_y_index, area)`` via a streaming bincount aggregator. + + Streams the raster block by block — one chunk in memory at a time for a dask array, a + bounded row-block at a time for a numpy array — accumulating per-label ``count`` (= area), + ``sum_x`` and ``sum_y``. Labels are relabelled to a dense ``0..k-1`` range, so memory is + O(number of distinct labels), independent of the raster size *and* of the label-id magnitude + (sparse/global ids do not blow it up). The reduction is additive, so it is exact across block + boundaries. Background label 0 is excluded. + """ + n_rows, n_cols = data.shape + if hasattr(data, "chunks"): # dask + block_slices = slices_from_chunks(data.chunks) + else: + data = np.asarray(data) + # bound the per-block coordinate-weight arrays to ~8M pixels + step = max(1, min(n_rows, (8 << 20) // max(1, n_cols))) + block_slices = [(slice(r0, min(r0 + step, n_rows)), slice(0, n_cols)) for r0 in range(0, n_rows, step)] + + def _load(row_sl: slice, col_sl: slice) -> np.ndarray: + block = data[row_sl, col_sl] + block = np.asarray(block.compute() if hasattr(block, "compute") else block) + # label rasters are integer-valued even when stored as float; cast so np.unique/searchsorted + # stay integer (and the dense bincount indices below are always int). + return block.astype(np.int64) if block.dtype.kind == "f" else block + + # Pass 1: the sorted set of present label values -> dense relabelling (keeps memory O(n_labels)). + uniq = np.zeros(0, dtype=np.int64) + for row_sl, col_sl in block_slices: + uniq = np.union1d(uniq, np.unique(_load(row_sl, col_sl))) + k = uniq.size + + # Pass 2: additive per-(dense-)label count / sum_x / sum_y. + count = np.zeros(k) + sum_x = np.zeros(k) + sum_y = np.zeros(k) + for row_sl, col_sl in block_slices: + block = _load(row_sl, col_sl) + block_rows, block_cols = block.shape + idx = np.searchsorted(uniq, block.reshape(-1)) # dense 0..k-1 indices (always int) + cols = np.tile(np.arange(col_sl.start, col_sl.start + block_cols, dtype=np.float64), block_rows) + rows = np.repeat(np.arange(row_sl.start, row_sl.start + block_rows, dtype=np.float64), block_cols) + count += np.bincount(idx, minlength=k) + sum_x += np.bincount(idx, weights=cols, minlength=k) + sum_y += np.bincount(idx, weights=rows, minlength=k) + + keep = uniq != 0 # drop background; every kept label has count >= 1 + return uniq[keep], sum_x[keep] / count[keep], sum_y[keep] / count[keep], count[keep] + + +def _compute_element_measurements(sdata: SpatialData, element_name: str) -> pd.DataFrame: + """One row per instance with intrinsic ``["x", "y", "area"]``, indexed by instance id. + + Shapes use shapely's vectorized centroid; circles (``Point`` geometry + ``radius``) have + ``area = pi*r**2`` (shapely ``.area`` is 0 for them), polygons use the geometric area. 2D labels + use the streaming bincount aggregator (``area`` = pixel count) — it holds one chunk plus + O(n_labels) accumulators, so it scales to Xenium-size masks where a whole-array ``regionprops`` + table would run out of memory. Area meaning differs across element types but is consistent within + one element. + """ + element = sdata[element_name] + model = get_model(element) + if model is ShapesModel: + geometry = element.geometry + centroids = geometry.centroid + # Dispatch on geometry TYPE, not a column name: circles are Point geometries (shapely .area + # is 0 for them) with a radius -> pi*r**2; everything else uses the true geometric area. + if (geometry.geom_type == "Point").all(): + area = np.pi * np.asarray(element["radius"], dtype=float) ** 2 + else: + area = geometry.area.to_numpy() + return pd.DataFrame( + {"x": centroids.x.to_numpy(), "y": centroids.y.to_numpy(), "area": area}, index=element.index + ) + if model is Labels2DModel: + # multiscale rasters carry their data on the scale0 level + raster = next(iter(element["scale0"].values())) if isinstance(element, DataTree) else element + labels, x_idx, y_idx, area = _stream_label_centroid_stats(raster.data) + # bincount gives mean 0-based pixel indices; map them onto the raster's intrinsic coords. + return pd.DataFrame( + { + "x": _pixel_to_coord(x_idx, raster.coords["x"].values), + "y": _pixel_to_coord(y_idx, raster.coords["y"].values), + "area": np.asarray(area, dtype=float), + }, + index=labels, + ) + raise NotImplementedError( + f"Measurement is only supported for shapes and 2D labels; element {element_name!r} is a {model.__name__}." + ) + + +def _valid_spatial_obsm(arr: ArrayLike, n_obs: int) -> bool: + """Whether ``arr`` is a usable ``obsm["spatial"]``: a 2D ``(n_obs, 2)`` coordinate grid.""" + return bool(arr.ndim == 2 and arr.shape == (n_obs, 2)) + + +def _obsm_region_finite(table: AnnData, key: str, mask: ArrayLike) -> bool: + """Whether ``obsm[key]`` already holds finite coords for every ``mask`` row (already populated).""" + if key not in table.obsm: + return False + arr = np.asarray(table.obsm[key]) + if not _valid_spatial_obsm(arr, table.n_obs): + return False + region = arr[mask].astype(float) + return bool(region.size and np.isfinite(region).all()) + + +def _check_obs_numeric(table: AnnData, key: str) -> None: + """Raise if ``obs[key]`` exists but is non-numeric, before any mutation (avoids half-writes).""" + if key in table.obs and not is_numeric_dtype(table.obs[key]): + raise ValueError( + f"Cannot write measurements into obs[{key!r}]: the existing column is " + f"{table.obs[key].dtype} (not numeric). Drop or rename the column first." + ) + + +def _write_region(table: AnnData, mask: ArrayLike, key: str, values: ArrayLike, *, obsm: bool) -> None: + """Write ``values`` into ``obsm[key]`` (2D) or ``obs[key]`` (1D) at ``mask`` rows; others stay/NaN. + + Refuses to overwrite an incompatible existing ``obsm[key]`` (e.g. a 3-column xyz array) rather + than silently dropping data. + """ + store = table.obsm if obsm else table.obs + if key in store: + existing = np.asarray(store[key]) + if obsm and not _valid_spatial_obsm(existing, table.n_obs): + raise ValueError( + f"Refusing to overwrite obsm[{key!r}] with shape {existing.shape}; expected " + f"({table.n_obs}, 2). Remove it first if you want it replaced." + ) + arr = existing.astype(float, copy=True) + else: + arr = np.full((table.n_obs, 2) if obsm else table.n_obs, np.nan) + arr[mask] = np.asarray(values, dtype=float) + store[key] = arr + + +def _measure_into_table( + sdata: SpatialData, element_name: str, table_name: str, *, centroids: bool, area: bool, diameter: bool +) -> None: + """Compute and write the requested measurements for one element into its annotating table. + + Only the rows belonging to ``element_name`` are touched (a table may annotate several elements). + Centroids already present for those rows (e.g. reader-provided ``obsm["spatial"]``) are not + overwritten; ``area``/``diameter`` overwrite our own columns. All targets are validated before + the first write, so a bad column never leaves the table half-written. + """ + table = sdata.tables[table_name] + _, region_key, instance_key = get_table_keys(table) + mask = (table.obs[region_key].astype(str) == str(element_name)).to_numpy() + if not mask.any(): + raise ValueError(f"Table {table_name!r} does not annotate element {element_name!r} (no matching rows).") + + # #1: never clobber centroids already populated for this element's rows (reader/prior-call coords). + if centroids and _obsm_region_finite(table, _CENTROID_OBSM_KEY, mask): + warnings.warn( + f"obsm[{_CENTROID_OBSM_KEY!r}] is already populated for element {element_name!r}; not " + f"overwriting its centroids (remove it to recompute).", + UserWarning, + stacklevel=3, + ) + centroids = False + if not (centroids or area or diameter): + return + + keys = table.obs[instance_key].to_numpy()[mask] + meas = _compute_element_measurements(sdata, element_name).reindex(keys) + # #2: instance ids annotated in the table but absent from the element reindex to NaN -> warn. + missing = int(meas[["x", "y"]].isna().any(axis=1).sum()) + if missing: + warnings.warn( + f"{missing}/{len(keys)} instances annotated for {element_name!r} have no match in the " + f"element (instance-id dtype mismatch, e.g. str vs int?); writing NaN for them.", + UserWarning, + stacklevel=3, + ) + + # #4: validate obs targets up front so an existing non-numeric column raises before any write. + if area: + _check_obs_numeric(table, _AREA_OBS_KEY) + if diameter: + _check_obs_numeric(table, _DIAMETER_OBS_KEY) + + area_vals = meas["area"].to_numpy() + if centroids: + _write_region(table, mask, _CENTROID_OBSM_KEY, meas[["x", "y"]].to_numpy(), obsm=True) + if area: + _write_region(table, mask, _AREA_OBS_KEY, area_vals, obsm=False) + if diameter: + _write_region(table, mask, _DIAMETER_OBS_KEY, 2.0 * np.sqrt(area_vals / np.pi), obsm=False) + + +def _resolve_measure_table(sdata: SpatialData, element_name: str, table_name: str | None) -> str: + """Resolve the single annotating table for ``element_name`` (where measurements are written).""" + if table_name is not None: + if table_name not in sdata.tables: + raise KeyError(f"Table {table_name!r} not found in `sdata.tables`.") + return table_name + annotators = sorted(get_element_annotators(sdata, element_name)) + if not annotators: + raise ValueError( + f"Element {element_name!r} has no annotating table; per-cell measurements need a table " + f"to write into. Pass `table_name=` or annotate the element first." + ) + if len(annotators) > 1: + raise ValueError( + f"Element {element_name!r} is annotated by multiple tables ({', '.join(annotators)}); " + f"pass `table_name=` to pick one." + ) + return annotators[0] + + +def measure_obs( + sdata: SpatialData, + element: str | None = None, + *, + table_name: str | None = None, + centroids: bool = True, + area: bool = True, + diameter: bool = True, + inplace: bool = True, +) -> SpatialData | None: + """Measure per-cell centroids, area and equivalent diameter into an element's annotating table. + + Computes one centroid, area and equivalent diameter per instance of a shapes or 2D-labels + element and writes them, squidpy-style, into the annotating :class:`~anndata.AnnData` table: + centroids go to ``table.obsm["spatial"]`` (an ``(n_obs, 2)`` array, the squidpy convention), + area and diameter to ``table.obs["area"]`` and ``table.obs["equivalent_diameter"]``. Values are + stored in the element's *intrinsic* pixel coordinates/units (which align directly with the + element's own raster/geometry). + Labels area is the pixel count; shapes area is ``geometry.area`` (``pi*r**2`` for circles); + equivalent diameter is ``2 * sqrt(area / pi)``. Persisting them once lets later renders (and + downstream tools such as squidpy) reuse them instead of recomputing. + + Centroids already present for an element's rows (e.g. a reader-provided ``obsm["spatial"]``) are + **not** overwritten — a warning is emitted and that element's centroid write is skipped (remove + the key to recompute); ``area``/``diameter`` overwrite our own columns. Instances annotated in + the table but absent from the element are written as NaN with a warning. Per-cell measurements + need a table to write into, so an element without an annotating table cannot be measured (this + raises). The label path never densifies the raster — it streams it block by block with memory + O(n_labels), scaling to Xenium-size masks. + + Parameters + ---------- + sdata + The ``SpatialData`` object holding the element and its annotating table. + element + Name of the shapes/2D-labels element to measure. If ``None``, every shapes/2D-labels + element that has exactly one annotating table is measured. + table_name + Name of the annotating table to write into. If ``None``, it is inferred from the element's + annotators (an error is raised when there are zero or several). + centroids, area, diameter + Which measurements to compute/write. At least one must be ``True``. They are written to + ``obsm["spatial"]``, ``obs["area"]`` and ``obs["equivalent_diameter"]`` respectively. + inplace + If ``True`` (default), mutate ``sdata``'s table in place and return ``None``. If ``False``, + operate on a deep copy and return the modified ``SpatialData``. + + Returns + ------- + ``None`` if ``inplace`` is ``True``, otherwise the modified deep-copied ``SpatialData``. + """ + if not (centroids or area or diameter): + raise ValueError("Nothing to measure: set at least one of `centroids`, `area`, `diameter` to True.") + target = sdata if inplace else sd_deepcopy(sdata) + if element is None: + # measure every shapes / 2D-labels element that has exactly one annotating table + names = [ + n + for n in list(target.shapes) + list(target.labels) + if get_model(target[n]) in (ShapesModel, Labels2DModel) and len(get_element_annotators(target, n)) == 1 + ] + if not names: + raise ValueError( + "No shapes/2D-labels element with a single annotating table was found; nothing to " + "measure. Pass an explicit `element=` (and `table_name=` if ambiguous)." + ) + else: + names = [element] + for name in names: + table = _resolve_measure_table(target, name, table_name) + _measure_into_table(target, name, table, centroids=centroids, area=area, diameter=diameter) + return None if inplace else target diff --git a/tests/pl/test_utils.py b/tests/pl/test_utils.py index 4a7c8a5b..87b41495 100644 --- a/tests/pl/test_utils.py +++ b/tests/pl/test_utils.py @@ -8,10 +8,11 @@ import xarray as xr from anndata import AnnData from shapely.geometry import Point -from spatialdata import SpatialData -from spatialdata.models import PointsModel, ShapesModel, TableModel +from spatialdata import SpatialData, get_centroids +from spatialdata.models import Labels2DModel, PointsModel, ShapesModel, TableModel import spatialdata_plot +from spatialdata_plot.pl import measure_obs from spatialdata_plot.pl.render_params import Color, ColorLike from spatialdata_plot.pl.utils import ( _apply_cmap_alpha_to_datashader_result, @@ -496,3 +497,185 @@ def fake_rasterize(*args, **kwargs): assert captured["target_unit_to_pixels"] == pytest.approx(0.15, rel=1e-6), ( f"Expected world-unit basis (0.15), got {captured['target_unit_to_pixels']}" ) + + +def _add_shapes_table(sdata: SpatialData, element: str = "blobs_polygons", name: str = "shapes_table") -> SpatialData: + """Add a table annotating a shapes element so it can be measured.""" + gdf = sdata[element] + adata = AnnData(np.zeros((len(gdf), 1), dtype=np.float32)) + adata.obs["instance_id"] = list(gdf.index) + adata.obs["region"] = element + sdata[name] = TableModel.parse( + adata, region_key="region", instance_key="instance_id", region=element + ) + return sdata + + +def _labels_sdata(arr: np.ndarray, name: str = "lab", table: str = "t") -> SpatialData: + """Build a SpatialData with a single 2D-labels element annotated by a table.""" + ids = np.unique(arr) + ids = ids[ids != 0].astype(int) + adata = AnnData(np.zeros((len(ids), 1), dtype=np.float32)) + adata.obs["instance_id"] = ids + adata.obs["region"] = name + return SpatialData( + labels={name: Labels2DModel.parse(arr, dims=("y", "x"))}, + tables={table: TableModel.parse(adata, region_key="region", instance_key="instance_id", region=name)}, + ) + + +class TestMeasureObs: + """`measure_obs` materializes centroids/area/equivalent diameter into the annotating table.""" + + def test_writes_centroid_area_diameter_for_labels(self, sdata_blobs: SpatialData) -> None: + ret = measure_obs(sdata_blobs, "blobs_labels") + assert ret is None # inplace default + table = sdata_blobs["table"] + + assert "spatial" in table.obsm + coords = table.obsm["spatial"] + assert coords.shape == (table.n_obs, 2) + assert np.isfinite(coords).all() + assert "area" in table.obs and "equivalent_diameter" in table.obs + + # centroids match spatialdata's get_centroids (blobs_labels has the identity transform, + # so global == intrinsic here) + gc = get_centroids(sdata_blobs["blobs_labels"], coordinate_system="global").compute() + expected = gc.reindex(table.obs["instance_id"].to_numpy())[["x", "y"]].to_numpy() + np.testing.assert_allclose(coords, expected, rtol=1e-6) + + # area is the pixel count (positive integers); diameter = 2*sqrt(area/pi) + area = table.obs["area"].to_numpy() + assert (area > 0).all() + np.testing.assert_allclose( + table.obs["equivalent_diameter"].to_numpy(), 2.0 * np.sqrt(area / np.pi), rtol=1e-12 + ) + + def test_writes_for_shapes(self, sdata_blobs: SpatialData) -> None: + _add_shapes_table(sdata_blobs, "blobs_polygons") + measure_obs(sdata_blobs, "blobs_polygons", table_name="shapes_table") + table = sdata_blobs["shapes_table"] + + gdf = sdata_blobs["blobs_polygons"] + expected_xy = np.column_stack([gdf.geometry.centroid.x, gdf.geometry.centroid.y]) + order = table.obs["instance_id"].to_numpy() + expected_xy = expected_xy[[list(gdf.index).index(i) for i in order]] + np.testing.assert_allclose(table.obsm["spatial"], expected_xy, rtol=1e-9) + np.testing.assert_allclose( + table.obs["area"].to_numpy(), + gdf.geometry.area.to_numpy()[[list(gdf.index).index(i) for i in order]], + rtol=1e-9, + ) + + def test_circles_area_is_pi_r_squared(self, sdata_blobs: SpatialData) -> None: + # Circles are stored as Point geometries (radius column); shapely .area is 0 for them, + # so area must be pi * r**2 and equivalent diameter must equal the true diameter 2*r. + _add_shapes_table(sdata_blobs, "blobs_circles", name="circles_table") + measure_obs(sdata_blobs, "blobs_circles", table_name="circles_table") + table = sdata_blobs["circles_table"] + gdf = sdata_blobs["blobs_circles"] + order = table.obs["instance_id"].to_numpy() + r = gdf["radius"].to_numpy()[[list(gdf.index).index(i) for i in order]] + np.testing.assert_allclose(table.obs["area"].to_numpy(), np.pi * r**2, rtol=1e-9) + np.testing.assert_allclose(table.obs["equivalent_diameter"].to_numpy(), 2.0 * r, rtol=1e-9) + + def test_inplace_false_leaves_original_untouched(self, sdata_blobs: SpatialData) -> None: + out = measure_obs(sdata_blobs, "blobs_labels", inplace=False) + assert isinstance(out, SpatialData) + assert "spatial" in out["table"].obsm + assert "spatial" not in sdata_blobs["table"].obsm # original not mutated + + def test_existing_centroids_not_clobbered(self, sdata_blobs: SpatialData) -> None: + # #1: a populated obsm["spatial"] (reader- or prior-call-provided) is not overwritten. + table = sdata_blobs["table"] + sentinel = np.arange(table.n_obs * 2, dtype=float).reshape(table.n_obs, 2) + table.obsm["spatial"] = sentinel.copy() + with pytest.warns(UserWarning, match="already populated"): + measure_obs(sdata_blobs, "blobs_labels") + np.testing.assert_array_equal(table.obsm["spatial"], sentinel) # centroids untouched + assert "area" in table.obs # area/diameter still written + + def test_centroids_false_keeps_existing_obsm(self, sdata_blobs: SpatialData) -> None: + table = sdata_blobs["table"] + sentinel = np.arange(table.n_obs * 2, dtype=float).reshape(table.n_obs, 2) + table.obsm["spatial"] = sentinel.copy() + measure_obs(sdata_blobs, "blobs_labels", centroids=False) # only area/diameter written + np.testing.assert_array_equal(table.obsm["spatial"], sentinel) + assert "area" in table.obs + + def test_incompatible_obsm_shape_raises(self, sdata_blobs: SpatialData) -> None: + table = sdata_blobs["table"] + table.obsm["spatial"] = np.zeros((table.n_obs, 3)) # e.g. xyz; cannot write 2D centroids over it + with pytest.raises(ValueError, match="Refusing to overwrite"): + measure_obs(sdata_blobs, "blobs_labels") + + def test_unmatched_instance_ids_warn_and_write_nan(self, sdata_blobs: SpatialData) -> None: + # #2: instance ids that don't match the element (e.g. str vs int) -> warn + NaN, not silent. + table = sdata_blobs["table"] + table.obs["instance_id"] = table.obs["instance_id"].astype(str) + with pytest.warns(UserWarning, match="no match"): + measure_obs(sdata_blobs, "blobs_labels") + assert np.isnan(table.obsm["spatial"]).all() + + def test_float_dtype_labels_supported(self, sdata_blobs: SpatialData) -> None: + # #3: a float-typed (but integer-valued) labels raster must not crash np.bincount. + arr = np.asarray(sdata_blobs["blobs_labels"].data).astype(np.float32) + sd = _labels_sdata(arr) + measure_obs(sd, "lab", table_name="t") + assert np.isfinite(sd["t"].obsm["spatial"]).all() + + def test_existing_nonnumeric_column_raises_before_any_write(self, sdata_blobs: SpatialData) -> None: + # #4: a non-numeric collision raises BEFORE obsm is mutated (no half-written table). + table = sdata_blobs["table"] + table.obs["area"] = pd.Categorical(["lo"] * table.n_obs) + with pytest.raises(ValueError, match="not numeric"): + measure_obs(sdata_blobs, "blobs_labels") + assert "spatial" not in table.obsm # atomic: nothing written + + def test_sparse_high_label_ids(self, sdata_blobs: SpatialData) -> None: + # #5: sparse/high label ids (max id >> n_labels) are measured correctly (dense relabelling). + arr = np.asarray(sdata_blobs["blobs_labels"].data) + hi = (arr.astype(np.int64) * 1000) # ids become 1000, 2000, ... ; max id is huge, few labels + measure_obs(sd_hi := _labels_sdata(hi), "lab", table_name="t") + measure_obs(sd_lo := _labels_sdata(arr.astype(np.int64)), "lab", table_name="t") + # relabelling values does not move pixels -> identical centroid set + np.testing.assert_allclose( + np.sort(sd_hi["t"].obsm["spatial"], axis=0), np.sort(sd_lo["t"].obsm["spatial"], axis=0) + ) + + def test_polygon_with_radius_column_uses_geometric_area(self, sdata_blobs: SpatialData) -> None: + # #7: dispatch on geometry type, not the "radius" column name -> polygons use geometry.area. + gdf = sdata_blobs["blobs_polygons"].copy() + gdf["radius"] = 5.0 # incidental column; must NOT trigger the circle (pi*r**2) branch + sdata_blobs["pr"] = ShapesModel.parse(gdf) + _add_shapes_table(sdata_blobs, "pr", name="pt") + measure_obs(sdata_blobs, "pr", table_name="pt") + order = sdata_blobs["pt"].obs["instance_id"].to_numpy() + expected = gdf.geometry.area.to_numpy()[[list(gdf.index).index(i) for i in order]] + np.testing.assert_allclose(sdata_blobs["pt"].obs["area"].to_numpy(), expected, rtol=1e-9) + + def test_flags_select_outputs(self, sdata_blobs: SpatialData) -> None: + measure_obs(sdata_blobs, "blobs_labels", area=False, diameter=False) + table = sdata_blobs["table"] + assert "spatial" in table.obsm + assert "area" not in table.obs and "equivalent_diameter" not in table.obs + + def test_no_annotating_table_raises(self, sdata_blobs: SpatialData) -> None: + # blobs_circles is not annotated by any table + with pytest.raises(ValueError, match="no annotating table"): + measure_obs(sdata_blobs, "blobs_circles") + + def test_ambiguous_table_raises(self, sdata_blobs: SpatialData) -> None: + _add_shapes_table(sdata_blobs, "blobs_polygons", name="table_a") + _add_shapes_table(sdata_blobs, "blobs_polygons", name="table_b") + with pytest.raises(ValueError, match="multiple tables"): + measure_obs(sdata_blobs, "blobs_polygons") + + def test_nothing_to_measure_raises(self, sdata_blobs: SpatialData) -> None: + with pytest.raises(ValueError, match="at least one"): + measure_obs(sdata_blobs, "blobs_labels", centroids=False, area=False, diameter=False) + + def test_element_none_measures_single_table_elements(self, sdata_blobs: SpatialData) -> None: + # default blobs: only blobs_labels has a single annotating table + measure_obs(sdata_blobs) + assert "spatial" in sdata_blobs["table"].obsm