Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ date_modified: 2026-04-30

- Added [#2267](https://github.com/roboflow/supervision/pull/2267): [`DetectionDataset.as_coco`](https://supervision.roboflow.com/latest/datasets/core/#supervision.dataset.core.DetectionDataset.as_coco) and `save_coco_annotations` now accept `starting_image_id` and `starting_annotation_id` parameters (both default to `1`, preserving existing behavior) and return a `(next_image_id, next_annotation_id)` tuple. Feed the returned values into the next split's call to produce globally unique COCO ids across train/valid/test exports. Fixes id collisions reported in [#768](https://github.com/roboflow/supervision/issues/768). **Note**: the return type changes from `None` to `tuple[int, int]` — callers that assert `result is None` must be updated.

- Added [#2027](https://github.com/roboflow/supervision/issues/2027): [`sv.InferenceSlicer`](https://supervision.roboflow.com/latest/detection/tools/inference_slicer/#supervision.detection.tools.inference_slicer.InferenceSlicer) now accepts an open rasterio-style dataset in addition to in-memory images. Each tile is read lazily via a windowed read instead of loading the whole image, enabling tiled inference on multi-GB aerial/drone GeoTIFFs without running out of memory. The dataset is recognized by duck typing (any object exposing a callable `read` plus `crs`, `width`, and `height`), so `rasterio` stays an optional dependency installable via `pip install "supervision[geotiff]"` and the core library imports no rasterio symbols. A dataset with a geographic (non-projected) CRS raises `ValueError`.

### 0.28.0 <small>Apr 30, 2026</small>

- Added [#2159](https://github.com/roboflow/supervision/pull/2159): [`sv.CompactMask`](https://supervision.roboflow.com/latest/detection/compact_mask/#supervision.detection.compact_mask.CompactMask) for memory-efficient mask storage. Masks are stored as crop-region bounding boxes plus RLE-encoded data instead of full-resolution bitmaps, reducing memory by up to 240× for sparse masks. Integrates transparently with `sv.Detections.mask` — filtering, merging, and `area` all work without materialising the full array.
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ dependencies = [
"scipy>=1.10",
"tqdm>=4.62.3"
]
optional-dependencies.geotiff = [
"rasterio>=1.3",
]
optional-dependencies.metrics = [
"pandas>=2",
]
Expand Down
93 changes: 86 additions & 7 deletions src/supervision/detection/tools/inference_slicer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import warnings
from collections.abc import Callable
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any
from typing import Any, Protocol

import numpy as np
import numpy.typing as npt
Expand All @@ -20,6 +20,35 @@
from supervision.utils.internal import SupervisionWarnings


class WindowedRasterDataset(Protocol):
    """Structural type for a rasterio-style dataset read window-by-window.

    Matched structurally (see [`_is_windowed_raster`][]) rather than by import so
    `rasterio` stays an optional dependency — any object exposing these members
    works. `rasterio.io.DatasetReader` satisfies this protocol.

    Attributes:
        width: Full raster width in pixels.
        height: Full raster height in pixels.
        crs: Coordinate reference system of the dataset, or `None` when the
            dataset carries none. When present it is expected to expose an
            `is_projected` flag.
    """

    width: int
    height: int
    crs: Any

    def read(self, *, window: Any) -> npt.NDArray[Any]:
        """Read only the pixels that fall inside `window`.

        `window` is keyword-only on purpose: in rasterio the first positional
        parameter of `read` is `indexes` (band selection), so a window passed
        positionally would be misinterpreted as band indexes.

        Args:
            window: Region to read, e.g. a rasterio tuple window
                `((row_start, row_stop), (col_start, col_stop))`.

        Returns:
            The pixel data for the window, laid out as
            `(channels, height, width)`.
        """
        ...


def _is_windowed_raster(image: object) -> bool:
    """Return whether `image` looks like a rasterio-style windowed dataset.

    The check is purely structural so that rasterio never has to be imported
    and can remain an optional dependency: the object must expose a callable
    `read` together with `crs`, `width` and `height` attributes. numpy arrays
    and PIL images do not expose this combination of attributes.

    Args:
        image: Any object handed to the slicer.

    Returns:
        `True` when every required member is present, `False` otherwise.
    """
    # A callable `read` is the defining capability; bail out early without it.
    if not callable(getattr(image, "read", None)):
        return False

    required_attributes = ("crs", "width", "height")
    return all(hasattr(image, attribute) for attribute in required_attributes)


def move_detections(
detections: Detections,
offset: npt.NDArray[Any],
Expand Down Expand Up @@ -138,6 +167,24 @@ def callback(tile):
image = Image.open("example.png")
detections = slicer(image)
```

```python
import rasterio
import supervision as sv

def callback(tile): # tile is (H, W, C); select/convert bands as needed
...

slicer = sv.InferenceSlicer(callback, slice_wh=640, overlap_wh=100)

with rasterio.open("large_orthomosaic.tif") as dataset:
detections = slicer(dataset)
```

Passing an open rasterio dataset reads each tile lazily via a windowed
read, so multi-GB GeoTIFFs never need to be loaded into memory at once.
`rasterio` is an optional dependency installable via
`pip install "supervision[geotiff]"`.
"""

def __init__(
Expand Down Expand Up @@ -175,7 +222,7 @@ def __init__(
self._obb_thread_workers_warned: bool = False
self._obb_thread_workers_lock = threading.Lock()

def __call__(self, image: ImageType) -> Detections:
def __call__(self, image: ImageType | WindowedRasterDataset) -> Detections:
"""
Perform tiled inference on the full image and return merged detections.

Expand All @@ -188,13 +235,31 @@ def __call__(self, image: ImageType) -> Detections:
once per slicer instance.

Args:
image: The full image to run inference on.
image: The full image to run inference on. In addition to in-memory
images (NumPy arrays or PIL images), this also accepts an open
rasterio-style dataset. When a dataset is provided, each tile is
read lazily via a windowed read instead of loading the whole image
into memory, enabling tiled inference on multi-GB GeoTIFFs. Tiles
read from a dataset preserve the source dtype (e.g. ``uint16`` for
16-bit sensors) and keep every band; convert or select bands to
the dtype/channels your model expects inside the callback.

Returns:
Merged detections across all slices.
"""
detections_list: list[Detections] = []
resolution_wh = get_image_resolution_wh(image)
if _is_windowed_raster(image):
crs = image.crs
if crs is not None and not crs.is_projected:
raise ValueError(
"InferenceSlicer requires a projected coordinate reference "
"system for pixel-space tiled inference on a raster dataset. "
f"The provided dataset uses a geographic CRS ({crs}). Reproject "
"it to a projected CRS (e.g. with `gdalwarp`) before slicing."
)
resolution_wh = (image.width, image.height)
else:
resolution_wh = get_image_resolution_wh(image)

offsets = self._generate_offset(
resolution_wh=resolution_wh,
Expand Down Expand Up @@ -272,7 +337,9 @@ def __call__(self, image: ImageType) -> Detections:
)
return merged

def _run_callback(self, image: ImageType, offset: npt.NDArray[Any]) -> Detections:
def _run_callback(
self, image: ImageType | WindowedRasterDataset, offset: npt.NDArray[Any]
) -> Detections:
"""
Run detection callback on a sliced portion of the image and adjust coordinates.

Expand All @@ -284,7 +351,20 @@ def _run_callback(self, image: ImageType, offset: npt.NDArray[Any]) -> Detection
Returns:
Detections adjusted to the full image coordinate system.
"""
image_slice = crop_image(image=image, xyxy=offset)
if _is_windowed_raster(image):
x_min, y_min, x_max, y_max = (int(v) for v in offset)
# rasterio tuple window:
# ((row_start, row_stop), (col_start, col_stop))
window = ((y_min, y_max), (x_min, x_max))
bands = image.read(window=window) # shape (channels, height, width)
image_slice = np.ascontiguousarray(
np.transpose(bands, (1, 2, 0))
) # -> (H, W, C)
resolution_wh = (image.width, image.height)
else:
image_slice = crop_image(image=image, xyxy=offset)
resolution_wh = get_image_resolution_wh(image)

detections = self.callback(image_slice)

if (
Expand All @@ -299,7 +379,6 @@ def _run_callback(self, image: ImageType, offset: npt.NDArray[Any]) -> Detection
image_shape=(slice_h, slice_w),
)

resolution_wh = get_image_resolution_wh(image)
# Fast-path: skip locking and bounds checking when the warning has already
# been emitted or when there are no detections to inspect.
needs_warning_check = (
Expand Down
Loading
Loading