Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,9 @@ cmake-build*/**
doc/*.pyg
doc/_minted-ITKPerformanceBenchmarking/*


# Python / ASV
*.egg-info/
__pycache__/
.asv/
/itk-repo
89 changes: 89 additions & 0 deletions README-asv.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# ASV harness for ITKPerformanceBenchmarking

Native ITK C++ benchmark executables driven by
[airspeed velocity (asv)](https://asv.readthedocs.io/) for continuous
performance-regression detection on ITK pull requests.

## Design

- **The C++ benchmarks remain canonical.** ASV does not recompile ITK
and does not interpret benchmark results numerically.
- **`environment_type: "existing"`** keeps ASV out of the ITK build
loop. The CI workflow (see `ITK/.github/workflows/perf-benchmark.yml`)
configures and builds ITK + `ITKPerformanceBenchmarking` twice
(merge-base, PR HEAD), then invokes `asv run` against each build.
- **`track_*` not `time_*`.** The timing source of truth is
`HighPriorityRealTimeProbesCollector` inside the C++ subprocess.
The Python shim reads the jsonxx output and returns mean probe
seconds. ASV's built-in timing would include subprocess spawn
overhead, which we want to exclude.

## Environment contract

The shim needs three env vars:

| Var | Purpose |
|-----|---------|
| `ITK_BENCHMARK_BIN` | Dir containing `MedianBenchmark`, `GradientMagnitudeBenchmark`, etc. |
| `ITK_BENCHMARK_DATA` | ExternalData root with the `brainweb165a10f17.mha` fixture and its `extract45i90z` / `extract60i50z` crops (see `python/itk_perf_shim/registry.py`) |
| `ITK_BENCHMARK_SCRATCH` | Optional scratch dir for per-run output images |

## Local smoke test

```sh
# 1. Build ITK with Module_PerformanceBenchmarking=ON and BUILD_EXAMPLES=ON,
#    then build the 11 benchmark targets used by the harness (a full ITK
#    build is not needed):
cmake -S /path/to/ITK -B /path/to/ITK-build -GNinja -DCMAKE_BUILD_TYPE=Release \
-DBUILD_TESTING=ON -DBUILD_EXAMPLES=ON -DBUILD_SHARED_LIBS=OFF \
-DModule_PerformanceBenchmarking=ON
cmake --build /path/to/ITK-build --target \
CopyIterationBenchmark VectorIterationBenchmark \
BinaryAddBenchmark UnaryAddBenchmark GradientMagnitudeBenchmark \
MedianBenchmark MinMaxCurvatureFlowBenchmark \
RegionGrowingBenchmark WatershedBenchmark MorphologicalWatershedBenchmark \
LevelSetBenchmark \
ITKBenchmarksData

# 2. Create the itk-repo symlink required by asv.conf.json:
cd /path/to/ITKPerformanceBenchmarking
ln -sfn /path/to/ITK itk-repo

# 3. Install asv + the shim into a Python environment:
python -m venv .venv && . .venv/bin/activate
pip install "asv>=0.6,<0.7"
pip install -e python

# 4. Point the shim at the build + data:
export ITK_BENCHMARK_BIN=/path/to/ITK-build/bin
export ITK_BENCHMARK_DATA=/path/to/ITK-build/ExternalData/Modules/Remote/PerformanceBenchmarking/examples/Data/Input
export ITK_BENCHMARK_SCRATCH=/tmp/itkperf

# 5. Register the machine (first run only):
asv machine --yes --machine $(hostname -s)

# 6. Run labelled with the current ITK SHA (environment_type: existing
# does not accept range specs — --set-commit-hash does the labelling):
ITK_SHA=$(cd itk-repo && git rev-parse HEAD)
asv run --machine $(hostname -s) --set-commit-hash "$ITK_SHA" --python=same
```

## PR comparison pattern (for CI)

```sh
# Assumes ITK-base-build and ITK-head-build already exist, and the
# itk-repo symlink points at the ITK clone.
BASE_SHA=$(cd itk-repo && git merge-base origin/master HEAD)
HEAD_SHA=$(cd itk-repo && git rev-parse HEAD)

ITK_BENCHMARK_BIN=/path/to/ITK-base-build/bin \
asv run --machine $HOST --set-commit-hash "$BASE_SHA" --python=same

ITK_BENCHMARK_BIN=/path/to/ITK-head-build/bin \
asv run --machine $HOST --set-commit-hash "$HEAD_SHA" --python=same

asv compare "$BASE_SHA" "$HEAD_SHA" --factor=1.10 --split
asv publish
```

## Status

Prototype — May 2026. Covers 11 benchmarks (Core: 2, Filtering: 5,
Segmentation: 4). The remaining executables (Registration and Resample
variants) will be added once the end-to-end loop is validated.
22 changes: 22 additions & 0 deletions asv.conf.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"version": 1,
"project": "ITK",
"project_url": "https://itk.org/",
"_comment_repo": "'repo' points at the ITK source tree whose commits label the time series. The caller (CI workflow or local smoke test) must create a symlink named 'itk-repo' at the root of this harness pointing to the ITK clone before invoking asv.",
"repo": "itk-repo",
"branches": ["origin/master"],
"environment_type": "existing",
"show_commit_url": "https://github.com/InsightSoftwareConsortium/ITK/commit/",
"benchmark_dir": "benchmarks",
"env_dir": ".asv/env",
"results_dir": ".asv/results",
"html_dir": ".asv/html",
"build_command": [],
"install_command": [
"python -m pip install --no-build-isolation -e python"
],
"uninstall_command": [
"return-code=any python -m pip uninstall -y itk_perf_shim"
],
"matrix": {}
}
Empty file added benchmarks/__init__.py
Empty file.
26 changes: 26 additions & 0 deletions benchmarks/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""ASV benchmarks for ITK Core-group iteration.

Each `track_*` function returns seconds measured *inside* the C++ executable
via HighPriorityRealTimeProbesCollector. We use `track_*` rather than `time_*`
because the timing comes from the subprocess, not from ASV's wall-clock
instrumentation of the Python callable.
"""

from itk_perf_shim import run_benchmark


class CoreSuite:
    """ASV suite for the ITK Core-group iteration benchmarks.

    Each ``track_*`` method delegates to :func:`itk_perf_shim.run_benchmark`,
    which launches the corresponding C++ benchmark executable and returns the
    mean probe time in seconds measured inside that subprocess (see the
    module docstring for why ``track_*`` is used instead of ``time_*``).
    """

    # Upper bound (seconds) ASV allows a single benchmark invocation to run.
    timeout = 300.0
    # Default reporting unit for values returned by the track_* methods.
    unit = "seconds"
    # One invocation per ASV run: repetition happens inside the C++
    # executable (the registry's per-benchmark "iterations" value), so
    # re-running the subprocess from Python would only add spawn overhead.
    number = 1
    repeat = 1

    def track_copy_iteration(self):
        """Return mean probe seconds from CopyIterationBenchmark."""
        return run_benchmark("core.copy_iteration")

    # ASV reads the per-function `unit` attribute when rendering results.
    track_copy_iteration.unit = "seconds"

    def track_vector_iteration(self):
        """Return mean probe seconds from VectorIterationBenchmark."""
        return run_benchmark("core.vector_iteration")

    track_vector_iteration.unit = "seconds"
35 changes: 35 additions & 0 deletions benchmarks/filtering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""ASV benchmarks for ITK Filtering-group filters."""

from itk_perf_shim import run_benchmark


class FilteringSuite:
    """ASV suite for the ITK Filtering-group benchmarks.

    Each ``track_*`` method delegates to :func:`itk_perf_shim.run_benchmark`,
    which launches the corresponding C++ benchmark executable on the
    brainweb input fixture and returns the mean probe time in seconds
    measured inside that subprocess.
    """

    # Filtering benchmarks read/write real image volumes, so allow a
    # longer per-invocation ceiling than the Core suite.
    timeout = 600.0
    # Default reporting unit for values returned by the track_* methods.
    unit = "seconds"
    # One invocation per ASV run: repetition happens inside the C++
    # executable via the registry's per-benchmark "iterations" value.
    number = 1
    repeat = 1

    def track_binary_add(self):
        """Return mean probe seconds from BinaryAddBenchmark."""
        return run_benchmark("filtering.binary_add")

    # ASV reads the per-function `unit` attribute when rendering results.
    track_binary_add.unit = "seconds"

    def track_unary_add(self):
        """Return mean probe seconds from UnaryAddBenchmark."""
        return run_benchmark("filtering.unary_add")

    track_unary_add.unit = "seconds"

    def track_gradient_magnitude(self):
        """Return mean probe seconds from GradientMagnitudeBenchmark."""
        return run_benchmark("filtering.gradient_magnitude")

    track_gradient_magnitude.unit = "seconds"

    def track_median(self):
        """Return mean probe seconds from MedianBenchmark."""
        return run_benchmark("filtering.median")

    track_median.unit = "seconds"

    def track_min_max_curvature_flow(self):
        """Return mean probe seconds from MinMaxCurvatureFlowBenchmark."""
        return run_benchmark("filtering.min_max_curvature_flow")

    track_min_max_curvature_flow.unit = "seconds"
30 changes: 30 additions & 0 deletions benchmarks/segmentation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""ASV benchmarks for ITK Segmentation-group filters."""

from itk_perf_shim import run_benchmark


class SegmentationSuite:
    """ASV suite for the ITK Segmentation-group benchmarks.

    Each ``track_*`` method delegates to :func:`itk_perf_shim.run_benchmark`,
    which launches the corresponding C++ benchmark executable on a brainweb
    fixture (full volume or an extract crop, per the registry) and returns
    the mean probe time in seconds measured inside that subprocess.
    """

    # Segmentation filters (watershed, level set) are the slowest group;
    # give them the largest per-invocation ceiling.
    timeout = 1200.0
    # Default reporting unit for values returned by the track_* methods.
    unit = "seconds"
    # One invocation per ASV run: repetition happens inside the C++
    # executable via the registry's per-benchmark "iterations" value.
    number = 1
    repeat = 1

    def track_region_growing(self):
        """Return mean probe seconds from RegionGrowingBenchmark."""
        return run_benchmark("segmentation.region_growing")

    # ASV reads the per-function `unit` attribute when rendering results.
    track_region_growing.unit = "seconds"

    def track_watershed(self):
        """Return mean probe seconds from WatershedBenchmark."""
        return run_benchmark("segmentation.watershed")

    track_watershed.unit = "seconds"

    def track_morphological_watershed(self):
        """Return mean probe seconds from MorphologicalWatershedBenchmark."""
        return run_benchmark("segmentation.morphological_watershed")

    track_morphological_watershed.unit = "seconds"

    def track_level_set(self):
        """Return mean probe seconds from LevelSetBenchmark."""
        return run_benchmark("segmentation.level_set")

    track_level_set.unit = "seconds"
43 changes: 42 additions & 1 deletion examples/Core/itkCopyIterationBenchmark.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,47 @@
#include <fstream>


// Pixel-construction traits.
//
// For scalar and FixedArray-backed images, `static_cast<PixelType>(count)`
// produces a pixel whose value encodes `count`. For VectorImage, whose
// PixelType is `itk::VariableLengthVector<T>`, the single-argument
// constructor is a LENGTH constructor — `VLV<float>(count)` yields a
// vector of size `count`, not a multi-component pixel with value `count`.
// Passing that length-mismatched value through the VectorImage pixel
// accessor used to read out-of-spec memory ("happens to work" prior to
// ITK commit 1d87efa5) and now segfaults, because the accessor copies
// exactly `NumberOfComponentsPerPixel` elements from the source's
// internal data pointer — which is null when the source VLV has size 0.
//
// The specialization below constructs a correctly-sized
// VariableLengthVector pixel, filled with `count` (cast to the value
// type). This makes the benchmark portable across all ITK versions in
// the v5.3 → main range regardless of how `VLV(0)` is represented
// internally.
// Primary template: scalar and FixedArray-backed pixel types, for which a
// plain static_cast of the running counter yields a valid pixel value.
// The componentsPerPixel argument is unused here; it exists so the
// VariableLengthVector specialization can size its pixel (see the
// rationale comment above).
template <typename TPixel>
struct PixelFiller
{
  static TPixel
  Make(unsigned int count, unsigned int /*componentsPerPixel*/)
  {
    return static_cast<TPixel>(count);
  }
};

// Specialization for VectorImage pixels: VariableLengthVector's
// single-argument constructor sets the LENGTH, not the value, so we must
// explicitly allocate componentsPerPixel components and then fill every
// component with the counter value.
template <typename TValue>
struct PixelFiller<itk::VariableLengthVector<TValue>>
{
  static itk::VariableLengthVector<TValue>
  Make(unsigned int count, unsigned int componentsPerPixel)
  {
    itk::VariableLengthVector<TValue> pixel(componentsPerPixel);
    pixel.Fill(static_cast<TValue>(count));
    return pixel;
  }
};


// Helper function to initialize an image with random values
template <typename TImage>
typename TImage::Pointer
Expand All @@ -54,7 +95,7 @@ CreateAndInitializeImage(const typename TImage::SizeType & size, unsigned int nu
itk::ImageRegionIterator<TImage> it(image, region);
for (; !it.IsAtEnd(); ++it)
{
it.Set(static_cast<PixelType>(count));
it.Set(PixelFiller<PixelType>::Make(count, numberOfComponentsPerPixel));
++count;
}

Expand Down
3 changes: 3 additions & 0 deletions python/itk_perf_shim/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""Public API of the ITK performance-benchmark shim.

Re-exports the subprocess runner used by the ASV ``track_*`` benchmarks.
"""

from .runner import run_benchmark, BenchmarkError

__all__ = ["run_benchmark", "BenchmarkError"]
106 changes: 106 additions & 0 deletions python/itk_perf_shim/registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
"""Maps logical benchmark names to the executable + CLI template.

CLI contract (from examples/*/CMakeLists.txt):
<exe> <timingsJson> <iterations> <threads> <input...> <output...>

Templates use str.format substitution:
{timings_json} — absolute path for the jsonxx output
{iterations} — per-benchmark iteration count
{brain} — brainweb165a10f17.mha (ExternalData fixture)
{brain_x45} — brainweb165a10f17extract45i90z.mha
{brain_x60} — brainweb165a10f17extract60i50z.mha
{output_dir} — scratch dir for benchmark output images

Scope notes:
- MorphologicalWatershedBenchmark's CLI omits the threads argument
(see its argv parsing).
- Registration and Resample benchmarks are intentionally absent from
this initial harness; they require ITK-level compiler fixes
(NrrdIO airFloatQNaN link, ResampleBenchmark -Wmaybe-uninitialized)
that are outside the scope of this PR.
"""

# Registry of runnable benchmarks, keyed by the logical "<group>.<name>"
# ids that the ASV track_* methods pass to itk_perf_shim.run_benchmark().
#
# Per entry:
#   "exe"        -- executable base name resolved under $ITK_BENCHMARK_BIN
#   "args"       -- str.format templates; placeholders are documented in
#                   the module docstring above
#   "iterations" -- value substituted for {iterations}; the C++ process
#                   performs the repetition itself, so ASV invokes each
#                   executable only once per run
#
# NOTE(review): per the CLI contract above, the third positional argument
# is <threads>. "-1" presumably selects the ITK default thread count, and
# the "128" used by the two core benchmarks sits in that slot but looks
# more like a size parameter -- confirm both against the executables'
# argv parsing.
BENCHMARKS = {
    "core.copy_iteration": {
        "exe": "CopyIterationBenchmark",
        "args": ["{timings_json}", "{iterations}", "128"],
        "iterations": 25,
    },
    "core.vector_iteration": {
        "exe": "VectorIterationBenchmark",
        "args": ["{timings_json}", "{iterations}", "128"],
        "iterations": 50,
    },
    # Add benchmarks take two inputs (the same brainweb volume twice).
    "filtering.binary_add": {
        "exe": "BinaryAddBenchmark",
        "args": [
            "{timings_json}", "{iterations}", "1",
            "{brain}", "{brain}", "{output_dir}/BinaryAddBenchmark.mha",
        ],
        "iterations": 10,
    },
    "filtering.unary_add": {
        "exe": "UnaryAddBenchmark",
        "args": [
            "{timings_json}", "{iterations}", "1",
            "{brain}", "{brain}", "{output_dir}/UnaryAddBenchmark.mha",
        ],
        "iterations": 10,
    },
    "filtering.gradient_magnitude": {
        "exe": "GradientMagnitudeBenchmark",
        "args": [
            "{timings_json}", "{iterations}", "-1",
            "{brain}", "{output_dir}/GradientMagnitudeBenchmark.mha",
        ],
        "iterations": 5,
    },
    "filtering.median": {
        "exe": "MedianBenchmark",
        "args": [
            "{timings_json}", "{iterations}", "-1",
            "{brain}", "{output_dir}/MedianBenchmark.mha",
        ],
        "iterations": 3,
    },
    "filtering.min_max_curvature_flow": {
        "exe": "MinMaxCurvatureFlowBenchmark",
        "args": [
            "{timings_json}", "{iterations}", "-1",
            "{brain}", "{output_dir}/MinMaxCurvatureFlowBenchmark.mha",
        ],
        "iterations": 3,
    },
    "segmentation.region_growing": {
        "exe": "RegionGrowingBenchmark",
        "args": [
            "{timings_json}", "{iterations}", "-1",
            "{brain}", "{output_dir}/RegionGrowingBenchmark.mha",
        ],
        "iterations": 3,
    },
    # Watershed runs on the smaller 45-slice extract to keep runtime sane.
    "segmentation.watershed": {
        "exe": "WatershedBenchmark",
        "args": [
            "{timings_json}", "{iterations}", "-1",
            "{brain_x45}", "{output_dir}/WatershedBenchmark.mha",
        ],
        "iterations": 3,
    },
    # MorphologicalWatershedBenchmark's CLI omits the <threads> argument
    # (see the scope note in the module docstring).
    "segmentation.morphological_watershed": {
        "exe": "MorphologicalWatershedBenchmark",
        "args": [
            "{timings_json}", "{iterations}",
            "{brain_x45}", "{output_dir}/MorphologicalWatershedBenchmark.mha",
        ],
        "iterations": 3,
    },
    "segmentation.level_set": {
        "exe": "LevelSetBenchmark",
        "args": [
            "{timings_json}", "{iterations}", "-1",
            "{brain_x60}", "{output_dir}/LevelSetBenchmark.mha",
        ],
        "iterations": 3,
    },
}
Loading
Loading