diff --git a/.gitignore b/.gitignore
index 9eb1eac..c5b4598 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,3 +42,9 @@
 cmake-build*/**
 doc/*.pyg
 doc/_minted-ITKPerformanceBenchmarking/*
+
+# Python / ASV
+*.egg-info/
+__pycache__/
+.asv/
+/itk-repo
diff --git a/README-asv.md b/README-asv.md
new file mode 100644
index 0000000..f86225d
--- /dev/null
+++ b/README-asv.md
@@ -0,0 +1,89 @@
+# ASV harness for ITKPerformanceBenchmarking
+
+Native ITK C++ benchmark executables driven by
+[airspeed velocity (asv)](https://asv.readthedocs.io/) for continuous
+performance-regression detection on ITK pull requests.
+
+## Design
+
+- **The C++ benchmarks remain canonical.** ASV does not recompile ITK
+  and does not interpret benchmark results numerically.
+- **`environment_type: "existing"`** keeps ASV out of the ITK build
+  loop. The CI workflow (see `ITK/.github/workflows/perf-benchmark.yml`)
+  configures and builds ITK + `ITKPerformanceBenchmarking` twice
+  (merge-base, PR HEAD), then invokes `asv run` against each build.
+- **`track_*` not `time_*`.** The timing source of truth is
+  `HighPriorityRealTimeProbesCollector` inside the C++ subprocess.
+  The Python shim reads the jsonxx output and returns mean probe
+  seconds. ASV's built-in timing would include subprocess spawn
+  overhead, which we want to exclude.
+
+## Environment contract
+
+The shim needs three env vars:
+
+| Var | Purpose |
+|-----|---------|
+| `ITK_BENCHMARK_BIN` | Dir containing `MedianBenchmark`, `GradientMagnitudeBenchmark`, etc. |
+| `ITK_BENCHMARK_DATA` | ExternalData root with the `brainweb165a10f17*.mha` fixtures used by the shim |
+| `ITK_BENCHMARK_SCRATCH` | Optional scratch dir for per-run output images |
+
+## Local smoke test
+
+```sh
+# 1. Build ITK with Module_PerformanceBenchmarking=ON and BUILD_EXAMPLES=ON,
+# then build the benchmark targets (full ITK build is not needed):
+cmake -S /path/to/ITK -B /path/to/ITK-build -GNinja -DCMAKE_BUILD_TYPE=Release \
+  -DBUILD_TESTING=ON -DBUILD_EXAMPLES=ON -DBUILD_SHARED_LIBS=OFF \
+  -DModule_PerformanceBenchmarking=ON
+cmake --build /path/to/ITK-build --target \
+  MedianBenchmark BinaryAddBenchmark UnaryAddBenchmark GradientMagnitudeBenchmark \
+  MinMaxCurvatureFlowBenchmark CopyIterationBenchmark VectorIterationBenchmark RegionGrowingBenchmark \
+  WatershedBenchmark MorphologicalWatershedBenchmark LevelSetBenchmark ITKBenchmarksData
+
+# 2. Create the itk-repo symlink required by asv.conf.json:
+cd /path/to/ITKPerformanceBenchmarking
+ln -sfn /path/to/ITK itk-repo
+
+# 3. Install asv + the shim into a Python environment:
+python -m venv .venv && . .venv/bin/activate
+pip install "asv>=0.6,<0.7"
+pip install -e python
+
+# 4. Point the shim at the build + data:
+export ITK_BENCHMARK_BIN=/path/to/ITK-build/bin
+export ITK_BENCHMARK_DATA=/path/to/ITK-build/ExternalData/Modules/Remote/PerformanceBenchmarking/examples/Data/Input
+export ITK_BENCHMARK_SCRATCH=/tmp/itkperf
+
+# 5. Register the machine (first run only):
+asv machine --yes --machine $(hostname -s)
+
+# 6. Run labelled with the current ITK SHA (environment_type: existing
+#    does not accept range specs — --set-commit-hash does the labelling):
+ITK_SHA=$(cd itk-repo && git rev-parse HEAD)
+asv run --machine $(hostname -s) --set-commit-hash "$ITK_SHA" --python=same
+```
+
+## PR comparison pattern (for CI)
+
+```sh
+# Assumes ITK-base-build and ITK-head-build already exist, and the
+# itk-repo symlink points at the ITK clone.
+
+BASE_SHA=$(cd itk-repo && git merge-base origin/master HEAD)
+HEAD_SHA=$(cd itk-repo && git rev-parse HEAD)
+
+ITK_BENCHMARK_BIN=/path/to/ITK-base-build/bin \
+  asv run --machine $HOST --set-commit-hash "$BASE_SHA" --python=same
+
+ITK_BENCHMARK_BIN=/path/to/ITK-head-build/bin \
+  asv run --machine $HOST --set-commit-hash "$HEAD_SHA" --python=same
+
+asv compare "$BASE_SHA" "$HEAD_SHA" --factor=1.10 --split
+asv publish
+```
+
+## Status
+
+Prototype — May 2026. Covers 11 benchmarks (Core: 2, Filtering: 5,
+Segmentation: 4). Remaining executables (Registration and Resample
+variants) will be added once the end-to-end loop is validated.
diff --git a/asv.conf.json b/asv.conf.json
new file mode 100644
index 0000000..3049c82
--- /dev/null
+++ b/asv.conf.json
@@ -0,0 +1,22 @@
+{
+  "version": 1,
+  "project": "ITK",
+  "project_url": "https://itk.org/",
+  "_comment_repo": "'repo' points at the ITK source tree whose commits label the time series. The caller (CI workflow or local smoke test) must create a symlink named 'itk-repo' at the root of this harness pointing to the ITK clone before invoking asv.",
+  "repo": "itk-repo",
+  "branches": ["origin/master"],
+  "environment_type": "existing",
+  "show_commit_url": "https://github.com/InsightSoftwareConsortium/ITK/commit/",
+  "benchmark_dir": "benchmarks",
+  "env_dir": ".asv/env",
+  "results_dir": ".asv/results",
+  "html_dir": ".asv/html",
+  "build_command": [],
+  "install_command": [
+    "python -m pip install --no-build-isolation -e python"
+  ],
+  "uninstall_command": [
+    "return-code=any python -m pip uninstall -y itk_perf_shim"
+  ],
+  "matrix": {}
+}
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/benchmarks/core.py b/benchmarks/core.py
new file mode 100644
index 0000000..3b3625c
--- /dev/null
+++ b/benchmarks/core.py
@@ -0,0 +1,26 @@
+"""ASV benchmarks for ITK Core-group iteration.
+ +Each `track_*` function returns seconds measured *inside* the C++ executable +via HighPriorityRealTimeProbesCollector. We use `track_*` rather than `time_*` +because the timing comes from the subprocess, not from ASV's wall-clock +instrumentation of the Python callable. +""" + +from itk_perf_shim import run_benchmark + + +class CoreSuite: + timeout = 300.0 + unit = "seconds" + number = 1 + repeat = 1 + + def track_copy_iteration(self): + return run_benchmark("core.copy_iteration") + + track_copy_iteration.unit = "seconds" + + def track_vector_iteration(self): + return run_benchmark("core.vector_iteration") + + track_vector_iteration.unit = "seconds" diff --git a/benchmarks/filtering.py b/benchmarks/filtering.py new file mode 100644 index 0000000..f1b0ff5 --- /dev/null +++ b/benchmarks/filtering.py @@ -0,0 +1,35 @@ +"""ASV benchmarks for ITK Filtering-group filters.""" + +from itk_perf_shim import run_benchmark + + +class FilteringSuite: + timeout = 600.0 + unit = "seconds" + number = 1 + repeat = 1 + + def track_binary_add(self): + return run_benchmark("filtering.binary_add") + + track_binary_add.unit = "seconds" + + def track_unary_add(self): + return run_benchmark("filtering.unary_add") + + track_unary_add.unit = "seconds" + + def track_gradient_magnitude(self): + return run_benchmark("filtering.gradient_magnitude") + + track_gradient_magnitude.unit = "seconds" + + def track_median(self): + return run_benchmark("filtering.median") + + track_median.unit = "seconds" + + def track_min_max_curvature_flow(self): + return run_benchmark("filtering.min_max_curvature_flow") + + track_min_max_curvature_flow.unit = "seconds" diff --git a/benchmarks/segmentation.py b/benchmarks/segmentation.py new file mode 100644 index 0000000..3f81f98 --- /dev/null +++ b/benchmarks/segmentation.py @@ -0,0 +1,30 @@ +"""ASV benchmarks for ITK Segmentation-group filters.""" + +from itk_perf_shim import run_benchmark + + +class SegmentationSuite: + timeout = 1200.0 + unit = "seconds" + 
number = 1 + repeat = 1 + + def track_region_growing(self): + return run_benchmark("segmentation.region_growing") + + track_region_growing.unit = "seconds" + + def track_watershed(self): + return run_benchmark("segmentation.watershed") + + track_watershed.unit = "seconds" + + def track_morphological_watershed(self): + return run_benchmark("segmentation.morphological_watershed") + + track_morphological_watershed.unit = "seconds" + + def track_level_set(self): + return run_benchmark("segmentation.level_set") + + track_level_set.unit = "seconds" diff --git a/examples/Core/itkCopyIterationBenchmark.cxx b/examples/Core/itkCopyIterationBenchmark.cxx index 43db624..c0f4d6b 100644 --- a/examples/Core/itkCopyIterationBenchmark.cxx +++ b/examples/Core/itkCopyIterationBenchmark.cxx @@ -33,6 +33,47 @@ #include +// Pixel-construction traits. +// +// For scalar and FixedArray-backed images, `static_cast(count)` +// produces a pixel whose value encodes `count`. For VectorImage, whose +// PixelType is `itk::VariableLengthVector`, the single-argument +// constructor is a LENGTH constructor — `VLV(count)` yields a +// vector of size `count`, not a multi-component pixel with value `count`. +// Passing that length-mismatched value through the VectorImage pixel +// accessor used to read out-of-spec memory ("happens to work" prior to +// ITK commit 1d87efa5) and now segfaults, because the accessor copies +// exactly `NumberOfComponentsPerPixel` elements from the source's +// internal data pointer — which is null when the source VLV has size 0. +// +// The specialization below constructs a correctly-sized +// VariableLengthVector pixel, filled with `count` (cast to the value +// type). This makes the benchmark portable across all ITK versions in +// the v5.3 → main range regardless of how `VLV(0)` is represented +// internally. 
+template +struct PixelFiller +{ + static TPixel + Make(unsigned int count, unsigned int /*componentsPerPixel*/) + { + return static_cast(count); + } +}; + +template +struct PixelFiller> +{ + static itk::VariableLengthVector + Make(unsigned int count, unsigned int componentsPerPixel) + { + itk::VariableLengthVector pixel(componentsPerPixel); + pixel.Fill(static_cast(count)); + return pixel; + } +}; + + // Helper function to initialize an image with random values template typename TImage::Pointer @@ -54,7 +95,7 @@ CreateAndInitializeImage(const typename TImage::SizeType & size, unsigned int nu itk::ImageRegionIterator it(image, region); for (; !it.IsAtEnd(); ++it) { - it.Set(static_cast(count)); + it.Set(PixelFiller::Make(count, numberOfComponentsPerPixel)); ++count; } diff --git a/python/itk_perf_shim/__init__.py b/python/itk_perf_shim/__init__.py new file mode 100644 index 0000000..2c8ab7c --- /dev/null +++ b/python/itk_perf_shim/__init__.py @@ -0,0 +1,3 @@ +from .runner import run_benchmark, BenchmarkError + +__all__ = ["run_benchmark", "BenchmarkError"] diff --git a/python/itk_perf_shim/registry.py b/python/itk_perf_shim/registry.py new file mode 100644 index 0000000..2167377 --- /dev/null +++ b/python/itk_perf_shim/registry.py @@ -0,0 +1,106 @@ +"""Maps logical benchmark names to the executable + CLI template. + +CLI contract (from examples/*/CMakeLists.txt): + + +Templates use str.format substitution: + {timings_json} — absolute path for the jsonxx output + {iterations} — per-benchmark iteration count + {brain} — brainweb165a10f17.mha (ExternalData fixture) + {brain_x45} — brainweb165a10f17extract45i90z.mha + {brain_x60} — brainweb165a10f17extract60i50z.mha + {output_dir} — scratch dir for benchmark output images + +Scope notes: + - MorphologicalWatershedBenchmark's CLI omits the threads argument + (see its argv parsing). 
+ - Registration and Resample benchmarks are intentionally absent from + this initial harness; they require ITK-level compiler fixes + (NrrdIO airFloatQNaN link, ResampleBenchmark -Wmaybe-uninitialized) + that are outside the scope of this PR. +""" + +BENCHMARKS = { + "core.copy_iteration": { + "exe": "CopyIterationBenchmark", + "args": ["{timings_json}", "{iterations}", "128"], + "iterations": 25, + }, + "core.vector_iteration": { + "exe": "VectorIterationBenchmark", + "args": ["{timings_json}", "{iterations}", "128"], + "iterations": 50, + }, + "filtering.binary_add": { + "exe": "BinaryAddBenchmark", + "args": [ + "{timings_json}", "{iterations}", "1", + "{brain}", "{brain}", "{output_dir}/BinaryAddBenchmark.mha", + ], + "iterations": 10, + }, + "filtering.unary_add": { + "exe": "UnaryAddBenchmark", + "args": [ + "{timings_json}", "{iterations}", "1", + "{brain}", "{brain}", "{output_dir}/UnaryAddBenchmark.mha", + ], + "iterations": 10, + }, + "filtering.gradient_magnitude": { + "exe": "GradientMagnitudeBenchmark", + "args": [ + "{timings_json}", "{iterations}", "-1", + "{brain}", "{output_dir}/GradientMagnitudeBenchmark.mha", + ], + "iterations": 5, + }, + "filtering.median": { + "exe": "MedianBenchmark", + "args": [ + "{timings_json}", "{iterations}", "-1", + "{brain}", "{output_dir}/MedianBenchmark.mha", + ], + "iterations": 3, + }, + "filtering.min_max_curvature_flow": { + "exe": "MinMaxCurvatureFlowBenchmark", + "args": [ + "{timings_json}", "{iterations}", "-1", + "{brain}", "{output_dir}/MinMaxCurvatureFlowBenchmark.mha", + ], + "iterations": 3, + }, + "segmentation.region_growing": { + "exe": "RegionGrowingBenchmark", + "args": [ + "{timings_json}", "{iterations}", "-1", + "{brain}", "{output_dir}/RegionGrowingBenchmark.mha", + ], + "iterations": 3, + }, + "segmentation.watershed": { + "exe": "WatershedBenchmark", + "args": [ + "{timings_json}", "{iterations}", "-1", + "{brain_x45}", "{output_dir}/WatershedBenchmark.mha", + ], + "iterations": 3, + }, + 
"segmentation.morphological_watershed": { + "exe": "MorphologicalWatershedBenchmark", + "args": [ + "{timings_json}", "{iterations}", + "{brain_x45}", "{output_dir}/MorphologicalWatershedBenchmark.mha", + ], + "iterations": 3, + }, + "segmentation.level_set": { + "exe": "LevelSetBenchmark", + "args": [ + "{timings_json}", "{iterations}", "-1", + "{brain_x60}", "{output_dir}/LevelSetBenchmark.mha", + ], + "iterations": 3, + }, +} diff --git a/python/itk_perf_shim/runner.py b/python/itk_perf_shim/runner.py new file mode 100644 index 0000000..aefce5a --- /dev/null +++ b/python/itk_perf_shim/runner.py @@ -0,0 +1,100 @@ +"""Invoke a benchmark executable, parse its jsonxx output, return mean seconds. + +Environment contract (set by the ASV/GHA caller): + ITK_BENCHMARK_BIN — dir containing benchmark executables (required) + ITK_BENCHMARK_DATA — ExternalData root holding the BRAIN image fixture (required) + ITK_BENCHMARK_SCRATCH — scratch dir for output images (optional; tempdir otherwise) +""" + +from __future__ import annotations + +import json +import os +import subprocess +import tempfile +from pathlib import Path + +from .registry import BENCHMARKS + + +class BenchmarkError(RuntimeError): + pass + + +def _resolve_env() -> tuple[Path, Path, Path]: + bin_dir = os.environ.get("ITK_BENCHMARK_BIN") + data_dir = os.environ.get("ITK_BENCHMARK_DATA") + if not bin_dir: + raise BenchmarkError("ITK_BENCHMARK_BIN not set") + if not data_dir: + raise BenchmarkError("ITK_BENCHMARK_DATA not set") + scratch = os.environ.get("ITK_BENCHMARK_SCRATCH") or tempfile.mkdtemp(prefix="itkperf-") + return Path(bin_dir), Path(data_dir), Path(scratch) + + +def _find_exe(bin_dir: Path, exe: str) -> Path: + for candidate in (bin_dir / exe, bin_dir / f"{exe}.exe"): + if candidate.is_file() and os.access(candidate, os.X_OK): + return candidate + for match in bin_dir.rglob(exe): + if match.is_file() and os.access(match, os.X_OK): + return match + raise BenchmarkError(f"Executable {exe!r} not found 
under {bin_dir}") + + +def _mean_probe_seconds(timings_json: Path) -> float: + """Parse HighPriorityRealTimeProbesCollector JSON; average all probes' means. + + The jsonxx output shape (per WriteExpandedReport + JSONReport) is roughly: + { "Probes": [ { "Name": "...", "Mean": , "Min": ..., "Max": ... }, ... ], + "SystemInformation": {...}, "ITKBuildInformation": {...}, ... } + We reduce to a single scalar per benchmark by averaging probe means, since each + C++ benchmark typically has one dominant probe. Multi-probe benchmarks can be + parametrized later. + """ + with timings_json.open() as f: + doc = json.load(f) + probes = doc.get("Probes") or doc.get("probes") or [] + if not probes: + raise BenchmarkError(f"No probes in {timings_json}: keys={list(doc)}") + means = [] + for p in probes: + for key in ("Mean", "mean", "MeanTime", "Mean (s)"): + if key in p: + means.append(float(p[key])) + break + if not means: + raise BenchmarkError(f"No Mean field in probes of {timings_json}") + return sum(means) / len(means) + + +def run_benchmark(name: str) -> float: + if name not in BENCHMARKS: + raise BenchmarkError(f"Unknown benchmark {name!r}") + spec = BENCHMARKS[name] + bin_dir, data_dir, scratch = _resolve_env() + exe = _find_exe(bin_dir, spec["exe"]) + scratch.mkdir(parents=True, exist_ok=True) + + with tempfile.NamedTemporaryFile( + suffix=".json", dir=scratch, delete=False + ) as tf: + timings_json = Path(tf.name) + + subs = { + "timings_json": str(timings_json), + "iterations": str(spec["iterations"]), + "brain": str(data_dir / "brainweb165a10f17.mha"), + "brain_x45": str(data_dir / "brainweb165a10f17extract45i90z.mha"), + "brain_x60": str(data_dir / "brainweb165a10f17extract60i50z.mha"), + "output_dir": str(scratch), + } + argv = [str(exe)] + [a.format(**subs) for a in spec["args"]] + try: + proc = subprocess.run(argv, capture_output=True, text=True, check=True) + except subprocess.CalledProcessError as e: + raise BenchmarkError( + f"{name} failed 
(rc={e.returncode}):\nstdout={e.stdout}\nstderr={e.stderr}" + ) from e + _ = proc # stdout contains the human-readable report; we parse the JSON file + return _mean_probe_seconds(timings_json) diff --git a/python/pyproject.toml b/python/pyproject.toml new file mode 100644 index 0000000..dd790e3 --- /dev/null +++ b/python/pyproject.toml @@ -0,0 +1,13 @@ +[build-system] +requires = ["setuptools>=64"] +build-backend = "setuptools.build_meta" + +[project] +name = "itk_perf_shim" +version = "0.1.0" +description = "Subprocess shim invoking ITKPerformanceBenchmarking C++ benchmarks for ASV." +requires-python = ">=3.9" + +[tool.setuptools.packages.find] +where = ["."] +include = ["itk_perf_shim*"]