Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
94c36c4
Upgrade Jupyter Book 1.0.4 to 2.1.2 with griffe-based API reference
romanlutz Mar 12, 2026
0b66469
Gitignore doc/_api (intermediate JSON), keep doc/api (reviewable mark…
romanlutz Mar 12, 2026
8481c6c
Gitignore doc/api too - fully auto-generated from source
romanlutz Mar 12, 2026
5f04ff1
Use make docs-build in CI workflow (consolidates API gen + JB build)
romanlutz Mar 12, 2026
9dee7c9
Add comments to Makefile and restore docs-api target
romanlutz Mar 12, 2026
fef9f4f
Restore remove_notebook_headers.py and sanitize_notebook_paths.py
romanlutz Mar 12, 2026
1f77a26
Restore notebook pre-commit hooks (remove headers, sanitize paths)
romanlutz Mar 12, 2026
920516a
Add fast doc validation pre-commit hook (replaces validate_jupyter_bo…
romanlutz Mar 12, 2026
26ab3c2
Move pydoc2json.py and gen_api_md.py to build_scripts/
romanlutz Mar 12, 2026
093229a
Add JB2 API workaround context and tracking issue link to scripts
romanlutz Mar 12, 2026
54cf23d
Restore test_sanitize_notebook_paths.py
romanlutz Mar 12, 2026
3d8b794
Fix top navbar URLs (JB2 converts underscores to hyphens in URLs)
romanlutz Mar 12, 2026
3a4272b
Fix all nav URLs - JB2 strips leading numbers and converts underscore…
romanlutz Mar 12, 2026
c56b29c
Remove test_api_documentation.py (validated api.rst which no longer e…
romanlutz Mar 12, 2026
8558820
Skip auto-generated api/ dir in doc validation (gitignored, built in CI)
romanlutz Mar 12, 2026
489dd23
Merge remote-tracking branch 'origin/main' into upgrade-jupyter-book
romanlutz Mar 13, 2026
2435625
Fix backslash paths in myst.yml and update landing page emojis
romanlutz Mar 13, 2026
2da6ce9
Fix ruff format and unused import in validate_docs.py
romanlutz Mar 13, 2026
8f252bf
Fix typo in Makefile: build_build_scripts -> build_scripts
romanlutz Mar 13, 2026
e77dfbc
Fix API doc generation and RSS path for CI
romanlutz Mar 13, 2026
063cdc2
Rewrite RSS generation to parse source markdown instead of JB1 HTML
romanlutz Mar 13, 2026
6083d68
Merge branch 'main' into upgrade-jupyter-book
romanlutz Mar 13, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ jobs:
- name: Upload artifact
uses: actions/upload-pages-artifact@v3
with:
path: "doc/_build/html"
path: "doc/_build/site"
- name: Deploy to GitHub Pages
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
id: deployment
Expand Down
24 changes: 14 additions & 10 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,20 @@ repos:

- repo: local
hooks:
- id: validate-jupyter-book
name: Validate Jupyter Book Structure
entry: python ./build_scripts/validate_jupyter_book.py
- id: remove-notebook-headers
name: Remove Notebook Headers
entry: python ./build_scripts/remove_notebook_headers.py
language: python
files: ^(doc/.*\.(py|ipynb|md|rst)|doc/_toc\.yml)$
files: ^doc.*\.(ipynb)$
- id: sanitize-notebook-paths
name: Sanitize Notebook Paths
entry: python ./build_scripts/sanitize_notebook_paths.py
language: python
files: ^doc.*\.(ipynb)$
- id: validate-docs
name: Validate Documentation Structure
entry: python ./build_scripts/validate_docs.py
language: python
files: ^(doc/.*\.(py|ipynb|md)|doc/myst\.yml)$
pass_filenames: false
additional_dependencies: ['pyyaml']
- id: website
name: Jupyter Book Build Check
entry: python ./build_scripts/conditional_jb_build.py
language: system
types: [python]
pass_filenames: false
15 changes: 11 additions & 4 deletions .readthedocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,21 @@ build:
os: ubuntu-22.04
tools:
python: "3.13"
nodejs: "20"
jobs:
pre_build:
- python build_scripts/pydoc2json.py pyrit --submodules -o doc/_api/pyrit_all.json
- python build_scripts/gen_api_md.py
build:
html:
- cd doc && jupyter-book build --all
commands:
- mkdir -p $READTHEDOCS_OUTPUT/html
- cp -r doc/_build/site/* $READTHEDOCS_OUTPUT/html/

python:
install:
- method: pip
path: .
extra_requirements:
- dev

formats:
- pdf
- epub
15 changes: 13 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,22 @@ pre-commit:
mypy:
$(CMD) mypy $(PYMODULE) $(UNIT_TESTS)

# Build the full documentation site:
# 1. Generate API reference JSON from Python source (griffe)
# 2. Convert API JSON to MyST markdown pages
# 3. Build the Jupyter Book site
# 4. Generate RSS feed
docs-build:
uv run jb build -W -v ./doc
cp -r assets doc/_build/assets
uv run python build_scripts/pydoc2json.py pyrit --submodules -o doc/_api/pyrit_all.json
uv run python build_scripts/gen_api_md.py
cd doc && uv run jupyter-book build --all
uv run ./build_scripts/generate_rss.py

# Regenerate only the API reference pages (without building the full site)
docs-api:
uv run python build_scripts/pydoc2json.py pyrit --submodules -o doc/_api/pyrit_all.json
uv run python build_scripts/gen_api_md.py

# Because of import time, "auto" seemed to actually go slower than just using 4 processes
unit-test:
$(CMD) pytest -n 4 --dist=loadfile --cov=$(PYMODULE) $(UNIT_TESTS)
Expand Down
35 changes: 0 additions & 35 deletions build_scripts/conditional_jb_build.py

This file was deleted.

264 changes: 264 additions & 0 deletions build_scripts/gen_api_md.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,264 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Generate MyST markdown API reference pages from griffe JSON.

WORKAROUND: Jupyter Book 2 (MyST engine) does not yet have native support for
auto-generating API documentation from Python source code. This script and
pydoc2json.py are a workaround that generates API reference pages from source.
Once JB2/MyST adds native API doc support, these scripts can be replaced.
Tracking issue: https://github.com/jupyter-book/mystmd/issues/1259

Reads the JSON files produced by pydoc2json.py and generates clean
MyST markdown pages suitable for Jupyter Book 2.

Usage:
python build_scripts/gen_api_md.py
"""

import json
from pathlib import Path

# Directory of griffe JSON files read by this script; split per-module JSON is also written here.
API_JSON_DIR = Path("doc/_api")
# Directory that receives the generated markdown pages (index.md plus one page per module).
API_MD_DIR = Path("doc/api")


def render_params(params: list[dict]) -> str:
    """Render a parameter list as a markdown table.

    Args:
        params: Parameter entries; each is a dict with a ``name`` key and
            optional ``type``, ``desc`` and ``default`` keys.

    Returns:
        The table as one string (header row, separator row, one row per
        parameter), or an empty string when ``params`` is empty.
    """
    if not params:
        return ""

    def cell(text: str) -> str:
        # A literal "|" terminates a table cell even inside a code span, so union
        # types such as "str | None" need the pipe escaped. A newline would end the row.
        return text.replace("\n", " ").replace("|", "\\|")

    lines = ["| Parameter | Type | Description |", "|---|---|---|"]
    for p in params:
        name = cell(f"`{p['name']}`")
        # "or ''" also covers keys that are present but hold None.
        ptype = cell(str(p.get("type") or ""))
        desc = cell(str(p.get("desc") or ""))
        default = p.get("default") or ""
        if default:
            desc += cell(f" Defaults to `{default}`.")
        # An empty code span shows up as stray backticks, so leave the cell blank instead.
        type_cell = f"`{ptype}`" if ptype else ""
        lines.append(f"| {name} | {type_cell} | {desc} |")
    return "\n".join(lines)


def render_returns(returns: list[dict]) -> str:
    """Format documented return values as a bold heading followed by a bullet list.

    Every entry contributes one bullet built from its ``type`` and ``desc`` keys
    (each falls back to an empty string when missing). An empty ``returns``
    produces an empty string.
    """
    if not returns:
        return ""
    bullets = [f"- `{entry.get('type', '')}` — {entry.get('desc', '')}" for entry in returns]
    # The heading carries its own trailing newline so a blank line separates it from the list.
    return "\n".join(["**Returns:**\n", *bullets])


def render_raises(raises: list[dict]) -> str:
    """Format documented exceptions as a bold heading followed by a bullet list.

    Every entry contributes one bullet built from its ``type`` and ``desc`` keys
    (each falls back to an empty string when missing). An empty ``raises``
    produces an empty string.
    """
    if not raises:
        return ""
    bullets = [f"- `{entry.get('type', '')}` — {entry.get('desc', '')}" for entry in raises]
    # The heading carries its own trailing newline so a blank line separates it from the list.
    return "\n".join(["**Raises:**\n", *bullets])


def render_signature(member: dict) -> str:
    """Build the parenthesised, single-line parameter list of a function or method.

    Reads the ``signature`` list of ``member``. ``self`` and ``cls`` are omitted;
    every other parameter is rendered from its ``name`` and its optional ``type``
    and ``default`` keys. Returns ``"()"`` when there is no signature data.
    """
    entries = member.get("signature", [])
    if not entries:
        return "()"

    rendered = []
    for entry in entries:
        ident = entry["name"]
        if ident in ("self", "cls"):
            continue
        annotation = entry.get("type", "")
        fallback = entry.get("default", "")
        text = ident
        if annotation:
            text += f": {annotation}"
            if fallback:
                # Annotated defaults get spaces around "=".
                text += f" = {fallback}"
        elif fallback:
            # Un-annotated defaults are written without spaces.
            text += f"={fallback}"
        rendered.append(text)

    # Kept on one line so the result can be embedded in a heading.
    return "(" + ", ".join(rendered) + ")"


def render_function(func: dict, heading_level: str = "###") -> str:
    """Render one function or method entry as a markdown section.

    The section starts with a heading at ``heading_level``. Declarations of at
    most 80 characters go into the heading as inline code; longer ones get a
    heading with just the name, followed by a fenced ``python`` block holding the
    full declaration. The docstring text, parameter table, returns and raises
    sections follow when the entry's ``docstring`` provides them.
    """
    name = func["name"]
    prefix = "async " if func.get("is_async", False) else ""
    sig = render_signature(func)
    ret = func.get("returns_annotation", "")
    arrow = f" → {ret}" if ret else ""

    declaration = f"{prefix}{name}{sig}{arrow}"
    if len(declaration) <= 80:
        parts = [f"{heading_level} `{declaration}`\n"]
    else:
        # Too long for a readable heading: name in the heading, declaration in a code block.
        parts = [
            f"{heading_level} {prefix}{name}\n",
            f"```python\n{declaration}\n```\n",
        ]

    ds = func.get("docstring", {})
    if ds:
        summary = ds.get("text")
        if summary:
            parts.append(summary + "\n")
        sections = (
            (render_params, "params"),
            (render_returns, "returns"),
            (render_raises, "raises"),
        )
        for renderer, key in sections:
            block = renderer(ds.get(key, []))
            if block:
                parts.append(block + "\n")

    return "\n".join(parts)


def render_class(cls: dict) -> str:
    """Render one class entry as a markdown section.

    Output order: a level-2 heading with the class name and its bases, the class
    docstring text, a constructor parameter table (taken from the ``init`` entry's
    docstring), and finally every method rendered at heading level ``####``.
    Parts whose data is missing or empty are left out.
    """
    name = cls["name"]
    bases = cls.get("bases", [])
    inheritance = ""
    if bases:
        inheritance = "(" + ", ".join(bases) + ")"

    sections = [f"## `class {name}{inheritance}`\n"]

    doc = cls.get("docstring", {})
    if doc and doc.get("text"):
        sections.append(doc["text"] + "\n")

    # Constructor parameters come from the docstring of the "init" entry, if any.
    ctor = cls.get("init")
    ctor_doc = ctor.get("docstring", {}) if ctor else None
    if ctor_doc and ctor_doc.get("params"):
        sections.append("**Constructor Parameters:**\n")
        sections.append(render_params(ctor_doc["params"]) + "\n")

    methods = cls.get("methods", [])
    if methods:
        sections.append("**Methods:**\n")
        for method in methods:
            sections.append(render_function(method, heading_level="####"))

    return "\n".join(sections)


def render_module(data: dict) -> str:
    """Render a full module page.

    The page consists of a level-1 heading with the module name, the module
    docstring text (if any), a "Functions" section listing every function
    member, and then one section per class member. Members of any other kind
    are not rendered.

    Args:
        data: Module entry with a ``name`` key and optional ``docstring`` and
            ``members`` keys.

    Returns:
        The page content as a markdown string.
    """
    mod_name = data["name"]
    parts = [f"# {mod_name}\n"]

    ds = data.get("docstring", {})
    if ds and ds.get("text"):
        parts.append(ds["text"] + "\n")

    members = data.get("members", [])

    # Only classes and functions are rendered; functions come first.
    classes = [m for m in members if m.get("kind") == "class"]
    functions = [m for m in members if m.get("kind") == "function"]

    if functions:
        parts.append("## Functions\n")
        parts.extend(render_function(f) for f in functions)

    parts.extend(render_class(cls) for cls in classes)

    return "\n".join(parts)


def split_aggregate_json(api_json_dir: Path) -> None:
    """Write one JSON file per submodule found inside the JSON files of a directory.

    An aggregate file (e.g. the pyrit_all.json produced by running pydoc2json.py
    with --submodules) stores submodules as nested members. Each ``*.json`` file
    in ``api_json_dir`` is loaded and its nested submodules are extracted
    recursively into their own files in the same directory, so that every
    submodule can get its own API reference page.
    """
    # sorted() materialises the listing up front, so files created by the split
    # itself are not visited during this pass.
    json_paths = sorted(api_json_dir.glob("*.json"))
    for json_path in json_paths:
        payload = json.loads(json_path.read_text(encoding="utf-8"))
        _split_submodules(payload, json_path.name, api_json_dir)


def _split_submodules(data: dict, source_name: str, api_json_dir: Path) -> None:
    """Write each nested submodule of ``data`` to ``<name>.json``, depth-first.

    ``source_name`` is only used in the progress message. Members whose ``kind``
    is not ``"module"`` are ignored.
    """
    submodules = (m for m in data.get("members", []) if m.get("kind") == "module")
    for sub in submodules:
        module_name = sub["name"]
        target = api_json_dir / f"{module_name}.json"
        # NOTE(review): an existing file is never overwritten, so a file left over
        # from an earlier run is kept unchanged — confirm this is intended.
        if not target.exists():
            target.write_text(json.dumps(sub, indent=2, default=str), encoding="utf-8")
            print(f"Split {module_name} from {source_name}")
        # Descend even when the file already existed; deeper levels may still be missing.
        _split_submodules(sub, source_name, api_json_dir)


def main() -> None:
    """Generate the API reference index and the per-module markdown pages.

    Steps:
        1. Split aggregate JSON files in ``API_JSON_DIR`` into per-module files.
        2. Load every JSON file, dropping pure aggregates (files whose members
           are all submodules) and modules that have nothing worth a page
           (no members and a docstring shorter than 50 characters).
        3. Write ``index.md`` and one page per remaining module to ``API_MD_DIR``.

    The same filtered module list drives both the index and the pages, so the
    index never links to a page that was not written.
    """
    API_MD_DIR.mkdir(parents=True, exist_ok=True)

    # Split aggregate JSON files (e.g. pyrit_all.json) into per-module files
    split_aggregate_json(API_JSON_DIR)

    json_files = sorted(API_JSON_DIR.glob("*.json"))
    if not json_files:
        print("No JSON files found in", API_JSON_DIR)
        return

    # Collect the modules that will actually get a page.
    modules = []
    for jf in json_files:
        data = json.loads(jf.read_text(encoding="utf-8"))
        members = data.get("members", [])
        # Skip files whose members are all submodules (aggregates like pyrit_all.json)
        if members and all(m.get("kind") == "module" for m in members):
            continue
        # Skip modules with no members and no meaningful docstring
        ds_text = (data.get("docstring") or {}).get("text") or ""
        if not members and len(ds_text) < 50:
            continue
        modules.append(data)

    # Generate index page
    index_parts = ["# API Reference\n"]
    for data in modules:
        mod_name = data["name"]
        members = data.get("members", [])
        member_count = len(members)
        slug = mod_name.replace(".", "_")
        # Preview at most eight class names; note the total when more members exist.
        classes = [m["name"] for m in members if m.get("kind") == "class"][:8]
        preview = ", ".join(f"`{c}`" for c in classes)
        if len(classes) < member_count:
            preview += f" ... ({member_count} total)"
        index_parts.append(f"## [{mod_name}]({slug}.md)\n")
        if preview:
            index_parts.append(preview + "\n")

    index_path = API_MD_DIR / "index.md"
    index_path.write_text("\n".join(index_parts), encoding="utf-8")
    print(f"Written {index_path}")

    # Generate per-module pages
    for data in modules:
        mod_name = data["name"]
        members = data.get("members", [])
        slug = mod_name.replace(".", "_")
        md_path = API_MD_DIR / f"{slug}.md"
        content = render_module(data)
        md_path.write_text(content, encoding="utf-8")
        print(f"Written {md_path} ({len(members)} members)")


# Run the generator only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
Loading
Loading