diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index cd5dabe5af..32a7faa7b1 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -59,7 +59,7 @@ jobs: - name: Upload artifact uses: actions/upload-pages-artifact@v3 with: - path: "doc/_build/html" + path: "doc/_build/site" - name: Deploy to GitHub Pages if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} id: deployment diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4c2cbc92ff..9a52214867 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -69,16 +69,20 @@ repos: - repo: local hooks: - - id: validate-jupyter-book - name: Validate Jupyter Book Structure - entry: python ./build_scripts/validate_jupyter_book.py + - id: remove-notebook-headers + name: Remove Notebook Headers + entry: python ./build_scripts/remove_notebook_headers.py language: python - files: ^(doc/.*\.(py|ipynb|md|rst)|doc/_toc\.yml)$ + files: ^doc.*\.(ipynb)$ + - id: sanitize-notebook-paths + name: Sanitize Notebook Paths + entry: python ./build_scripts/sanitize_notebook_paths.py + language: python + files: ^doc.*\.(ipynb)$ + - id: validate-docs + name: Validate Documentation Structure + entry: python ./build_scripts/validate_docs.py + language: python + files: ^(doc/.*\.(py|ipynb|md)|doc/myst\.yml)$ pass_filenames: false additional_dependencies: ['pyyaml'] - - id: website - name: Jupyter Book Build Check - entry: python ./build_scripts/conditional_jb_build.py - language: system - types: [python] - pass_filenames: false diff --git a/.readthedocs.yaml b/.readthedocs.yaml index b79901806c..eb355fee59 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -9,6 +9,17 @@ build: os: ubuntu-22.04 tools: python: "3.13" + nodejs: "20" + jobs: + pre_build: + - python build_scripts/pydoc2json.py pyrit --submodules -o doc/_api/pyrit_all.json + - python build_scripts/gen_api_md.py + build: + html: + - cd doc && jupyter-book build --all + commands: + - mkdir -p 
$READTHEDOCS_OUTPUT/html + - cp -r doc/_build/site/* $READTHEDOCS_OUTPUT/html/ python: install: @@ -16,7 +27,3 @@ python: path: . extra_requirements: - dev - -formats: - - pdf - - epub diff --git a/Makefile b/Makefile index ad2f38df44..c512780cd1 100644 --- a/Makefile +++ b/Makefile @@ -16,11 +16,22 @@ pre-commit: mypy: $(CMD) mypy $(PYMODULE) $(UNIT_TESTS) +# Build the full documentation site: +# 1. Generate API reference JSON from Python source (griffe) +# 2. Convert API JSON to MyST markdown pages +# 3. Build the Jupyter Book site +# 4. Generate RSS feed docs-build: - uv run jb build -W -v ./doc - cp -r assets doc/_build/assets + uv run python build_scripts/pydoc2json.py pyrit --submodules -o doc/_api/pyrit_all.json + uv run python build_scripts/gen_api_md.py + cd doc && uv run jupyter-book build --all uv run ./build_scripts/generate_rss.py +# Regenerate only the API reference pages (without building the full site) +docs-api: + uv run python build_scripts/pydoc2json.py pyrit --submodules -o doc/_api/pyrit_all.json + uv run python build_scripts/gen_api_md.py + # Because of import time, "auto" seemed to actually go slower than just using 4 processes unit-test: $(CMD) pytest -n 4 --dist=loadfile --cov=$(PYMODULE) $(UNIT_TESTS) diff --git a/build_scripts/conditional_jb_build.py b/build_scripts/conditional_jb_build.py deleted file mode 100644 index 0258aa1f3b..0000000000 --- a/build_scripts/conditional_jb_build.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -""" -Conditional Jupyter Book build wrapper for pre-commit. - -This script checks the RUN_LONG_PRECOMMIT environment variable: -- If set to "true", runs the full `jb build -W -q ./doc` command -- Otherwise, exits successfully (fast validation script runs instead) - -This allows CI/pipeline to run full builds while local development uses fast validation. 
-""" - -import os -import subprocess -import sys - - -def main(): - run_long = os.environ.get("RUN_LONG_PRECOMMIT", "").lower() == "true" - - if run_long: - print("RUN_LONG_PRECOMMIT=true: Running full Jupyter Book build...") - # Run jb build with the same flags as before - result = subprocess.run( - ["jb", "build", "-W", "-q", "./doc"], - cwd=os.path.dirname(os.path.dirname(__file__)), # Repository root - ) - return result.returncode - print("RUN_LONG_PRECOMMIT not set: Skipping full Jupyter Book build (fast validation runs instead)") - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/build_scripts/gen_api_md.py b/build_scripts/gen_api_md.py new file mode 100644 index 0000000000..890d5cbd32 --- /dev/null +++ b/build_scripts/gen_api_md.py @@ -0,0 +1,264 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Generate MyST markdown API reference pages from griffe JSON. + +WORKAROUND: Jupyter Book 2 (MyST engine) does not yet have native support for +auto-generating API documentation from Python source code. This script and +pydoc2json.py are a workaround that generates API reference pages from source. +Once JB2/MyST adds native API doc support, these scripts can be replaced. +Tracking issue: https://github.com/jupyter-book/mystmd/issues/1259 + +Reads the JSON files produced by pydoc2json.py and generates clean +MyST markdown pages suitable for Jupyter Book 2. + +Usage: + python build_scripts/gen_api_md.py +""" + +import json +from pathlib import Path + +API_JSON_DIR = Path("doc/_api") +API_MD_DIR = Path("doc/api") + + +def render_params(params: list[dict]) -> str: + """Render parameter list as a markdown table.""" + if not params: + return "" + lines = ["| Parameter | Type | Description |", "|---|---|---|"] + for p in params: + name = f"`{p['name']}`" + ptype = p.get("type", "") + desc = p.get("desc", "").replace("\n", " ") + default = p.get("default", "") + if default: + desc += f" Defaults to `{default}`." 
+ lines.append(f"| {name} | `{ptype}` | {desc} |") + return "\n".join(lines) + + +def render_returns(returns: list[dict]) -> str: + """Render returns section.""" + if not returns: + return "" + parts = ["**Returns:**\n"] + for r in returns: + rtype = r.get("type", "") + desc = r.get("desc", "") + parts.append(f"- `{rtype}` — {desc}") + return "\n".join(parts) + + +def render_raises(raises: list[dict]) -> str: + """Render raises section.""" + if not raises: + return "" + parts = ["**Raises:**\n"] + for r in raises: + rtype = r.get("type", "") + desc = r.get("desc", "") + parts.append(f"- `{rtype}` — {desc}") + return "\n".join(parts) + + +def render_signature(member: dict) -> str: + """Render a function/method signature as a single line.""" + params = member.get("signature", []) + if not params: + return "()" + parts = [] + for p in params: + name = p["name"] + if name in ("self", "cls"): + continue + ptype = p.get("type", "") + default = p.get("default", "") + if ptype and default: + parts.append(f"{name}: {ptype} = {default}") + elif ptype: + parts.append(f"{name}: {ptype}") + elif default: + parts.append(f"{name}={default}") + else: + parts.append(name) + # Always single line for heading use + sig = ", ".join(parts) + return f"({sig})" + + +def render_function(func: dict, heading_level: str = "###") -> str: + """Render a function as markdown.""" + name = func["name"] + is_async = func.get("is_async", False) + prefix = "async " if is_async else "" + sig = render_signature(func) + ret = func.get("returns_annotation", "") + ret_str = f" → {ret}" if ret else "" + + # Use heading for name, code block for full signature if long + full_sig = f"{prefix}{name}{sig}{ret_str}" + if len(full_sig) > 80: + parts = [f"{heading_level} {prefix}{name}\n"] + parts.append(f"```python\n{prefix}{name}{sig}{ret_str}\n```\n") + else: + parts = [f"{heading_level} `{full_sig}`\n"] + + ds = func.get("docstring", {}) + if ds: + if ds.get("text"): + parts.append(ds["text"] + "\n") + 
params_table = render_params(ds.get("params", [])) + if params_table: + parts.append(params_table + "\n") + returns = render_returns(ds.get("returns", [])) + if returns: + parts.append(returns + "\n") + raises = render_raises(ds.get("raises", [])) + if raises: + parts.append(raises + "\n") + + return "\n".join(parts) + + +def render_class(cls: dict) -> str: + """Render a class as markdown.""" + name = cls["name"] + bases = cls.get("bases", []) + bases_str = f"({', '.join(bases)})" if bases else "" + + parts = [f"## `class {name}{bases_str}`\n"] + + ds = cls.get("docstring", {}) + if ds and ds.get("text"): + parts.append(ds["text"] + "\n") + + # __init__ + init = cls.get("init") + if init: + init_ds = init.get("docstring", {}) + if init_ds and init_ds.get("params"): + parts.append("**Constructor Parameters:**\n") + parts.append(render_params(init_ds["params"]) + "\n") + + # Methods + methods = cls.get("methods", []) + if methods: + parts.append("**Methods:**\n") + parts.extend(render_function(m, heading_level="####") for m in methods) + + return "\n".join(parts) + + +def render_module(data: dict) -> str: + """Render a full module page.""" + mod_name = data["name"] + parts = [f"# {mod_name}\n"] + + ds = data.get("docstring", {}) + if ds and ds.get("text"): + parts.append(ds["text"] + "\n") + + members = data.get("members", []) + + # Separate classes and functions + classes = [m for m in members if m.get("kind") == "class"] + functions = [m for m in members if m.get("kind") == "function"] + aliases = [m for m in members if m.get("kind") == "alias"] + + if functions: + parts.append("## Functions\n") + parts.extend(render_function(f) for f in functions) + + parts.extend(render_class(cls) for cls in classes) + + return "\n".join(parts) + + +def split_aggregate_json(api_json_dir: Path) -> None: + """Split aggregate JSON files that contain nested submodules into individual files. + + When pydoc2json.py runs with --submodules, it produces a single JSON file + (e.g. 
pyrit_all.json) whose members are submodules. This function recursively + splits those nested submodules into individual JSON files so that each + submodule gets its own API reference page. + """ + for jf in sorted(api_json_dir.glob("*.json")): + data = json.loads(jf.read_text(encoding="utf-8")) + _split_submodules(data, jf.name, api_json_dir) + + +def _split_submodules(data: dict, source_name: str, api_json_dir: Path) -> None: + """Recursively extract and write submodule members to individual JSON files.""" + for member in data.get("members", []): + if member.get("kind") != "module": + continue + sub_name = member["name"] + sub_path = api_json_dir / f"{sub_name}.json" + if not sub_path.exists(): + sub_path.write_text(json.dumps(member, indent=2, default=str), encoding="utf-8") + print(f"Split {sub_name} from {source_name}") + # Recurse into nested submodules + _split_submodules(member, source_name, api_json_dir) + + +def main() -> None: + API_MD_DIR.mkdir(parents=True, exist_ok=True) + + # Split aggregate JSON files (e.g. 
pyrit_all.json) into per-module files + split_aggregate_json(API_JSON_DIR) + + # Exclude aggregate files that only contain submodules (no direct classes/functions) + json_files = sorted(API_JSON_DIR.glob("*.json")) + if not json_files: + print("No JSON files found in", API_JSON_DIR) + return + + # Collect module data, skipping pure-aggregate files + modules = [] + for jf in json_files: + data = json.loads(jf.read_text(encoding="utf-8")) + members = data.get("members", []) + # Skip files whose members are all submodules (aggregates like pyrit_all.json) + non_module_members = [m for m in members if m.get("kind") != "module"] + if not non_module_members and any(m.get("kind") == "module" for m in members): + continue + modules.append(data) + + # Generate index page + index_parts = ["# API Reference\n"] + for data in modules: + mod_name = data["name"] + members = data.get("members", []) + member_count = len(members) + slug = mod_name.replace(".", "_") + classes = [m["name"] for m in members if m.get("kind") == "class"][:8] + preview = ", ".join(f"`{c}`" for c in classes) + if len(classes) < member_count: + preview += f" ... 
({member_count} total)" + index_parts.append(f"## [{mod_name}]({slug}.md)\n") + if preview: + index_parts.append(preview + "\n") + + index_path = API_MD_DIR / "index.md" + index_path.write_text("\n".join(index_parts), encoding="utf-8") + print(f"Written {index_path}") + + # Generate per-module pages + for data in modules: + mod_name = data["name"] + members = data.get("members", []) + # Skip modules with no members and no meaningful docstring + ds_text = (data.get("docstring") or {}).get("text", "") + if not members and len(ds_text) < 50: + continue + slug = mod_name.replace(".", "_") + md_path = API_MD_DIR / f"{slug}.md" + content = render_module(data) + md_path.write_text(content, encoding="utf-8") + print(f"Written {md_path} ({len(members)} members)") + + +if __name__ == "__main__": + main() diff --git a/build_scripts/generate_rss.py b/build_scripts/generate_rss.py index ba17c9cf27..bbdd27111f 100644 --- a/build_scripts/generate_rss.py +++ b/build_scripts/generate_rss.py @@ -1,37 +1,69 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. +import re import sys -from html.parser import HTMLParser from pathlib import Path from feedgen.feed import FeedGenerator +BLOG_SOURCE_DIR = Path("doc/blog") +RSS_OUTPUT_DIR = Path("doc/_build/site/blog") -# HTML parser to extract title and description -class BlogEntryParser(HTMLParser): - def __init__(self): - super().__init__() - self.title = "" - self.description = "" - self.current_tag = "" - self.description_step = 0 - def handle_starttag(self, tag, attrs): - if tag == "p": - self.description_step += 1 - self.current_tag = tag +def parse_blog_markdown(filepath: Path) -> tuple[str, str]: + """Extract title and first paragraph from a blog markdown file. - def handle_endtag(self, tag): - if tag == "p": - self.description_step += 1 - self.current_tag = "" + Args: + filepath: Path to the markdown blog file. 
- def handle_data(self, data): - if self.current_tag == "title": - self.title = data - elif self.description_step == 3: - self.description = self.description + data + Returns: + tuple[str, str]: The title and description extracted from the file. + """ + text = filepath.read_text(encoding="utf-8") + lines = text.strip().split("\n") + + title = "" + description = "" + + # Title is the first heading + for line in lines: + if line.startswith("# "): + title = line.lstrip("# ").strip() + break + + # Description is the first non-empty paragraph after the date line + in_description = False + desc_lines = [] + for line in lines[2:]: + stripped = line.strip() + if not stripped: + if in_description and desc_lines: + break + continue + if stripped.startswith("#"): + continue + in_description = True + desc_lines.append(stripped) + + description = " ".join(desc_lines) + return title, description + + +def extract_date_from_filename(filename: str) -> str: + """Extract publication date from blog filename (e.g. 2024_12_3.md -> 2024-12-03). + + Args: + filename: The blog filename. + + Returns: + str: ISO date string. + """ + match = re.match(r"(\d{4})_(\d{1,2})_(\d{1,2})", filename) + if not match: + return "" + year, month, day = match.groups() + return f"{year}-{int(month):02d}-{int(day):02d}" # Generate the RSS feed structure @@ -43,10 +75,13 @@ fg.logo("https://azure.github.io/PyRIT/_static/roakey.png") fg.language("en") -# Iterate over the blog files and sort them +# Iterate over the blog source markdown files print("Pulling blog files...") -directory = Path("doc/_build/html/blog/") -files = [file for file in directory.iterdir() if file.is_file() and file.name.startswith("20")] +if not BLOG_SOURCE_DIR.exists(): + print(f"Error: Blog source directory {BLOG_SOURCE_DIR} not found. 
Exiting.") + sys.exit(1) + +files = [f for f in BLOG_SOURCE_DIR.iterdir() if f.is_file() and f.name.startswith("20") and f.suffix == ".md"] if len(files) == 0: print("Error: No blog files found. Exiting.") sys.exit(1) @@ -56,38 +91,36 @@ def handle_data(self, data): for file in files: print(f"Parsing {file.name}...") fe = fg.add_entry() - fe.link(href=f"https://azure.github.io/PyRIT/blog/{file.name}") - fe.guid(f"https://azure.github.io/PyRIT/blog/{file.name}") + # Blog pages are served at blog/ + page_name = file.stem + fe.link(href=f"https://azure.github.io/PyRIT/blog/{page_name}") + fe.guid(f"https://azure.github.io/PyRIT/blog/{page_name}") - # Extract title and description from HTML content - with open(file, encoding="utf-8") as f: - parser = BlogEntryParser() - parser.feed(f.read()) - fe.title(parser.title) - fe.description(parser.description) + title, description = parse_blog_markdown(file) + fe.title(title) + fe.description(description) - # Extract publication date from file name - fe.pubDate(f"{file.name[:10].replace('_', '-')}T10:00:00Z") + pub_date = extract_date_from_filename(file.name) + if pub_date: + fe.pubDate(f"{pub_date}T10:00:00Z") # Validating the RSS feed print("Validating RSS feed...") first_entry = fg.entry()[-1] -if first_entry.title() != "Multi-Turn orchestrators — PyRIT Documentation": - print("Error: Title parsing failed. Exiting.") +if first_entry.title() != "Multi-Turn orchestrators": + print(f"Error: Title parsing failed. Got: {first_entry.title()!r}. Exiting.") sys.exit(1) -if first_entry.description() != ( - "In PyRIT, orchestrators are typically seen as the top-level component. " - "This is where your attack logic is implemented, while notebooks should " - "primarily be used to configure orchestrators." -): - print("Error: Description parsing failed. Exiting.") +expected_desc_start = "In PyRIT, orchestrators are typically seen as the top-level component." 
+if not first_entry.description().startswith(expected_desc_start): + print(f"Error: Description parsing failed. Got: {first_entry.description()[:80]!r}. Exiting.") sys.exit(1) # Export the RSS feed print("Exporting RSS feed...") -fg.rss_file("doc/_build/html/blog/rss.xml", pretty=True) -file = Path("doc/_build/html/blog/rss.xml") -if not file.exists() or file.stat().st_size == 0: +RSS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True) +rss_path = RSS_OUTPUT_DIR / "rss.xml" +fg.rss_file(str(rss_path), pretty=True) +if not rss_path.exists() or rss_path.stat().st_size == 0: print("Error: RSS feed export failed. Exiting.") sys.exit(1) diff --git a/build_scripts/pydoc2json.py b/build_scripts/pydoc2json.py new file mode 100644 index 0000000000..8a053869ef --- /dev/null +++ b/build_scripts/pydoc2json.py @@ -0,0 +1,208 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Generate API reference JSON from Python source using griffe. + +WORKAROUND: Jupyter Book 2 (MyST engine) does not yet have native support for +auto-generating API documentation from Python source code. This script and +gen_api_md.py are a workaround that generates API reference pages from source. +Once JB2/MyST adds native API doc support, these scripts can be replaced. +Tracking issue: https://github.com/jupyter-book/mystmd/issues/1259 + +Walks the pyrit package, parses Google-style docstrings, and outputs +structured JSON that gen_api_md.py converts to MyST markdown pages. 
+ +Usage: + python build_scripts/pydoc2json.py pyrit -o doc/_api/pyrit.json + python build_scripts/pydoc2json.py pyrit.score -o doc/_api/pyrit.score.json +""" + +import argparse +import json +import sys +from pathlib import Path + +import griffe + + +def docstring_to_dict(docstring: griffe.Docstring | None) -> dict | None: + """Parse a griffe Docstring into a structured dict.""" + if not docstring: + return None + + parsed = docstring.parse("google") + result = {"text": "", "params": [], "returns": [], "raises": [], "examples": []} + + for section in parsed: + if section.kind == griffe.DocstringSectionKind.text: + result["text"] = section.value.strip() + elif section.kind == griffe.DocstringSectionKind.parameters: + for param in section.value: + result["params"].append( + { + "name": param.name, + "type": str(param.annotation) if param.annotation else "", + "desc": param.description or "", + "default": str(param.default) if param.default else "", + } + ) + elif section.kind == griffe.DocstringSectionKind.returns: + for ret in section.value: + result["returns"].append( + { + "type": str(ret.annotation) if ret.annotation else "", + "desc": ret.description or "", + } + ) + elif section.kind == griffe.DocstringSectionKind.raises: + for exc in section.value: + result["raises"].append( + { + "type": str(exc.annotation) if exc.annotation else "", + "desc": exc.description or "", + } + ) + elif section.kind == griffe.DocstringSectionKind.examples: + for example in section.value: + if isinstance(example, tuple): + result["examples"].append({"kind": example[0].value, "value": example[1]}) + + # Remove empty fields + return {k: v for k, v in result.items() if v} + + +def function_to_dict(func: griffe.Function) -> dict: + """Convert a griffe Function to a structured dict.""" + sig_params = [] + for param in func.parameters: + p = {"name": param.name} + if param.annotation: + p["type"] = str(param.annotation) + if param.default is not None and str(param.default) != "": + p["default"] = 
str(param.default) + if param.kind: + p["kind"] = param.kind.value + sig_params.append(p) + + result = { + "name": func.name, + "kind": "function", + "signature": sig_params, + "docstring": docstring_to_dict(func.docstring), + "is_async": func.is_async if hasattr(func, "is_async") else False, + } + if func.returns: + result["returns_annotation"] = str(func.returns) + + return result + + +def class_to_dict(cls: griffe.Class) -> dict: + """Convert a griffe Class to a structured dict.""" + result = { + "name": cls.name, + "kind": "class", + "docstring": docstring_to_dict(cls.docstring), + "bases": [str(b) for b in cls.bases] if cls.bases else [], + "methods": [], + "attributes": [], + } + + # Get __init__ if it has docstring/params + init = cls.members.get("__init__") + if init and isinstance(init, griffe.Function): + result["init"] = function_to_dict(init) + + # Public methods + for name, member in sorted(cls.members.items()): + if name.startswith("_") and name != "__init__": + continue + try: + if isinstance(member, griffe.Function) and name != "__init__": + result["methods"].append(function_to_dict(member)) + elif isinstance(member, griffe.Attribute): + attr = {"name": name} + if member.annotation: + attr["type"] = str(member.annotation) + if member.docstring: + attr["docstring"] = member.docstring.value.strip() + result["attributes"].append(attr) + except Exception: + continue + + # Remove empty fields + if not result["methods"]: + del result["methods"] + if not result["attributes"]: + del result["attributes"] + if not result["bases"]: + del result["bases"] + + return result + + +def module_to_dict(mod: griffe.Module, include_submodules: bool = False) -> dict: + """Convert a griffe Module to a structured dict.""" + result = { + "name": mod.path, + "kind": "module", + "docstring": docstring_to_dict(mod.docstring), + "members": [], + } + + for name, member in sorted(mod.members.items()): + if name.startswith("_"): + continue + try: + if isinstance(member, 
griffe.Class): + result["members"].append(class_to_dict(member)) + elif isinstance(member, griffe.Function): + result["members"].append(function_to_dict(member)) + elif isinstance(member, griffe.Alias): + # Re-exported names — try to resolve + try: + target = member.final_target + if isinstance(target, griffe.Class): + result["members"].append(class_to_dict(target)) + elif isinstance(target, griffe.Function): + result["members"].append(function_to_dict(target)) + except Exception: + # Unresolvable alias — just record the name + result["members"].append({"name": name, "kind": "alias", "target": str(member.target_path)}) + elif isinstance(member, griffe.Module) and include_submodules: + result["members"].append(module_to_dict(member, include_submodules=True)) + except Exception as e: + print(f" Warning: skipping {name}: {e}", file=sys.stderr) + continue + + return result + + +def main() -> None: + parser = argparse.ArgumentParser(description="Generate API reference JSON using griffe") + parser.add_argument("module", help="Python module path (e.g. 
pyrit or pyrit.score)") + parser.add_argument("-o", "--output", type=Path, help="Output JSON file") + parser.add_argument("--submodules", action="store_true", help="Include submodules recursively") + args = parser.parse_args() + + loader = griffe.GriffeLoader(search_paths=[Path(".")]) + try: + mod = loader.load(args.module) + except Exception as e: + print(f"Error loading {args.module}: {e}", file=sys.stderr) + sys.exit(1) + + data = module_to_dict(mod, include_submodules=args.submodules) + + output_json = json.dumps(data, indent=2, default=str) + + if args.output: + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text(output_json, encoding="utf-8") + print(f"Written to {args.output} ({len(data['members'])} members)") + else: + print(output_json) + + +if __name__ == "__main__": + main() diff --git a/build_scripts/validate_docs.py b/build_scripts/validate_docs.py new file mode 100644 index 0000000000..c574bf9e77 --- /dev/null +++ b/build_scripts/validate_docs.py @@ -0,0 +1,152 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +""" +Fast validation script for Jupyter Book 2 documentation. + +Validates that all file references in doc/myst.yml exist and detects +orphaned documentation files. Designed to run quickly in pre-commit +instead of a full `jupyter-book build`. 
+ +Exit codes: + 0: All validations passed + 1: Validation errors found +""" + +import sys +from pathlib import Path + +import yaml + + +def parse_toc_files(toc_entries: list, files: set | None = None) -> set[str]: + """Recursively extract all file references from myst.yml toc.""" + if files is None: + files = set() + for entry in toc_entries: + if isinstance(entry, dict): + if "file" in entry: + files.add(entry["file"].replace("\\", "/")) + if "children" in entry: + parse_toc_files(entry["children"], files) + return files + + +def validate_toc_files(toc_files: set[str], doc_root: Path) -> list[str]: + """Check that all files referenced in the TOC exist.""" + # Directories with auto-generated content (gitignored, created during build) + generated_dirs = {"api/", "api\\"} + + errors = [] + for file_ref in toc_files: + # Skip files in auto-generated directories + if any(file_ref.startswith(d) for d in generated_dirs): + continue + file_path = doc_root / file_ref + if not file_path.exists(): + errors.append(f"File referenced in myst.yml TOC not found: '{file_ref}'") + return errors + + +def find_orphaned_files(toc_files: set[str], doc_root: Path) -> list[str]: + """Find documentation files not referenced in the TOC.""" + skip_dirs = { + "_build", + "_api", + "api", + "css", + ".ipynb_checkpoints", + "__pycache__", + "playwright_demo", + "generate_docs", + } + skip_files = { + "myst.yml", + "roakey.png", + "banner.png", + ".gitignore", + "references.bib", + "requirements.txt", + } + + # Normalize TOC references (strip extensions for comparison) + toc_stems = set() + for f in toc_files: + p = Path(f) + toc_stems.add(p.with_suffix("").as_posix()) + toc_stems.add(p.as_posix()) + + orphaned = [] + for file_path in doc_root.rglob("*"): + if file_path.is_dir(): + continue + if any(skip_dir in file_path.parts for skip_dir in skip_dirs): + continue + if file_path.name in skip_files: + continue + if file_path.suffix not in [".md", ".ipynb", ".py", ".rst"]: + continue + # .py 
companion files for .ipynb are not orphaned + if file_path.suffix == ".py": + notebook_version = file_path.with_suffix(".ipynb") + if notebook_version.exists(): + continue + + rel = file_path.relative_to(doc_root) + rel_posix = rel.as_posix() + rel_stem = rel.with_suffix("").as_posix() + + if rel_posix not in toc_stems and rel_stem not in toc_stems: + orphaned.append(str(rel)) + + return orphaned + + +def main() -> int: + script_dir = Path(__file__).parent + repo_root = script_dir.parent + doc_root = repo_root / "doc" + myst_yml = doc_root / "myst.yml" + + if not myst_yml.exists(): + print(f"ERROR: myst.yml not found at {myst_yml}", file=sys.stderr) + return 1 + + with open(myst_yml, encoding="utf-8") as f: + config = yaml.safe_load(f) + + all_errors = [] + + # Validate TOC file references + toc = config.get("project", {}).get("toc", []) + print("Validating myst.yml TOC file references...") + toc_files = parse_toc_files(toc) + toc_errors = validate_toc_files(toc_files, doc_root) + if toc_errors: + all_errors.extend(f"[myst.yml] {err}" for err in toc_errors) + else: + print(f"[OK] Validated {len(toc_files)} file references in myst.yml") + + # Check for orphaned files + print("Checking for orphaned documentation files...") + orphaned = find_orphaned_files(toc_files, doc_root) + if orphaned: + all_errors.extend(f"[orphaned] File exists but not in myst.yml: {f}" for f in orphaned) + else: + print("[OK] No orphaned documentation files found") + + if all_errors: + print("\n" + "=" * 80, file=sys.stderr) + print("VALIDATION ERRORS FOUND:", file=sys.stderr) + print("=" * 80, file=sys.stderr) + for error in all_errors: + print(f" • {error}", file=sys.stderr) + print("=" * 80, file=sys.stderr) + return 1 + + print("\n[OK] All documentation validations passed!") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/build_scripts/validate_jupyter_book.py b/build_scripts/validate_jupyter_book.py deleted file mode 100644 index 50dfbfedc8..0000000000 --- 
a/build_scripts/validate_jupyter_book.py +++ /dev/null @@ -1,334 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -""" -Fast validation script for Jupyter Book documentation. - -This script performs comprehensive validation of: -1. doc/api.rst - Validates that all module references exist and autosummary members are defined -2. doc/_toc.yml - Validates that all file references exist and detects orphaned doc files - -Designed to replace the slow `jb build` command for local pre-commit validation while -maintaining thoroughness. The full `jb build` still runs in CI/pipeline. - -Exit codes: - 0: All validations passed - 1: Validation errors found -""" - -import os -import re -import sys -from pathlib import Path - -import yaml - - -def parse_api_rst(api_rst_path: Path) -> list[tuple[str, list[str]]]: - """ - Parse api.rst file to extract module names and their autosummary members. - - Returns: - List of tuples: (module_name, [member_names]) - """ - with open(api_rst_path, encoding="utf-8") as f: - content = f.read() - - modules = [] - # Pattern to match autosummary sections - autosummary_pattern = re.compile( - r"\.\. 
autosummary::\s+:nosignatures:\s+:toctree: _autosummary/\s+((?:\s+\w+\s*\n)+)", re.MULTILINE - ) - - # Split content by module sections using :py:mod:`module.name` - sections = re.split(r":py:mod:`(pyrit\.[^`]+)`", content) - - for i in range(1, len(sections), 2): - if i + 1 < len(sections): - module_name = sections[i] - section_content = sections[i + 1] - - # Extract autosummary members from this section - members = [] - autosummary_match = autosummary_pattern.search(section_content) - if autosummary_match: - members_text = autosummary_match.group(1) - members = [m.strip() for m in members_text.strip().split("\n") if m.strip()] - - modules.append((module_name, members)) - - return modules - - -def validate_api_rst_modules(modules: list[tuple[str, list[str]]], repo_root: Path) -> list[str]: - """ - Validate that modules exist and autosummary members are defined. - - Returns: - List of error messages (empty if all validations passed) - """ - errors = [] - - for module_name, members in modules: - # Check if module file exists - # Convert module name to path: pyrit.analytics -> pyrit/analytics/__init__.py - module_path = module_name.replace(".", os.sep) - possible_paths = [ - repo_root / f"{module_path}.py", - repo_root / module_path / "__init__.py", - ] - - # For pyrit.scenario.* modules, also check in pyrit.scenario.scenarios.* - # These are virtual modules registered via sys.modules aliasing - if module_name.startswith("pyrit.scenario.") and module_name != "pyrit.scenario.scenarios": - # e.g., pyrit.scenario.airt -> pyrit.scenario.scenarios.airt - scenarios_path = module_name.replace("pyrit.scenario.", "pyrit.scenario.scenarios.", 1) - scenarios_module_path = scenarios_path.replace(".", os.sep) - possible_paths.extend( - [ - repo_root / f"{scenarios_module_path}.py", - repo_root / scenarios_module_path / "__init__.py", - ] - ) - - module_exists = any(p.exists() for p in possible_paths) - - if not module_exists: - errors.append(f"Module file not found for 
'{module_name}': checked {[str(p) for p in possible_paths]}") - continue - - # Validate members by checking the source file directly (works even without dependencies) - if members: - # Find the actual module file - module_file = None - for path in possible_paths: - if path.exists(): - module_file = path - break - - if module_file: - # Read the source file and check for member definitions - try: - with open(module_file, encoding="utf-8") as f: - source_content = f.read() - - for member in members: - # Check for various definition patterns: - # - def member(... - # - class member(... - # - member = ... - # Also check __all__ if present - patterns = [ - rf"^def {re.escape(member)}\s*\(", - rf"^class {re.escape(member)}\s*[\(:]", - rf"^{re.escape(member)}\s*=", - rf"^\s+{re.escape(member)}\s*=", # indented assignments - rf'"{re.escape(member)}"', # in __all__ or strings - rf"'{re.escape(member)}'", - ] - - found = any(re.search(pattern, source_content, re.MULTILINE) for pattern in patterns) - - if not found: - errors.append( - f"Member '{member}' not found in module '{module_name}' (searched {module_file})" - ) - - except Exception as e: - errors.append(f"Error reading source file for '{module_name}': {e}") - - return errors - - -def parse_toc_yml(toc_path: Path) -> set[str]: - """ - Parse _toc.yml file to extract all file references. 
- - Returns: - Set of file paths (relative to doc/ directory, without extensions) - """ - with open(toc_path, encoding="utf-8") as f: - toc_data = yaml.safe_load(f) - - files = set() - - def extract_files(node): - if isinstance(node, dict): - if "file" in node: - files.add(node["file"]) - if "root" in node: - files.add(node["root"]) - for value in node.values(): - extract_files(value) - elif isinstance(node, list): - for item in node: - extract_files(item) - - extract_files(toc_data) - return files - - -def validate_toc_yml_files(toc_files: set[str], doc_root: Path) -> list[str]: - """ - Validate that all files referenced in _toc.yml exist. - - Returns: - List of error messages (empty if all validations passed) - """ - errors = [] - - for file_ref in toc_files: - # Check if file reference already includes an extension - if file_ref.endswith((".rst", ".md", ".ipynb", ".py")): - if not (doc_root / file_ref).exists(): - errors.append(f"File referenced in _toc.yml not found: '{file_ref}'") - continue - - # Files in _toc.yml are usually listed without extensions - # Check for .md, .ipynb, or .py files - possible_extensions = [".md", ".ipynb", ".py"] - file_exists = any((doc_root / f"{file_ref}{ext}").exists() for ext in possible_extensions) - - if not file_exists: - errors.append( - f"File referenced in _toc.yml not found: '{file_ref}' (checked extensions: {possible_extensions})" - ) - - return errors - - -def find_orphaned_doc_files(toc_files: set[str], doc_root: Path) -> list[str]: - """ - Find documentation files that exist but are not referenced in _toc.yml. 
- - Returns: - List of orphaned file paths - """ - # Directories to skip - skip_dirs = { - "_build", - "_autosummary", - "_static", - "_templates", - "generate_docs", - ".ipynb_checkpoints", - "__pycache__", - "playwright_demo", - } - - # Files to skip (these are special/configuration files) - skip_files = {"_config.yml", "conf.py", "references.bib", "roakey.png", ".gitignore", "requirements.txt"} - - # Normalize toc_files to handle both with and without extensions - normalized_toc_files = set() - for file_ref in toc_files: - # Add the reference as-is - normalized_toc_files.add(file_ref.replace(os.sep, "/")) - # Also add without extension if it has one - if file_ref.endswith((".rst", ".md", ".ipynb", ".py")): - normalized_toc_files.add(Path(file_ref).with_suffix("").as_posix()) - - orphaned = [] - - for file_path in doc_root.rglob("*"): - # Skip directories - if file_path.is_dir(): - continue - - # Skip if in excluded directories - if any(skip_dir in file_path.parts for skip_dir in skip_dirs): - continue - - # Skip if in excluded files - if file_path.name in skip_files: - continue - - # Only check documentation files - if file_path.suffix not in [".md", ".ipynb", ".py", ".rst"]: - continue - - # Get relative path without extension - rel_path = file_path.relative_to(doc_root) - rel_path_no_ext = str(rel_path.with_suffix("")).replace(os.sep, "/") - rel_path_with_ext = str(rel_path).replace(os.sep, "/") - - # Check if this file is referenced in _toc.yml (with or without extension) - if rel_path_no_ext in normalized_toc_files or rel_path_with_ext in normalized_toc_files: - continue - - # Check if companion .py file for .ipynb (notebooks often have both) - if file_path.suffix == ".py": - notebook_version = file_path.with_suffix(".ipynb") - if notebook_version.exists(): - # .py file is companion to .ipynb, not orphaned - continue - - orphaned.append(str(rel_path)) - - return orphaned - - -def main(): - # Determine repository root (parent of build_scripts) - script_dir = 
Path(__file__).parent - repo_root = script_dir.parent - doc_root = repo_root / "doc" - api_rst = doc_root / "api.rst" - toc_yml = doc_root / "_toc.yml" - - # Add repo root to sys.path so we can import pyrit modules - if str(repo_root) not in sys.path: - sys.path.insert(0, str(repo_root)) - - # Ensure required files exist - if not api_rst.exists(): - print(f"ERROR: api.rst not found at {api_rst}", file=sys.stderr) - return 1 - - if not toc_yml.exists(): - print(f"ERROR: _toc.yml not found at {toc_yml}", file=sys.stderr) - return 1 - - all_errors = [] - - # Validate api.rst - print("Validating api.rst module references...") - modules = parse_api_rst(api_rst) - api_errors = validate_api_rst_modules(modules, repo_root) - if api_errors: - all_errors.extend([f"[api.rst] {err}" for err in api_errors]) - else: - print(f"[OK] Validated {len(modules)} modules in api.rst") - - # Validate _toc.yml - print("Validating _toc.yml file references...") - toc_files = parse_toc_yml(toc_yml) - toc_errors = validate_toc_yml_files(toc_files, doc_root) - if toc_errors: - all_errors.extend([f"[_toc.yml] {err}" for err in toc_errors]) - else: - print(f"[OK] Validated {len(toc_files)} file references in _toc.yml") - - # Check for orphaned files - print("Checking for orphaned documentation files...") - orphaned = find_orphaned_doc_files(toc_files, doc_root) - if orphaned: - all_errors.extend([f"[orphaned] File exists but not in _toc.yml: {f}" for f in orphaned]) - else: - print("[OK] No orphaned documentation files found") - - # Report results - if all_errors: - print("\n" + "=" * 80, file=sys.stderr) - print("VALIDATION ERRORS FOUND:", file=sys.stderr) - print("=" * 80, file=sys.stderr) - for error in all_errors: - print(f" • {error}", file=sys.stderr) - print("=" * 80, file=sys.stderr) - return 1 - print("\n[OK] All Jupyter Book validations passed!") - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/doc/.gitignore b/doc/.gitignore new file mode 100644 index 
0000000000..4693f622c7 --- /dev/null +++ b/doc/.gitignore @@ -0,0 +1,6 @@ +# MyST build outputs +_build + +# Generated API reference (regenerated by scripts/pydoc2json.py and scripts/gen_api_md.py) +_api/ +api/ diff --git a/doc/_config.yml b/doc/_config.yml deleted file mode 100644 index b99d1c211f..0000000000 --- a/doc/_config.yml +++ /dev/null @@ -1,76 +0,0 @@ -# Book settings -# Learn more at https://jupyterbook.org/customize/config.html - -title: PyRIT Documentation -author: Microsoft AI Red Team -copyright: Copyright 2024, Microsoft AI Red Team -logo: roakey.png - -# Force re-execution of notebooks on each build. -# See https://jupyterbook.org/content/execute.html -execute: - execute_notebooks: 'off' - -# Define the name of the latex output file for PDF builds -latex: - latex_documents: - targetname: book.tex - -# Add a bibtex file so that we can create citations -bibtex_bibfiles: - - references.bib - -# Information about where the book exists on the web -repository: - url: https://github.com/Azure/PyRIT - path_to_book: doc - branch: main - -# Add GitHub buttons to your book -# See https://jupyterbook.org/customize/config.html#add-a-link-to-your-repository -html: - favicon: 'roakey.png' - use_issues_button: true - use_repository_button: true - use_edit_page_button: true - extra_static_files: ["_static/custom.js", "_static/custom.css"] - -sphinx: - extra_extensions: - - 'sphinx.ext.autodoc' - - 'sphinx.ext.napoleon' - - 'sphinx.ext.viewcode' - - 'sphinx.ext.autosummary' - - 'sphinx.ext.intersphinx' - - 'sphinxcontrib.mermaid' - config: - autosummary_generate: true - add_module_names: false - suppress_warnings: ["etoc.toctree"] - myst_heading_anchors: 4 - autodoc_default_options: - members: true - show-inheritance: true - undoc-members: true - private-members: false - nitpick_ignore: - - py:class - # - py:exc - # - py:func - # - py:meth - # - py:mod - - py:obj - # - py:var - intersphinx_mapping: - python: - - "https://docs.python.org/3" - - null - numpy: - - 
"https://numpy.org/doc/stable" - - null - sklearn: - - "https://scikit-learn.org/stable" - - null - pytorch: - - "https://docs.pytorch.org/docs/stable" - - null diff --git a/doc/_toc.yml b/doc/_toc.yml deleted file mode 100644 index 6c7df46de5..0000000000 --- a/doc/_toc.yml +++ /dev/null @@ -1,162 +0,0 @@ -format: jb-book -root: index -chapters: - - file: cookbooks/README - sections: - - file: cookbooks/1_sending_prompts - - file: cookbooks/2_precomputing_turns - - file: cookbooks/3_copyright_violations - - file: cookbooks/4_testing_bias - - file: cookbooks/5_psychosocial_harms - - file: setup/1a_install_uv - sections: - - file: setup/1b_install_docker - - file: setup/1c_install_conda - - file: setup/jupyter_setup - - file: setup/populating_secrets - - file: setup/pyrit_conf - - file: setup/use_azure_sql_db - - file: contributing/README - sections: - - file: contributing/1a_install_uv - - file: contributing/1b_install_devcontainers - - file: contributing/1c_install_conda - - file: contributing/2_git - - file: contributing/3_incorporating_research - - file: contributing/4_style_guide - - file: contributing/5_running_tests - - file: contributing/6_unit_tests - - file: contributing/7_integration_tests - - file: contributing/8_notebooks - - file: contributing/9_pre_commit - - file: contributing/10_exception - - file: contributing/11_release_process - - file: code/architecture - - file: code/user_guide - sections: - - file: code/datasets/0_dataset - sections: - - file: code/datasets/1_loading_datasets - - file: code/datasets/2_seed_programming - - file: code/datasets/3_dataset_writing - - file: code/datasets/4_dataset_coding - - file: code/executor/0_executor - sections: - - file: code/executor/attack/0_attack - sections: - - file: code/executor/attack/1_prompt_sending_attack - - file: code/executor/attack/2_red_teaming_attack - - file: code/executor/attack/3_crescendo_attack - - file: code/executor/attack/chunked_request_attack - - file: 
code/executor/attack/context_compliance_attack - - file: code/executor/attack/flip_attack - - file: code/executor/attack/many_shot_jailbreak_attack - - file: code/executor/attack/multi_prompt_sending_attack - - file: code/executor/attack/role_play_attack - - file: code/executor/attack/skeleton_key_attack - - file: code/executor/attack/tap_attack - - file: code/executor/attack/violent_durian_attack - - file: code/executor/workflow/0_workflow - sections: - - file: code/executor/workflow/1_xpia_website - - file: code/executor/workflow/2_xpia_ai_recruiter - - file: code/executor/benchmark/0_benchmark - sections: - - file: code/executor/benchmark/1_qa_benchmark - - file: code/executor/promptgen/0_promptgen - sections: - - file: code/executor/promptgen/1_anecdoctor_generator - - file: code/executor/promptgen/fuzzer_generator - - file: code/targets/0_prompt_targets - sections: - - file: code/targets/1_openai_chat_target - - file: code/targets/2_openai_responses_target - - file: code/targets/3_openai_image_target - - file: code/targets/4_openai_video_target - - file: code/targets/5_openai_tts_target - - file: code/targets/6_custom_targets - - file: code/targets/7_non_open_ai_chat_targets - - file: code/targets/8_non_llm_targets - - file: code/targets/9_rate_limiting - - file: code/targets/10_http_target - - file: code/targets/11_message_normalizer - - file: code/targets/10_1_playwright_target - - file: code/targets/10_2_playwright_target_copilot - - file: code/targets/10_3_websocket_copilot_target - - file: code/targets/open_ai_completions - - file: code/targets/prompt_shield_target - - file: code/targets/realtime_target - - file: code/targets/use_huggingface_chat_target - - file: code/converters/0_converters - sections: - - file: code/converters/1_text_to_text_converters - - file: code/converters/2_audio_converters - - file: code/converters/3_image_converters - - file: code/converters/4_video_converters - - file: code/converters/5_file_converters - - file: 
code/converters/6_selectively_converting - - file: code/converters/7_human_converter - - file: code/scoring/0_scoring - sections: - - file: code/scoring/1_azure_content_safety_scorers - - file: code/scoring/2_true_false_scorers - - file: code/scoring/3_classification_scorers - - file: code/scoring/4_likert_scorers - - file: code/scoring/5_human_in_the_loop_scorer - - file: code/scoring/6_refusal_scorer - - file: code/scoring/7_batch_scorer - - file: code/scoring/8_scorer_metrics - - file: code/scoring/insecure_code_scorer - - file: code/scoring/persuasion_full_conversation_scorer - - file: code/scoring/prompt_shield_scorer - - file: code/scoring/generic_scorers - - file: code/memory/0_memory - sections: - - file: code/memory/1_sqlite_memory - - file: code/memory/2_basic_memory_programming - - file: code/memory/3_memory_data_types - - file: code/memory/4_manually_working_with_memory - - file: code/memory/5_memory_labels - - file: code/memory/6_azure_sql_memory - - file: code/memory/7_azure_sql_memory_attacks - - file: code/memory/8_seed_database - - file: code/memory/9_exporting_data - - file: code/memory/10_schema_diagram.md - - file: code/memory/embeddings - - file: code/setup/0_setup - sections: - - file: code/setup/1_configuration - - file: code/setup/2_resiliency - - file: code/setup/default_values - - file: code/setup/pyrit_initializer - - file: code/auxiliary_attacks/0_auxiliary_attacks - sections: - - file: code/auxiliary_attacks/1_gcg_azure_ml - - file: code/scenarios/0_scenarios - sections: - - file: code/scenarios/1_configuring_scenarios - - file: code/registry/0_registry - sections: - - file: code/registry/1_class_registry - - file: code/registry/2_instance_registry - - file: code/gui/0_gui - - file: code/front_end/0_front_end - sections: - - file: code/front_end/1_pyrit_scan - - file: code/front_end/2_pyrit_shell - - file: deployment/README - sections: - - file: deployment/deploy_hf_model_aml - - file: deployment/download_and_register_hf_model_aml - - 
file: deployment/hf_aml_model_endpoint_guide - - file: deployment/score_aml_endpoint - - file: deployment/troubleshooting_guide_hf_azureml - - file: api.rst - - file: blog/README - sections: - - file: blog/2025_06_06 - - file: blog/2025_03_03 - - file: blog/2025_02_11 - - file: blog/2025_01_27 - - file: blog/2025_01_14 - - file: blog/2024_12_3 diff --git a/doc/api.rst b/doc/api.rst deleted file mode 100644 index 830fc54357..0000000000 --- a/doc/api.rst +++ /dev/null @@ -1,726 +0,0 @@ -API Reference -============= - -:py:mod:`pyrit.analytics` -========================= - -.. automodule:: pyrit.analytics - :no-members: - :no-inherited-members: - -.. autosummary:: - :nosignatures: - :toctree: _autosummary/ - - analyze_results - AttackStats - ConversationAnalytics - -:py:mod:`pyrit.auth` -==================== - -.. automodule:: pyrit.auth - :no-members: - :no-inherited-members: - -.. autosummary:: - :nosignatures: - :toctree: _autosummary/ - - Authenticator - AzureAuth - AzureStorageAuth - CopilotAuthenticator - ManualCopilotAuthenticator - -:py:mod:`pyrit.auxiliary_attacks` -================================= - -.. automodule:: pyrit.auxiliary_attacks - :no-members: - :no-inherited-members: - -.. autosummary:: - :nosignatures: - :toctree: _autosummary/ - - - -:py:mod:`pyrit.cli` -======================================= - -.. automodule:: pyrit.cli - :no-members: - :no-inherited-members: - -.. autosummary:: - :nosignatures: - :toctree: _autosummary/ - - -:py:mod:`pyrit.common` -====================== - -.. automodule:: pyrit.common - :no-members: - :no-inherited-members: - -.. 
autosummary:: - :nosignatures: - :toctree: _autosummary/ - - apply_defaults - apply_defaults_to_method - combine_dict - combine_list - convert_local_image_to_data_url - DefaultValueScope - display_image_response - download_chunk - download_file - download_files - download_specific_files - get_available_files - get_global_default_values - get_httpx_client - get_kwarg_param - get_non_required_value - get_random_indices - get_required_value - is_in_ipython_session - make_request_and_raise_if_error_async - print_deprecation_message - reset_default_values - set_default_value - Singleton - warn_if_set - YamlLoadable - -:py:mod:`pyrit.datasets` -======================== - -.. automodule:: pyrit.datasets - :no-members: - :no-inherited-members: - -.. autosummary:: - :nosignatures: - :toctree: _autosummary/ - - SeedDatasetProvider - TextJailBreak - - - -:py:mod:`pyrit.embedding` -========================= - -.. automodule:: pyrit.embedding - :no-members: - :no-inherited-members: - -.. autosummary:: - :nosignatures: - :toctree: _autosummary/ - - OpenAITextEmbedding - -:py:mod:`pyrit.exceptions` -========================== - -.. automodule:: pyrit.exceptions - :no-members: - :no-inherited-members: - -.. autosummary:: - :nosignatures: - :toctree: _autosummary/ - - BadRequestException - EmptyResponseException - handle_bad_request_exception - InvalidJsonException - MissingPromptPlaceholderException - PyritException - pyrit_custom_result_retry - pyrit_json_retry - pyrit_target_retry - pyrit_placeholder_retry - RateLimitException - remove_markdown_json - -:py:mod:`pyrit.executor.attack` -=============================== - -.. automodule:: pyrit.executor.attack - :no-members: - :no-inherited-members: - -.. 
autosummary:: - :nosignatures: - :toctree: _autosummary/ - - AttackAdversarialConfig - AttackContext - AttackConverterConfig - AttackExecutor - AttackExecutorResult - AttackParameters - AttackResultPrinter - AttackScoringConfig - AttackStrategy - ConsoleAttackResultPrinter - ChunkedRequestAttack - ChunkedRequestAttackContext - ContextComplianceAttack - ConversationManager - ConversationSession - ConversationState - CrescendoAttack - CrescendoAttackContext - CrescendoAttackResult - FlipAttack - generate_simulated_conversation_async - ManyShotJailbreakAttack - MarkdownAttackResultPrinter - MultiPromptSendingAttack - MultiPromptSendingAttackParameters - MultiTurnAttackContext - MultiTurnAttackStrategy - PrependedConversationConfig - PromptSendingAttack - RTASystemPromptPaths - RedTeamingAttack - RolePlayAttack - RolePlayPaths - SingleTurnAttackContext - SingleTurnAttackStrategy - SkeletonKeyAttack - TAPAttack - TAPAttackContext - TAPAttackResult - TreeOfAttacksWithPruningAttack - -:py:mod:`pyrit.executor.promptgen` -================================== - -.. automodule:: pyrit.executor.promptgen - :no-members: - :no-inherited-members: - -.. autosummary:: - :nosignatures: - :toctree: _autosummary/ - - AnecdoctorContext - AnecdoctorGenerator - AnecdoctorResult - PromptGeneratorStrategy - PromptGeneratorStrategyContext - PromptGeneratorStrategyResult - -:py:mod:`pyrit.executor.promptgen.fuzzer` -========================================= - -.. automodule:: pyrit.executor.promptgen.fuzzer - :no-members: - :no-inherited-members: - -.. autosummary:: - :nosignatures: - :toctree: _autosummary/ - - FuzzerConverter - FuzzerContext - FuzzerCrossOverConverter - FuzzerExpandConverter - FuzzerGenerator - FuzzerRephraseConverter - FuzzerResult - FuzzerResultPrinter - FuzzerShortenConverter - FuzzerSimilarConverter - -:py:mod:`pyrit.executor.workflow` -================================= - -.. automodule:: pyrit.executor.workflow - :no-members: - :no-inherited-members: - -.. 
autosummary:: - :nosignatures: - :toctree: _autosummary/ - - XPIAContext - XPIAResult - XPIAWorkflow - XPIATestWorkflow - XPIAManualProcessingWorkflow - XPIAProcessingCallback - XPIAStatus - -:py:mod:`pyrit.identifiers` -=========================== - -.. automodule:: pyrit.identifiers - :no-members: - :no-inherited-members: - -.. autosummary:: - :nosignatures: - :toctree: _autosummary/ - - AtomicAttackEvaluationIdentifier - build_atomic_attack_identifier - build_seed_identifier - ChildEvalRule - class_name_to_snake_case - ComponentIdentifier - compute_eval_hash - config_hash - EvaluationIdentifier - Identifiable - ScorerEvaluationIdentifier - snake_case_to_class_name - -:py:mod:`pyrit.memory` -====================== - -.. automodule:: pyrit.memory - :no-members: - :no-inherited-members: - -.. autosummary:: - :nosignatures: - :toctree: _autosummary/ - - AttackResultEntry - AzureSQLMemory - CentralMemory - EmbeddingDataEntry - MemoryInterface - MemoryEmbedding - MemoryExporter - PromptMemoryEntry - SeedEntry - SQLiteMemory - -:py:mod:`pyrit.message_normalizer` -================================== - -.. automodule:: pyrit.message_normalizer - :no-members: - :no-inherited-members: - -.. autosummary:: - :nosignatures: - :toctree: _autosummary/ - - MessageListNormalizer - MessageStringNormalizer - GenericSystemSquashNormalizer - TokenizerTemplateNormalizer - ConversationContextNormalizer - ChatMessageNormalizer - -:py:mod:`pyrit.models` -====================== - -.. automodule:: pyrit.models - :no-members: - :no-inherited-members: - -.. 
autosummary:: - :nosignatures: - :toctree: _autosummary/ - - ALLOWED_CHAT_MESSAGE_ROLES - AudioPathDataTypeSerializer - AzureBlobStorageIO - BinaryPathDataTypeSerializer - ChatMessage - ChatMessagesDataset - ChatMessageRole - ChatMessageListDictContent - construct_response_from_request - ConversationReference - ConversationStats - ConversationType - DataTypeSerializer - data_serializer_factory - DiskStorageIO - EmbeddingData - EmbeddingResponse - EmbeddingSupport - EmbeddingUsageInformation - ErrorDataTypeSerializer - get_all_harm_definitions - group_conversation_message_pieces_by_sequence - group_message_pieces_into_conversations - HarmDefinition - ImagePathDataTypeSerializer - AllowedCategories - AttackOutcome - AttackResult - Message - MessagePiece - NextMessageSystemPromptPaths - PromptDataType - PromptResponseError - QuestionAnsweringDataset - QuestionAnsweringEntry - QuestionChoice - ScaleDescription - ScenarioIdentifier - ScenarioResult - Score - ScoreType - Seed - SeedAttackGroup - SeedAttackTechniqueGroup - SeedDataset - SeedGroup - SeedObjective - SeedPrompt - SeedSimulatedConversation - SeedType - SimulatedTargetSystemPromptPaths - sort_message_pieces - StorageIO - StrategyResult - TextDataTypeSerializer - UnvalidatedScore - VideoPathDataTypeSerializer - - -:py:mod:`pyrit.prompt_converter` -================================ - -.. automodule:: pyrit.prompt_converter - :no-members: - :no-inherited-members: - -.. 
autosummary:: - :nosignatures: - :toctree: _autosummary/ - - AddImageTextConverter - AddImageVideoConverter - AddTextImageConverter - AnsiAttackConverter - AsciiArtConverter - AsciiSmugglerConverter - AskToDecodeConverter - AtbashConverter - AudioEchoConverter - AudioFrequencyConverter - AudioSpeedConverter - AudioVolumeConverter - AudioWhiteNoiseConverter - AzureSpeechAudioToTextConverter - AzureSpeechTextToAudioConverter - Base2048Converter - Base64Converter - BinAsciiConverter - BinaryConverter - BrailleConverter - CaesarConverter - CharacterSpaceConverter - CharSwapConverter - CodeChameleonConverter - ColloquialWordswapConverter - ConverterResult - DenylistConverter - DiacriticConverter - EcojiConverter - EmojiConverter - FirstLetterConverter - FlipConverter - get_converter_modalities - HumanInTheLoopConverter - ImageCompressionConverter - IndexSelectionStrategy - InsertPunctuationConverter - JsonStringConverter - KeywordSelectionStrategy - LeetspeakConverter - LLMGenericTextConverter - MaliciousQuestionGeneratorConverter - MathObfuscationConverter - MathPromptConverter - MorseConverter - NatoConverter - NegationTrapConverter - NoiseConverter - PDFConverter - PersuasionConverter - PositionSelectionStrategy - PromptConverter - ProportionSelectionStrategy - QRCodeConverter - RandomCapitalLettersConverter - RandomTranslationConverter - RangeSelectionStrategy - RegexSelectionStrategy - RepeatTokenConverter - ROT13Converter - ScientificTranslationConverter - SearchReplaceConverter - SelectiveTextConverter - SneakyBitsSmugglerConverter - StringJoinConverter - SuffixAppendConverter - SuperscriptConverter - TemplateSegmentConverter - TenseConverter - TextJailbreakConverter - TextSelectionStrategy - TokenSelectionStrategy - ToneConverter - ToxicSentenceGeneratorConverter - TranslationConverter - TransparencyAttackConverter - UnicodeConfusableConverter - UnicodeReplacementConverter - UnicodeSubstitutionConverter - UrlConverter - VariationConverter - 
VariationSelectorSmugglerConverter - WordDocConverter - WordIndexSelectionStrategy - WordKeywordSelectionStrategy - WordPositionSelectionStrategy - WordProportionSelectionStrategy - WordRegexSelectionStrategy - WordSelectionStrategy - ZalgoConverter - ZeroWidthConverter - -:py:mod:`pyrit.prompt_normalizer` -================================= - -.. automodule:: pyrit.prompt_normalizer - :no-members: - :no-inherited-members: - -.. autosummary:: - :nosignatures: - :toctree: _autosummary/ - - PromptNormalizer - PromptConverterConfiguration - NormalizerRequest - -:py:mod:`pyrit.prompt_target` -============================= - -.. automodule:: pyrit.prompt_target - :no-members: - :no-inherited-members: - -.. autosummary:: - :nosignatures: - :toctree: _autosummary/ - - AzureBlobStorageTarget - AzureMLChatTarget - CopilotType - CrucibleTarget - GandalfLevel - GandalfTarget - get_http_target_json_response_callback_function - get_http_target_regex_matching_callback_function - HTTPTarget - HTTPXAPITarget - HuggingFaceChatTarget - HuggingFaceEndpointTarget - limit_requests_per_minute - OpenAICompletionTarget - OpenAIChatAudioConfig - OpenAIImageTarget - OpenAIChatTarget - OpenAIResponseTarget - OpenAIVideoTarget - OpenAITTSTarget - OpenAITarget - PlaywrightCopilotTarget - PlaywrightTarget - PromptChatTarget - PromptShieldTarget - PromptTarget - RealtimeTarget - TargetCapabilities - TextTarget - WebSocketCopilotTarget - -:py:mod:`pyrit.score` -===================== - -.. automodule:: pyrit.score - :no-members: - :no-inherited-members: - -.. 
autosummary:: - :nosignatures: - :toctree: _autosummary/ - - AudioFloatScaleScorer - AudioTrueFalseScorer - AzureContentFilterScorer - BatchScorer - ConsoleScorerPrinter - ContentClassifierPaths - ConversationScorer - create_conversation_scorer - DecodingScorer - FloatScaleScoreAggregator - FloatScaleScorer - FloatScaleScorerAllCategories - FloatScaleScorerByCategory - FloatScaleThresholdScorer - GandalfScorer - HarmHumanLabeledEntry - HarmScorerEvaluator - HarmScorerMetrics - HumanInTheLoopScorerGradio - HumanLabeledDataset - HumanLabeledEntry - InsecureCodeScorer - LikertScaleEvalFiles - LikertScalePaths - MarkdownInjectionScorer - MetricsType - ObjectiveHumanLabeledEntry - ObjectiveScorerEvaluator - ObjectiveScorerMetrics - PlagiarismMetric - PlagiarismScorer - PromptShieldScorer - QuestionAnswerScorer - RefusalScorerPaths - RegistryUpdateBehavior - Scorer - ScorerEvalDatasetFiles - ScorerEvaluator - ScorerMetrics - ScorerMetricsWithIdentity - ScorerPrinter - ScorerPromptValidator - get_all_harm_metrics - get_all_objective_metrics - SelfAskCategoryScorer - SelfAskGeneralFloatScaleScorer - SelfAskGeneralTrueFalseScorer - SelfAskLikertScorer - SelfAskQuestionAnswerScorer - SelfAskRefusalScorer - SelfAskScaleScorer - SelfAskTrueFalseScorer - SubStringScorer - TrueFalseAggregatorFunc - TrueFalseCompositeScorer - TrueFalseInverterScorer - TrueFalseQuestion - TrueFalseQuestionPaths - TrueFalseScoreAggregator - TrueFalseScorer - VideoFloatScaleScorer - VideoTrueFalseScorer - -:py:mod:`pyrit.scenario` -========================= - -.. automodule:: pyrit.scenario - :no-members: - :no-inherited-members: - -.. autosummary:: - :nosignatures: - :toctree: _autosummary/ - - AtomicAttack - DatasetConfiguration - Scenario - ScenarioCompositeStrategy - ScenarioStrategy - -:py:mod:`pyrit.scenario.airt` -============================= - -.. automodule:: pyrit.scenario.airt - :no-members: - :no-inherited-members: - -.. 
autosummary:: - :nosignatures: - :toctree: _autosummary/ - - ContentHarms - ContentHarmsStrategy - Cyber - CyberStrategy - -:py:mod:`pyrit.scenario.foundry` -================================ - -.. automodule:: pyrit.scenario.foundry - :no-members: - :no-inherited-members: - -.. autosummary:: - :nosignatures: - :toctree: _autosummary/ - - FoundryScenario - FoundryStrategy - RedTeamAgent - -:py:mod:`pyrit.scenario.garak` -============================== - -.. automodule:: pyrit.scenario.garak - :no-members: - :no-inherited-members: - -.. autosummary:: - :nosignatures: - :toctree: _autosummary/ - - Encoding - EncodingStrategy - -:py:mod:`pyrit.setup` -===================== - -.. automodule:: pyrit.setup - :no-members: - :no-inherited-members: - -.. autosummary:: - :nosignatures: - :toctree: _autosummary/ - - initialize_pyrit_async - AZURE_SQL - SQLITE - IN_MEMORY - -:py:mod:`pyrit.setup.initializers` -================================== - -.. automodule:: pyrit.setup.initializers - :no-members: - :no-inherited-members: - -.. 
autosummary:: - :nosignatures: - :toctree: _autosummary/ - - PyRITInitializer - AIRTInitializer - TargetInitializer - ScorerInitializer - SimpleInitializer - LoadDefaultDatasets - ScenarioObjectiveListInitializer diff --git a/doc/banner.png b/doc/banner.png new file mode 100644 index 0000000000..eb1f4dc78d Binary files /dev/null and b/doc/banner.png differ diff --git a/doc/conf.py b/doc/conf.py deleted file mode 100644 index 5083230fb1..0000000000 --- a/doc/conf.py +++ /dev/null @@ -1,4 +0,0 @@ -import os -import sys - -sys.path.insert(0, os.path.abspath("../..")) diff --git a/doc/css/custom.css b/doc/css/custom.css new file mode 100644 index 0000000000..9024dfb49d --- /dev/null +++ b/doc/css/custom.css @@ -0,0 +1,83 @@ +/* Remove white background from logo in dark mode */ +.myst-home-link-logo { + background: transparent !important; + background-color: transparent !important; +} + +/* Make split-image hero full-width with banner as background */ +.myst-landing-split-img { + position: relative !important; + overflow: hidden !important; + margin-left: calc(-50vw + 50%) !important; + margin-right: calc(-50vw + 50%) !important; + width: 100vw !important; + max-width: 100vw !important; +} + +.myst-landing-split-img-image { + position: absolute !important; + top: 0 !important; + left: 0 !important; + width: 100% !important; + height: 100% !important; + z-index: 0 !important; +} + +.myst-landing-split-img-image img { + width: 100% !important; + height: 100% !important; + object-fit: cover !important; +} + +.myst-landing-split-img-content-wrapper { + position: relative !important; + z-index: 1 !important; + min-height: 60vw !important; + max-height: 80vh !important; + display: flex !important; + align-items: flex-start !important; + padding-top: 4rem !important; +} + +.myst-landing-split-img-content { + width: 50% !important; + margin-left: 2rem !important; +} + +/* Title: white, bigger, bolder */ +.myst-landing-split-img-heading { + color: #fff !important; + font-size: 
4rem !important; + font-weight: 800 !important; + text-shadow: 0 3px 12px rgba(0,0,0,0.7) !important; +} + +/* Subtitle: white, bold */ +.myst-landing-split-img-subtitle { + color: #fff !important; + font-size: 1.3rem !important; + font-weight: 700 !important; + text-shadow: 0 2px 8px rgba(0,0,0,0.6) !important; +} + +/* Body text: white, readable */ +.myst-landing-split-img-body, +.myst-landing-split-img-body p { + color: #fff !important; + font-size: 1.1rem !important; + font-weight: 500 !important; + text-shadow: 0 2px 6px rgba(0,0,0,0.6) !important; +} + +/* Darken left side for text readability */ +.myst-landing-split-img::before { + content: ''; + position: absolute; + top: 0; + left: 0; + width: 65%; + height: 100%; + background: linear-gradient(to right, rgba(0,0,0,0.7), rgba(0,0,0,0.3) 70%, transparent); + z-index: 0; + pointer-events: none; +} diff --git a/doc/index.md b/doc/index.md index 20018f5a32..055ec4ea7c 100644 --- a/doc/index.md +++ b/doc/index.md @@ -1,246 +1,121 @@ -# PyRIT +--- +title: PyRIT — Python Risk Identification Tool +site: + hide_title_block: true + hide_toc: true + hide_outline: true +--- -Welcome to the Python Risk Identification Tool for generative AI (PyRIT)! PyRIT is designed to be a flexible and extensible tool that can be used to assess the security and safety issues of generative AI systems in a variety of ways. ++++ { "kind": "split-image" } -Before starting with AI Red Teaming, we recommend reading the following article from Microsoft: -["Planning red teaming for large language models (LLMs) and their applications"](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/red-teaming). +PyRIT -Generative AI systems introduce many categories of risk, which can be difficult to mitigate even with a red teaming -plan in place. 
To quote the article above, "with LLMs, both benign and adversarial usage can produce -potentially harmful outputs, which can take many forms, including harmful content such as hate speech, -incitement or glorification of violence, or sexual content." Additionally, a variety of security risks -can be introduced by the deployment of an AI system. +## Python Risk Identification Tool + +Automated and human-led AI red teaming — a flexible, extensible framework for assessing the security and safety of generative AI systems at scale. + +![](banner.png) + ++++ { "kind": "justified" } + +What PyRIT Offers + +## Key Capabilities + +:::::{grid} 1 2 3 3 + +::::{card} +🎯 **Automated Red Teaming** + +Run multi-turn attack strategies like Crescendo, TAP, and Skeleton Key against AI systems with minimal setup. Single-turn and multi-turn attacks supported out of the box. +:::: + +::::{card} +📦 **Scenario Framework** + +Run standardized evaluation scenarios at large scale — covering content harms, psychosocial risks, data leakage, and more. Compose strategies and datasets for repeatable, comprehensive assessments across hundreds of objectives. +:::: + +::::{card} +🖥️ **CoPyRIT** + +A graphical user interface for human-led red teaming. Interact with AI systems directly, track findings, and collaborate with your team — all from a modern web UI. +:::: + +::::{card} +🔌 **Any Target** + +Test OpenAI, Azure, Anthropic, Google, HuggingFace, custom HTTP endpoints or WebSockets, web app targets with Playwright, or build your own with a simple interface. +:::: + +::::{card} +💾 **Built-in Memory** + +Track all conversations, scores, and attack results with SQLite or Azure SQL. Export, analyze, and share results with your team. +:::: + +::::{card} +📊 **Flexible Scoring** + +Evaluate AI responses with true/false, Likert scale, classification, and custom scorers — powered by LLMs, Azure AI Content Safety, or your own logic. 
+:::: + +::::: + ++++ { "kind": "justified" } ## Installation Guide -PyRIT offers flexible installation options to suit different needs. Choose the path that best fits your use case: +PyRIT offers flexible installation options to suit different needs. Choose the path that best fits your use case. -```{important} +::::{important} **Version Compatibility:** - **User installations** (Docker, Pip/Conda) install the **latest stable release** from PyPI - **Contributor installations** (DevContainers, Local Development) use the **latest development code** from the `main` branch -- Always match your notebooks to your PyRIT version - download from the corresponding release branch if using a stable release -``` +- Always match your notebooks to your PyRIT version +:::: -::::{grid} 1 1 2 2 +:::::{grid} 1 1 2 2 :gutter: 3 -:::{grid-item-card} 🐋 Docker Installation +::::{card} 🐋 Docker Installation :link: setup/1b_install_docker -:shadow: md -:class-header: bg-light -:link-type: doc - **For Users - Quick Start** ⭐ -^^^ - Get started immediately with a pre-configured environment: - - ✅ All dependencies included - ✅ No Python setup needed - ✅ JupyterLab built-in - ✅ Works on all platforms +:::: -+++ - -**Best for:** First-time users who want to start quickly without environment setup. - -::: - -:::{grid-item-card} 🐍 Local Pip/uv Installation +::::{card} ☀️ Local Pip/uv Installation :link: setup/1a_install_uv -:shadow: md -:class-header: bg-light -:link-type: doc - **For Users - Custom Setup** -^^^ - Install PyRIT directly on your machine: - - ✅ Full Python environment control - ✅ Lighter weight installation - ✅ Easy integration with existing workflows -- ✅ Direct system access - -+++ - -**Best for:** Users comfortable with Python environments or integrating PyRIT into existing projects. 
- -::: +:::: -:::{grid-item-card} 🐋 DevContainers in VS Code +::::{card} 🐋 DevContainers in VS Code :link: contributing/1b_install_devcontainers -:shadow: md -:class-header: bg-light -:link-type: doc - **For Contributors** ⭐ -^^^ - Standardized development environment: - - ✅ Pre-configured VS Code setup - ✅ Consistent across all contributors - ✅ All extensions pre-installed -- ✅ Zero configuration needed - -+++ - -**Best for:** Contributors using VS Code who want a ready-to-go development environment. - -::: +:::: -:::{grid-item-card} 🐍 Local uv Development +::::{card} ☀️ Local uv Development :link: contributing/1a_install_uv -:shadow: md -:class-header: bg-light -:link-type: doc - **For Contributors - Custom Dev Setup** -^^^ - Install from source in editable mode: - - ✅ Full development control - ✅ Use any IDE or editor - ✅ Customize environment -- ✅ Advanced configuration options - -+++ - -**Best for:** Contributors who prefer custom development setups or don't use VS Code. - -::: - -:::: - -## Recommended Docs Reading Order - -There is no single way to read the documentation, and it's perfectly fine to jump around. However, here is a recommended reading order. Note that in many sections there are numbered documentation pages. If there is no number attached, it is supplemental and the recommended reading order would be to skip it on a first pass. - -::::{grid} 1 -:gutter: 2 - -:::{grid-item-card} 1️⃣ Cookbooks -:link: cookbooks/README -:shadow: md -:class-header: bg-light -:link-type: doc - -This provides an overview of PyRIT in action. A great place to see practical examples and get started quickly. - -::: - -:::{grid-item-card} 2️⃣ Architecture -:link: code/architecture -:shadow: md -:class-header: bg-light -:link-type: doc - -High-level overview of all PyRIT components. Understanding any single component is easier with knowledge of the others. 
- -::: - -:::{grid-item-card} 3️⃣ Attacks -:link: code/executor/0_executor -:shadow: md -:class-header: bg-light -:link-type: doc - -Top-level components implementing end-to-end attack techniques. Shows how all PyRIT components fit together. - -::: - -:::{grid-item-card} 4️⃣ Datasets -:link: code/datasets/0_dataset -:shadow: md -:class-header: bg-light -:link-type: doc - -Building attacks using seed prompts and fetching datasets. The foundation for creating test scenarios. - -::: - -:::{grid-item-card} 5️⃣ Targets -:link: code/targets/0_prompt_targets -:shadow: md -:class-header: bg-light -:link-type: doc - -Endpoints that PyRIT sends prompts to. Essential for nearly any PyRIT scenario - learn what targets are available. - -::: - -:::{grid-item-card} 6️⃣ Converters -:link: code/converters/0_converters -:shadow: md -:class-header: bg-light -:link-type: doc - -Transform prompts from one format to another. One of the most powerful capabilities within PyRIT. - -::: - -:::{grid-item-card} 7️⃣ Scorers -:link: code/scoring/0_scoring -:shadow: md -:class-header: bg-light -:link-type: doc - -How PyRIT makes decisions and records output. Essential for evaluating AI system responses. - -::: - -:::{grid-item-card} 8️⃣ Memory -:link: code/memory/0_memory -:shadow: md -:class-header: bg-light -:link-type: doc - -How PyRIT components communicate state information. Understand the data flow and storage mechanisms. - -::: - -:::{grid-item-card} 9️⃣ Auxiliary Attacks (Optional) -:link: code/auxiliary_attacks/0_auxiliary_attacks -:shadow: md -:class-header: bg-light -:link-type: doc - -Attacks and techniques that don't fit into core PyRIT functionality. Advanced and specialized methods. - -::: - -:::: - -### Additional Resources - -::::{grid} 1 1 2 2 -:gutter: 2 - -:::{grid-item-card} 🚀 Deployment -:link: deployment/README -:shadow: md -:class-header: bg-light -:link-type: doc - -Code to download, deploy, and score open-source models (such as Hugging Face) on Azure. 
- -::: - -:::{grid-item-card} 📰 Blog -:link: blog/README -:shadow: md -:class-header: bg-light -:link-type: doc - -Notable new changes and updates. Stay current with the latest PyRIT developments. - -::: - :::: +::::: diff --git a/doc/myst.yml b/doc/myst.yml new file mode 100644 index 0000000000..a8dd9351ce --- /dev/null +++ b/doc/myst.yml @@ -0,0 +1,253 @@ +version: 1 +project: + title: PyRIT Documentation + authors: + - name: Microsoft AI Red Team + copyright: Copyright 2024, Microsoft AI Red Team + github: Azure/PyRIT + bibliography: + - references.bib + exports: + - format: pdf + template: plain_latex_book + output: exports/book.pdf + toc: + - file: index.md + - file: cookbooks/README.md + children: + - file: cookbooks/1_sending_prompts.ipynb + - file: cookbooks/2_precomputing_turns.ipynb + - file: cookbooks/3_copyright_violations.ipynb + - file: cookbooks/4_testing_bias.ipynb + - file: cookbooks/5_psychosocial_harms.ipynb + - file: setup/1a_install_uv.md + children: + - file: setup/1b_install_docker.md + - file: setup/1c_install_conda.md + - file: setup/jupyter_setup.md + - file: setup/populating_secrets.md + - file: setup/pyrit_conf.md + - file: setup/use_azure_sql_db.md + - file: contributing/README.md + children: + - file: contributing/1a_install_uv.md + - file: contributing/1b_install_devcontainers.md + - file: contributing/1c_install_conda.md + - file: contributing/2_git.md + - file: contributing/3_incorporating_research.md + - file: contributing/4_style_guide.md + - file: contributing/5_running_tests.md + - file: contributing/6_unit_tests.md + - file: contributing/7_integration_tests.md + - file: contributing/8_notebooks.md + - file: contributing/9_pre_commit.md + - file: contributing/10_exception.md + - file: contributing/11_release_process.md + - file: code/architecture.md + - file: code/user_guide.md + children: + - file: code/datasets/0_dataset.md + children: + - file: code/datasets/1_loading_datasets.ipynb + - file: 
code/datasets/2_seed_programming.ipynb + - file: code/datasets/3_dataset_writing.md + - file: code/datasets/4_dataset_coding.ipynb + - file: code/executor/0_executor.md + children: + - file: code/executor/attack/0_attack.md + children: + - file: code/executor/attack/1_prompt_sending_attack.ipynb + - file: code/executor/attack/2_red_teaming_attack.ipynb + - file: code/executor/attack/3_crescendo_attack.ipynb + - file: code/executor/attack/chunked_request_attack.ipynb + - file: code/executor/attack/context_compliance_attack.ipynb + - file: code/executor/attack/flip_attack.ipynb + - file: code/executor/attack/many_shot_jailbreak_attack.ipynb + - file: code/executor/attack/multi_prompt_sending_attack.ipynb + - file: code/executor/attack/role_play_attack.ipynb + - file: code/executor/attack/skeleton_key_attack.ipynb + - file: code/executor/attack/tap_attack.ipynb + - file: code/executor/attack/violent_durian_attack.ipynb + - file: code/executor/workflow/0_workflow.md + children: + - file: code/executor/workflow/1_xpia_website.ipynb + - file: code/executor/workflow/2_xpia_ai_recruiter.ipynb + - file: code/executor/benchmark/0_benchmark.md + children: + - file: code/executor/benchmark/1_qa_benchmark.ipynb + - file: code/executor/promptgen/0_promptgen.md + children: + - file: code/executor/promptgen/1_anecdoctor_generator.ipynb + - file: code/executor/promptgen/fuzzer_generator.ipynb + - file: code/targets/0_prompt_targets.md + children: + - file: code/targets/1_openai_chat_target.ipynb + - file: code/targets/2_openai_responses_target.ipynb + - file: code/targets/3_openai_image_target.ipynb + - file: code/targets/4_openai_video_target.ipynb + - file: code/targets/5_openai_tts_target.ipynb + - file: code/targets/6_custom_targets.ipynb + - file: code/targets/7_non_open_ai_chat_targets.ipynb + - file: code/targets/8_non_llm_targets.ipynb + - file: code/targets/9_rate_limiting.ipynb + - file: code/targets/10_http_target.ipynb + - file: code/targets/11_message_normalizer.ipynb 
+ - file: code/targets/10_1_playwright_target.ipynb + - file: code/targets/10_2_playwright_target_copilot.ipynb + - file: code/targets/10_3_websocket_copilot_target.ipynb + - file: code/targets/open_ai_completions.ipynb + - file: code/targets/prompt_shield_target.ipynb + - file: code/targets/realtime_target.ipynb + - file: code/targets/use_huggingface_chat_target.ipynb + - file: code/converters/0_converters.ipynb + children: + - file: code/converters/1_text_to_text_converters.ipynb + - file: code/converters/2_audio_converters.ipynb + - file: code/converters/3_image_converters.ipynb + - file: code/converters/4_video_converters.ipynb + - file: code/converters/5_file_converters.ipynb + - file: code/converters/6_selectively_converting.ipynb + - file: code/converters/7_human_converter.ipynb + - file: code/scoring/0_scoring.md + children: + - file: code/scoring/1_azure_content_safety_scorers.ipynb + - file: code/scoring/2_true_false_scorers.ipynb + - file: code/scoring/3_classification_scorers.ipynb + - file: code/scoring/4_likert_scorers.ipynb + - file: code/scoring/5_human_in_the_loop_scorer.ipynb + - file: code/scoring/6_refusal_scorer.ipynb + - file: code/scoring/7_batch_scorer.ipynb + - file: code/scoring/8_scorer_metrics.ipynb + - file: code/scoring/insecure_code_scorer.ipynb + - file: code/scoring/persuasion_full_conversation_scorer.ipynb + - file: code/scoring/prompt_shield_scorer.ipynb + - file: code/scoring/generic_scorers.ipynb + - file: code/memory/0_memory.md + children: + - file: code/memory/1_sqlite_memory.ipynb + - file: code/memory/2_basic_memory_programming.ipynb + - file: code/memory/3_memory_data_types.md + - file: code/memory/4_manually_working_with_memory.md + - file: code/memory/5_memory_labels.ipynb + - file: code/memory/6_azure_sql_memory.ipynb + - file: code/memory/7_azure_sql_memory_attacks.ipynb + - file: code/memory/8_seed_database.ipynb + - file: code/memory/9_exporting_data.ipynb + - file: code/memory/10_schema_diagram.md + - file: 
code/memory/embeddings.ipynb + - file: code/setup/0_setup.md + children: + - file: code/setup/1_configuration.ipynb + - file: code/setup/2_resiliency.ipynb + - file: code/setup/default_values.md + - file: code/setup/pyrit_initializer.ipynb + - file: code/auxiliary_attacks/0_auxiliary_attacks.ipynb + children: + - file: code/auxiliary_attacks/1_gcg_azure_ml.ipynb + - file: code/scenarios/0_scenarios.ipynb + children: + - file: code/scenarios/1_configuring_scenarios.ipynb + - file: code/registry/0_registry.md + children: + - file: code/registry/1_class_registry.ipynb + - file: code/registry/2_instance_registry.ipynb + - file: code/gui/0_gui.md + - file: code/front_end/0_front_end.md + children: + - file: code/front_end/1_pyrit_scan.ipynb + - file: code/front_end/2_pyrit_shell.md + - file: deployment/README.md + children: + - file: deployment/deploy_hf_model_aml.ipynb + - file: deployment/download_and_register_hf_model_aml.ipynb + - file: deployment/hf_aml_model_endpoint_guide.md + - file: deployment/score_aml_endpoint.ipynb + - file: deployment/troubleshooting_guide_hf_azureml.md + - file: api/index.md + children: + - file: api/pyrit_analytics.md + - file: api/pyrit_auth.md + - file: api/pyrit_common.md + - file: api/pyrit_embedding.md + - file: api/pyrit_exceptions.md + - file: api/pyrit_executor_attack.md + children: + - file: api/pyrit_executor_attack_single_turn.md + - file: api/pyrit_executor_attack_multi_turn.md + - file: api/pyrit_executor_attack_core.md + - file: api/pyrit_executor_attack_component.md + - file: api/pyrit_executor_attack_printer.md + - file: api/pyrit_executor_benchmark.md + - file: api/pyrit_executor_core.md + - file: api/pyrit_executor_promptgen.md + children: + - file: api/pyrit_executor_promptgen_core.md + - file: api/pyrit_executor_promptgen_fuzzer.md + - file: api/pyrit_executor_workflow.md + - file: api/pyrit_identifiers.md + - file: api/pyrit_memory.md + - file: api/pyrit_message_normalizer.md + - file: api/pyrit_models.md + children: 
+ - file: api/pyrit_models_seeds.md + - file: api/pyrit_prompt_converter.md + - file: api/pyrit_prompt_normalizer.md + - file: api/pyrit_prompt_target.md + - file: api/pyrit_registry.md + children: + - file: api/pyrit_registry_class_registries.md + - file: api/pyrit_registry_instance_registries.md + - file: api/pyrit_scenario.md + children: + - file: api/pyrit_scenario_core.md + - file: api/pyrit_scenario_scenarios_airt.md + - file: api/pyrit_scenario_scenarios_foundry.md + - file: api/pyrit_scenario_scenarios_garak.md + - file: api/pyrit_score.md + - file: api/pyrit_setup.md + children: + - file: api/pyrit_setup_initializers.md + - file: blog/README.md + children: + - file: blog/2025_06_06.md + - file: blog/2025_03_03.md + - file: blog/2025_02_11.md + - file: blog/2025_01_27.md + - file: blog/2025_01_14.md + - file: blog/2024_12_3.md +site: + nav: + - title: Cookbooks + url: /cookbooks/README + - title: Setup + url: /setup/a-install-uv + - title: Contributing + url: /contributing/README + - title: User Guide + children: + - title: Architecture + url: /code/architecture + - title: Datasets + url: /code/datasets/dataset + - title: Attacks + url: /code/executor/attack/attack + - title: Targets + url: /code/targets/prompt-targets + - title: Converters + url: /code/converters/converters + - title: Scoring + url: /code/scoring/scoring + - title: Memory + url: /code/memory/memory + - title: Scenarios + url: /code/scenarios/scenarios + - title: API Reference + url: /api/index + - title: Blog + url: /blog/README + options: + logo: roakey.png + favicon: roakey.png + folders: true + style: ./css/custom.css + template: book-theme diff --git a/pyproject.toml b/pyproject.toml index ed9ab048ed..dbef885bc3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,9 +69,10 @@ dependencies = [ # always make sure the individual ones are in sync with the all group dev = [ "feedgen>=1.0.0", + "griffe>=2.0.0", "ipykernel>=6.29.5", "jupyter>=1.1.1", - "jupyter-book==1.0.4", + 
"jupyter-book>=2.0.0", "jupytext>=1.17.1", "matplotlib>=3.10.0", "mypy>=1.16.0", @@ -85,7 +86,6 @@ dev = [ "pytest-xdist>=3.6.1", "respx>=0.22.0", "ruff>=0.14.4", - "sphinxcontrib-mermaid>=1.0.0", "types-aiofiles>=24.1.0", "types-cachetools>=5.5.0", "types-decorator>=5.1.0", diff --git a/tests/unit/docs/test_api_documentation.py b/tests/unit/docs/test_api_documentation.py deleted file mode 100644 index 8bcc735324..0000000000 --- a/tests/unit/docs/test_api_documentation.py +++ /dev/null @@ -1,189 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. - -""" -Unit tests to ensure API documentation completeness. - -These tests verify that all public classes and functions exported by PyRIT modules -are documented in doc/api.rst to prevent accidentally missing items in the documentation. -""" - -import importlib -import re -from pathlib import Path - -import pytest - - -def get_api_rst_path() -> Path: - """Get the path to the api.rst file.""" - # Start from this test file and navigate to doc/api.rst - # tests/unit/docs -> tests/unit -> tests -> workspace_root - test_dir = Path(__file__).parent - workspace_root = test_dir.parent.parent.parent - return workspace_root / "doc" / "api.rst" - - -def get_documented_items_from_rst() -> dict[str, set[str]]: - """ - Parse api.rst and extract all documented items by module. 
- - Returns: - Dictionary mapping module paths to sets of documented class/function names - """ - api_rst = get_api_rst_path() - content = api_rst.read_text() - - documented: dict[str, set[str]] = {} - current_module = None - - # Find module definitions like :py:mod:`pyrit.prompt_converter` - module_pattern = r":py:mod:`([^`]+)`" - # Find documented items in autosummary sections - item_pattern = r"^\s+(\w+)$" - - lines = content.split("\n") - i = 0 - while i < len(lines): - line = lines[i] - - # Check if this is a module definition - module_match = re.match(module_pattern, line) - if module_match: - current_module = module_match.group(1) - documented[current_module] = set() - - # Look ahead for the autosummary section - j = i + 1 - in_autosummary = False - while j < len(lines) and j < i + 200: # Look ahead up to 200 lines for large sections - if ".. autosummary::" in lines[j]: - in_autosummary = True - elif in_autosummary: - item_match = re.match(item_pattern, lines[j]) - if item_match: - documented[current_module].add(item_match.group(1)) - elif ( - lines[j].strip() - and not lines[j].strip().startswith(":") - and not lines[j].strip().startswith("..") - and not lines[j].startswith(" ") - ): - break - j += 1 - - i += 1 - - return documented - - -def get_module_exports(module_path: str) -> set[str]: - """ - Get all exported items from a module's __all__ list. 
- - Args: - module_path: Full module path like 'pyrit.prompt_converter' - - Returns: - Set of exported class/function names, or empty set if module doesn't exist or has no __all__ - """ - try: - module = importlib.import_module(module_path) - if hasattr(module, "__all__"): - return set(module.__all__) - return set() - except (ImportError, AttributeError): - return set() - - -# Define modules to check and their exclusions -MODULES_TO_CHECK = { - "pyrit.prompt_converter": { - "exclude": {"AllWordsSelectionStrategy"}, # Internal helper, not documented - }, - "pyrit.executor.promptgen.fuzzer": { - "exclude": {"_PromptNode"}, # Exclude private/internal classes - }, - # Additional modules can be added here as their documentation is completed - "pyrit.score": { - "exclude": set(), - }, - "pyrit.prompt_target": { - "exclude": set(), - }, - "pyrit.memory": { - "exclude": set(), - }, - "pyrit.models": { - "exclude": {"AttackResultT", "Seed", "StrategyResultT"}, - }, - "pyrit.executor.attack": { - "exclude": set(), - }, - "pyrit.identifiers": { - "exclude": set(), - }, -} - - -@pytest.mark.parametrize("module_path,config", MODULES_TO_CHECK.items()) -def test_all_exports_documented_in_api_rst(module_path: str, config: dict): - """ - Test that all items in a module's __all__ are documented in api.rst. - - This test helps prevent accidentally forgetting to add new classes/functions - to the API documentation. 
- """ - # Get what's actually exported by the module - exported = get_module_exports(module_path) - - if not exported: - pytest.skip(f"Module {module_path} has no __all__ or couldn't be imported") - - # Get what's documented in api.rst - documented_by_module = get_documented_items_from_rst() - documented = documented_by_module.get(module_path, set()) - - # Apply exclusions - excluded = config.get("exclude", set()) - exported_to_check = exported - excluded - - # Find missing items - missing = exported_to_check - documented - - # Create a helpful error message - if missing: - missing_list = sorted(missing) - msg = f"\n\nModule '{module_path}' has {len(missing)} undocumented exports:\n" - msg += "\n".join(f" - {item}" for item in missing_list) - msg += f"\n\nPlease add these to doc/api.rst in the :py:mod:`{module_path}` section." - pytest.fail(msg) - - -def test_api_rst_file_exists(): - """Verify that the api.rst file exists.""" - api_rst = get_api_rst_path() - assert api_rst.exists(), f"api.rst not found at {api_rst}" - - -def test_documented_modules_have_all_list(): - """ - Test that all modules we're checking actually have an __all__ list. - - This ensures we're not silently skipping modules that should be tested. - """ - missing_all = [] - - for module_path in MODULES_TO_CHECK: - try: - module = importlib.import_module(module_path) - if not hasattr(module, "__all__"): - missing_all.append(module_path) - except ImportError: - missing_all.append(f"{module_path} (import failed)") - - if missing_all: - msg = "\n\nThe following modules are configured for testing but don't have __all__:\n" - msg += "\n".join(f" - {m}" for m in missing_all) - msg += "\n\nEither add __all__ to these modules or remove them from MODULES_TO_CHECK." - pytest.fail(msg)