diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 00000000..f3012aa6
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,45 @@
+name: Publish Docs to GitHub Pages
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'docs/**'
+      - 'mkdocs.yml'
+      - 'src/**'
+      - '.github/workflows/docs.yml'
+  workflow_dispatch:
+
+permissions:
+  contents: write
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # full history needed for git-revision-date-localized plugin
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+          cache: pip
+
+      - name: Install doc dependencies
+        run: |
+          pip install \
+            mkdocs-material \
+            mkdocstrings[python] \
+            mkdocs-git-revision-date-localized-plugin \
+            griffe
+
+      - name: Install claimed package (for mkdocstrings introspection)
+        run: pip install -e .
+
+      - name: Build & deploy docs
+        run: mkdocs gh-deploy --force --clean --verbose
diff --git a/.gitignore b/.gitignore
index 41f4e616..da33be62 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,4 @@ dist
 *.bak
 *.swp
 .DS_Store
+site/
diff --git a/docs/assets/logo.png b/docs/assets/logo.png
new file mode 100644
index 00000000..4a2fb112
Binary files /dev/null and b/docs/assets/logo.png differ
diff --git a/docs/c3/create-gridwrapper.md b/docs/c3/create-gridwrapper.md
new file mode 100644
index 00000000..da9d49d4
--- /dev/null
+++ b/docs/c3/create-gridwrapper.md
@@ -0,0 +1,50 @@
+# create_gridwrapper
+
+Wraps an existing component to run in parallel over a collection of inputs
+using one of several storage backends.
+ +## CLI + +```bash +c3_create_gridwrapper [options] +``` + +| Option | Type | Default | Description | +|---|---|---|---| +| `source_file` | path | *required* | `.ipynb` or `.py` component to wrap | +| `--backend` | str | `local` | Storage backend (see table below) | +| `--component-inputs` | str | `''` | Comma-separated parameter names that vary per grid cell | +| `--component-dependencies` | str | `''` | Pip dependencies to inject | +| `--repository` | str | | Container registry namespace | +| `--log-level` | str | `WARNING` | Python logging level | + +## Backends + +| Key | Description | +|---|---| +| `local` | Local filesystem, simple parallelism | +| `cos` | IBM COS – iterate over objects in a bucket prefix | +| `s3kv` | MLX S3 key-value store backend | +| `simple_grid_wrapper` | Source-only, minimal overhead | +| `folder_grid_wrapper` | Separate source and target folder | +| `legacy_cos_grid_wrapper` | Older COS format | + +## Python API + +::: claimed.c3.create_gridwrapper + options: + members: + - wrap_component + - create_gridwrapper + +## Example + +```bash +# Wrap a training script to process every CSV in a COS bucket in parallel +c3_create_gridwrapper train_model.py \ + --backend cos \ + --component-inputs input_file \ + --repository docker.io/myuser +``` + +This emits `gw_train_model.py` which, when containerised, launches one worker per input file. diff --git a/docs/c3/create-operator.md b/docs/c3/create-operator.md new file mode 100644 index 00000000..06b20f75 --- /dev/null +++ b/docs/c3/create-operator.md @@ -0,0 +1,38 @@ +# create_operator + +Builds a Docker image and generates KFP, CWL, and Kubernetes descriptors +from a Jupyter notebook, Python script, or R script. + +## CLI + +```bash +c3_create_operator [options] +``` + +| Option | Type | Default | Description | +|---|---|---|---| +| `source_file` | path | *required* | `.ipynb`, `.py`, or `.R` file | +| `--repository` | str | *required* | Container registry namespace, e.g. 
`docker.io/myuser` | +| `--version` | str | auto | Image tag; auto-detected from `image_version` variable in source | +| `--additional-files` | list | `[]` | Extra files to `ADD` into the image | +| `--dockerfile` | path | auto | Custom Dockerfile template | +| `--log-level` | str | `WARNING` | Python logging level | + +## Python API + +::: claimed.c3.create_operator + options: + members: + - create_operator + - create_dockerfile + +## Output Files + +After a successful run you will find: + +| File | Description | +|---|---| +| `.dockerfile` | Generated Dockerfile | +| `.yaml` | KubeFlow Pipelines component spec | +| `.job.yaml` | Kubernetes Job spec | +| `.cwl` | CWL component descriptor | diff --git a/docs/c3/index.md b/docs/c3/index.md new file mode 100644 index 00000000..7eb7273a --- /dev/null +++ b/docs/c3/index.md @@ -0,0 +1,61 @@ +# C3 – CLAIMED Component Compiler + +C3 automates the transformation of arbitrary code assets into fully portable, executable AI components. + +--- + +## What C3 does + +``` + ┌──────────────────────┐ + │ .ipynb / .py / .R │ ← your code + └──────────┬───────────┘ + │ c3_create_operator + ▼ + ┌──────────────────────────────────────────┐ + │ Dockerfile (build + push) │ + │ KubeFlow component YAML │ + │ Kubernetes Job YAML │ + │ CWL component descriptor │ + └──────────────────────────────────────────┘ +``` + +C3 reads **parameter declarations** from the top of your source file: + +```python +import os + +# description of my_param +my_param = os.environ.get('my_param', 'default_value') +``` + +Each `os.environ.get(...)` line is parsed into a typed, documented parameter +that appears in the generated YAML descriptors and KFP UI. 
+ +--- + +## Modules + +| Module | CLI entry-point | Purpose | +|---|---|---| +| [`create_operator`](create-operator.md) | `c3_create_operator` | Build container images and component descriptors | +| [`create_gridwrapper`](create-gridwrapper.md) | `c3_create_gridwrapper` | Wrap a component for parallel grid execution | +| [`create_containerless_operator`](create-operator.md) | `c3_create_containerless_operator` | Containerless variant (runs in-process) | +| [`operator_utils`](operator-utils.md) | – | Shared helpers (connection strings, logging) | +| `parser` | – | Source-file parameter parser | +| `notebook` | – | Jupyter notebook handler | +| `pythonscript` | – | Python script handler | +| `rscript` | – | R script handler | + +--- + +## Grid Compute Backends + +| Backend key | Description | +|---|---| +| `local` | Plain local filesystem | +| `cos` / `cos_grid_wrapper` | IBM Cloud Object Storage | +| `s3kv` | S3-backed key-value store (MLX) | +| `simple_grid_wrapper` | Minimal wrapper – source folder only | +| `folder_grid_wrapper` | Source **and** target folder variant | +| `legacy_cos_grid_wrapper` | Older COS format, kept for backwards compatibility | diff --git a/docs/c3/operator-utils.md b/docs/c3/operator-utils.md new file mode 100644 index 00000000..d9213acd --- /dev/null +++ b/docs/c3/operator-utils.md @@ -0,0 +1,39 @@ +# operator_utils + +Shared utility helpers used across C3 and the component library. 
+ +## Python API + +::: claimed.c3.operator_utils + +## Connection String Format + +Many CLAIMED components accept a `cos_connection` parameter in the following URI format: + +``` +[cos|s3]://access_key_id:secret_access_key@endpoint_host/bucket/path +``` + +**Examples:** + +``` +s3://AKIAIOSFODNN7EXAMPLE:wJalrXUtnFEMI@s3.us-east-1.amazonaws.com/my-bucket/data/ +cos://mykey:mysecret@s3.eu-de.cloud-object-storage.appdomain.cloud/my-bucket/models/ +``` + +### `explode_connection_string(cs)` + +Parses the URI into its components: + +```python +from claimed.c3.operator_utils import explode_connection_string + +access_key_id, secret_access_key, endpoint, path = explode_connection_string( + 's3://KEY:SECRET@s3.eu-de.cloud-object-storage.appdomain.cloud/my-bucket/prefix' +) +# endpoint → 'https://s3.eu-de.cloud-object-storage.appdomain.cloud' +# path → 'my-bucket/prefix' +``` + +If the string does not start with `cos://` or `s3://`, the input is returned as-is in the `path` field +(useful when passing a plain local path or a Kubernetes secret reference). diff --git a/docs/cli.md b/docs/cli.md new file mode 100644 index 00000000..382ba3cb --- /dev/null +++ b/docs/cli.md @@ -0,0 +1,135 @@ +# CLI Reference + +The `claimed` command is the single entry-point for the CLAIMED framework. + +--- + +## Synopsis + +``` +claimed [options] +``` + +--- + +## Subcommands + +### `claimed run` + +Directly invoke the `run()` function of any CLAIMED Python module. + +``` +claimed run [--param-name value ...] [--help] +``` + +**Arguments** + +| Argument | Description | +|---|---| +| `module.path` | Fully-qualified Python module containing a `run()` function (e.g. `claimed.components.util.cosutils`) | +| `--` | Any parameter accepted by `run()`. Hyphens are converted to underscores. | +| `--help` | Print the function signature, docstring, and parameter list, then exit. 
| + +**Type coercion** + +String values from the command line are automatically cast to the type declared in the function signature +(annotation or default-value type). +For example, `--recursive true` is cast to `bool` if the parameter is annotated as `bool`. + +**Examples** + +```bash +# List objects in a COS bucket +claimed run claimed.components.util.cosutils \ + --cos-connection s3://KEY:SECRET@endpoint/bucket \ + --operation ls \ + --local-path . + +# Download a file +claimed run claimed.components.util.cosutils \ + --cos-connection s3://KEY:SECRET@endpoint/bucket/file.zip \ + --operation get \ + --local-path . + +# Show help for any module +claimed run claimed.components.util.cosutils --help + +# CPU benchmark +claimed run claimed.components.util.gpu_performance_test \ + --mode cpu \ + --matrix-size 4096 \ + --iterations 100 +``` + +--- + +### `claimed create operator` + +Generate a container image + KFP/CWL/Kubernetes descriptors from a script or notebook. + +``` +claimed create operator [options] +``` + +| Option | Description | +|---|---| +| `--repository` | Container registry namespace, e.g. `docker.io/myuser` | +| `--version` | Image tag (default: auto-detected from script) | +| `--additional-files` | Space-separated list of extra files to bundle | + +Example: + +```bash +claimed create operator my_script.py --repository docker.io/myuser +``` + +--- + +### `claimed create gridwrapper` + +Wrap a component so it executes in parallel over a collection of inputs. 
+ +``` +claimed create gridwrapper [options] +``` + +| Option | Description | +|---|---| +| `--backend` | Storage backend: `local` \| `cos` \| `s3kv` \| `simple_grid_wrapper` \| `folder_grid_wrapper` | +| `--component-inputs` | Comma-separated parameter names that vary across grid cells | +| `--repository` | Container registry namespace | + +Example: + +```bash +claimed create gridwrapper my_script.py \ + --backend cos \ + --component-inputs input_file \ + --repository docker.io/myuser +``` + +--- + +### `claimed --component` *(legacy)* + +Run a component image via Docker. + +``` +claimed --component [--param-name value ...] +``` + +| Option | Description | +|---|---| +| `--component` | Docker image reference, e.g. `docker.io/claimed/my-op:latest` | +| `--` | Environment variable to pass into the container | + +Set `CLAIMED_DATA_PATH` to mount a local directory as `/opt/app-root/src/data` inside the container. + +--- + +## Environment Variables + +| Variable | Effect | +|---|---| +| `CLAIMED_DATA_PATH` | Local path mounted as `/opt/app-root/src/data` when using `--component` | +| `CLAIMED_CONTAINERLESS_OPERATOR_PATH` | Root path for containerless operator resolution | diff --git a/docs/components/util/cosutils.md b/docs/components/util/cosutils.md new file mode 100644 index 00000000..07bc8502 --- /dev/null +++ b/docs/components/util/cosutils.md @@ -0,0 +1,62 @@ +# cosutils + +COS/S3 utility component providing common object-storage operations. 
+ +## CLI + +```bash +claimed run claimed.components.util.cosutils --help +``` + +```bash +claimed run claimed.components.util.cosutils \ + --cos-connection s3://KEY:SECRET@endpoint/bucket/path \ + --operation \ + --local-path \ + [--recursive true] \ + [--log-level DEBUG] +``` + +## Operations + +| `--operation` | Description | +|---|---| +| `ls` | List objects at the path | +| `find` | Recursively find all objects | +| `mkdir` | Create a bucket/prefix | +| `get` | Download object(s) to `local_path` | +| `put` | Upload `local_path` to the COS path | +| `rm` | Delete object(s) | +| `glob` | Return all paths matching a glob pattern | +| `sync_to_cos` | Upload only changed local files to COS | +| `sync_to_local` | Download only changed COS objects to local | + +## Examples + +```bash +# List a bucket +claimed run claimed.components.util.cosutils \ + --cos-connection "s3://KEY:SECRET@s3.eu-de.cloud-object-storage.appdomain.cloud/my-bucket" \ + --operation ls \ + --local-path . + +# Download a single file +claimed run claimed.components.util.cosutils \ + --cos-connection "s3://KEY:SECRET@s3.eu-de.cloud-object-storage.appdomain.cloud/my-bucket/model.zip" \ + --operation get \ + --local-path . + +# Upload an entire directory +claimed run claimed.components.util.cosutils \ + --cos-connection "s3://KEY:SECRET@s3.eu-de.cloud-object-storage.appdomain.cloud/my-bucket/output/" \ + --operation put \ + --local-path ./results \ + --recursive true +``` + +## Python API + +::: claimed.components.util.cosutils + options: + members: + - run diff --git a/docs/components/util/gpu-benchmark.md b/docs/components/util/gpu-benchmark.md new file mode 100644 index 00000000..2000e506 --- /dev/null +++ b/docs/components/util/gpu-benchmark.md @@ -0,0 +1,67 @@ +# gpu_performance_test + +PyTorch HPC benchmark component covering CPU, single-GPU, and multi-node distributed (DDP) workloads. 
+ +## CLI + +```bash +claimed run claimed.components.util.gpu_performance_test --help +``` + +```bash +# CPU matrix-multiply benchmark +claimed run claimed.components.util.gpu_performance_test \ + --mode cpu \ + --matrix-size 4096 \ + --iterations 100 + +# Single GPU full benchmark +claimed run claimed.components.util.gpu_performance_test \ + --mode single_gpu \ + --steps 50 + +# Multi-node DDP (via torchrun) +torchrun --nnodes=2 --nproc_per_node=4 \ + -m claimed.components.util.gpu_performance_test \ + --mode ddp +``` + +## Benchmark Phases + +| Phase | Metric | Description | +|---|---|---| +| DataLoader throughput | samples/sec | Measures IO / preprocessing pipeline speed | +| Training throughput | samples/sec | Forward + backward + optimiser step | +| Inference throughput | samples/sec | Forward pass only, `torch.no_grad()` | +| GPU compute | GFLOPS | Dense matrix-multiply (`torch.mm`) | +| CPU compute | GFLOPS | Same on CPU tensors | + +## Parameters + +| Parameter | Type | Default | Description | +|---|---|---|---| +| `mode` | str | `single_gpu` | `cpu` \| `single_gpu` \| `ddp` | +| `batch_size` | int | 256 | DataLoader batch size | +| `num_workers` | int | 4 | DataLoader worker processes | +| `dataset_size` | int | 100 000 | Total synthetic samples | +| `steps` | int | 100 | Batches per benchmark phase | +| `input_dim` | int | 1 024 | MLP input feature dimension | +| `hidden_dim` | int | 2 048 | MLP hidden layer width | +| `num_classes` | int | 10 | Output classes | +| `depth` | int | 3 | Number of hidden layers | +| `materialize_dir` | str | `None` | Cache synthetic data on disk | +| `cleanup` | bool | `False` | Delete `materialize_dir` after benchmark | +| `matrix_size` | int | 2 048 | Square matrix edge for compute test | +| `iterations` | int | 50 | Matrix-multiply iterations | + +## Python API + +::: claimed.components.util.gpu_performance_test + options: + members: + - run + - benchmark_cpu + - benchmark_gpu + - benchmark_training + - 
benchmark_inference + - benchmark_dataloader diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 00000000..5f9c5982 --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,58 @@ +# Contributing + +Thank you for your interest in contributing to CLAIMED! + +## Code of Conduct + +Please read and follow our [Code of Conduct](https://github.com/claimed-framework/claimed/blob/main/CODE_OF_CONDUCT.md). + +## Development Setup + +```bash +git clone https://github.com/claimed-framework/claimed.git +cd claimed +pip install -e ".[dev]" +pip install -r test_requirements.txt +``` + +## Running Tests + +```bash +pytest tests/ +``` + +## Adding a New Component + +1. Create a directory under `src/claimed/components//` +2. Add an empty `__init__.py` +3. Create `.py` with: + - Module-level `os.environ.get(...)` parameter declarations + - A `run(...)` function with type annotations and a docstring + - A `main()` entry-point that calls `run()` + - `if __name__ == "__main__": main()` +4. Add a documentation page under `docs/components//.md` +5. Register the page in `mkdocs.yml` under `nav` + +## Improving Documentation + +The docs live in `docs/` and are built with [MkDocs Material](https://squidfunk.github.io/mkdocs-material/). + +Local preview: + +```bash +pip install mkdocs-material mkdocstrings[python] +mkdocs serve +``` + +Then open . + +## Submitting a Pull Request + +1. Fork the repository +2. Create a branch: `git checkout -b feat/my-feature` +3. Commit your changes +4. Push: `git push origin feat/my-feature` +5. Open a Pull Request against `main` + +Please follow the [contribution process](https://github.com/claimed-framework/claimed/blob/main/contribution_process.md) +and the [release process](https://github.com/claimed-framework/claimed/blob/main/release_process.md) docs. 
diff --git a/docs/getting-started.md b/docs/getting-started.md new file mode 100644 index 00000000..dcabca23 --- /dev/null +++ b/docs/getting-started.md @@ -0,0 +1,125 @@ +# Getting Started + +## Prerequisites + +| Requirement | Version | +|---|---| +| Python | ≥ 3.7 | +| Docker / Podman | any recent version (for building images) | +| pip | ≥ 22 | + +--- + +## Installation + +```bash +pip install claimed +``` + +To install directly from the repository: + +```bash +git clone https://github.com/claimed-framework/claimed.git +cd claimed +pip install -e . +``` + +--- + +## Your First Component + +### 1. Write a Python script (or notebook) + +CLAIMED reads **parameter declarations** from the top of your script – one variable per line, with an optional comment describing it: + +```python title="my_operator.py" +import os + +# input CSV file path +input_file = os.environ.get('input_file', 'data.csv') + +# number of rows to process +num_rows = int(os.environ.get('num_rows', 100)) + +# --- your logic below --- +import pandas as pd +df = pd.read_csv(input_file, nrows=num_rows) +print(df.head()) +``` + +### 2. Build a container image + +```bash +c3_create_operator my_operator.py --repository myregistry/myuser +``` + +C3 will: + +1. Parse the parameter declarations +2. Generate a `Dockerfile` +3. Build and push the image +4. Write a KubeFlow Pipelines component YAML and a Kubernetes Job YAML + +### 3. Run the component + +```bash +# locally via Docker +claimed --component myregistry/myuser/my-operator \ + --input-file data.csv \ + --num-rows 50 + +# or directly as a Python function +claimed run my_operator --input-file data.csv --num-rows 50 +``` + +--- + +## Grid Wrappers + +A **grid wrapper** parallelises a component over a set of inputs: + +```bash +c3_create_gridwrapper my_operator.py \ + --backend cos \ + --component-inputs "input_file" \ + --repository myregistry/myuser +``` + +See the [C3 overview](c3/index.md) for full details. 
+ +--- + +## Using the Component Library + +Every module under `claimed.components` exposes a `run()` function: + +```python +from claimed.components.util.cosutils import run as cos + +cos( + cos_connection='s3://KEY:SECRET@endpoint/bucket/path', + operation='ls', + local_path='.', +) +``` + +Or from the CLI: + +```bash +claimed run claimed.components.util.cosutils \ + --cos-connection s3://KEY:SECRET@endpoint/bucket/path \ + --operation ls \ + --local-path . +``` + +--- + +## Next Steps + +| Topic | Link | +|---|---| +| Full CLI reference | [CLI Reference](cli.md) | +| C3 internals | [C3 – Component Compiler](c3/index.md) | +| MLX asset backend | [MLX Backend](mlx/index.md) | +| COS/S3 utilities | [cosutils](components/util/cosutils.md) | +| GPU benchmarking | [gpu_performance_test](components/util/gpu-benchmark.md) | diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..4aee00a2 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,60 @@ +# CLAIMED Framework + +[![OpenSSF Best Practices](https://bestpractices.coreinfrastructure.org/projects/6718/badge)](https://bestpractices.coreinfrastructure.org/projects/6718) +[![PyPI](https://img.shields.io/pypi/v/claimed)](https://pypi.org/project/claimed/) +[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/claimed-framework/claimed/blob/main/LICENSE) + +**CLAIMED** is a framework for building, packaging, and executing portable AI components at scale. + +--- + +## What is CLAIMED? 
+ +CLAIMED has three interlocking layers: + +| Layer | Package / module | Purpose | +|---|---|---| +| **C3** – Component Compiler | `claimed.c3` | Turns notebooks, Python scripts, and R scripts into fully containerised, executable AI components | +| **MLX** – ML eXchange backend | `claimed.mlx` | Tracks datasets, models, jobs and other assets; powers the grid-compute backend | +| **Component Library** | `claimed.components.*` | Ready-to-use components for COS/S3 I/O, benchmarking, NLP, training, and more | + +--- + +## Key Features + +- **Zero-boilerplate packaging** – point C3 at any `.ipynb`, `.py`, or `.R` file and get a Docker image plus KFP/CWL/Kubernetes descriptors +- **Grid parallelisation** – distribute work across heterogeneous clusters with a single `claimed run` call +- **MLX asset tracking** – full provenance for every dataset, model, and job +- **CLI-first** – every component is callable as `claimed run --param value` +- **KubeFlow Pipelines & Kubernetes** – first-class output formats + +--- + +## Quick Install + +```bash +pip install claimed +``` + +--- + +## Quick Example + +```bash +# List files in a COS/S3 bucket +claimed run claimed.components.util.cosutils \ + --cos-connection s3://KEY:SECRET@endpoint/bucket \ + --operation ls \ + --local-path . + +# Show all parameters for any module +claimed run claimed.components.util.cosutils --help +``` + +--- + +## Video Introduction + + diff --git a/docs/mlx/cos-backend.md b/docs/mlx/cos-backend.md new file mode 100644 index 00000000..142fad47 --- /dev/null +++ b/docs/mlx/cos-backend.md @@ -0,0 +1,7 @@ +# cos_backend + +Low-level S3/COS file operations powering the MLX backend. 
+ +## Python API + +::: claimed.mlx.cos_backend diff --git a/docs/mlx/index.md b/docs/mlx/index.md new file mode 100644 index 00000000..1208acc6 --- /dev/null +++ b/docs/mlx/index.md @@ -0,0 +1,53 @@ +# MLX Backend + +The **Machine Learning eXchange (MLX)** backend is responsible for tracking and managing all assets +used and produced by the CLAIMED framework. + +--- + +## What MLX tracks + +| Asset type | Description | +|---|---| +| **Datasets** | Input/output data files stored in S3/COS | +| **Models** | Trained model artefacts | +| **Jobs** | Execution records and logs | +| **Pipeline runs** | End-to-end provenance graphs | + +--- + +## Architecture + +``` + claimed.c3 grid wrappers + │ + ▼ + claimed.mlx.s3_kv_store ← key-value abstraction over S3/COS + │ + ▼ + claimed.mlx.cos_backend ← low-level S3/COS operations (s3fs) + │ + ▼ + S3 / IBM COS +``` + +--- + +## Modules + +| Module | Description | +|---|---| +| [`cos_backend`](cos-backend.md) | Low-level S3/COS file operations | +| [`s3_kv_store`](s3-kv-store.md) | Key-value store abstraction used by grid wrappers | + +--- + +## Configuration + +The MLX backend is configured through connection strings in the standard CLAIMED format: + +``` +s3://access_key_id:secret_access_key@endpoint_host/bucket/prefix +``` + +See [operator_utils](../c3/operator-utils.md) for the full connection string specification. diff --git a/docs/mlx/s3-kv-store.md b/docs/mlx/s3-kv-store.md new file mode 100644 index 00000000..85675c4f --- /dev/null +++ b/docs/mlx/s3-kv-store.md @@ -0,0 +1,7 @@ +# s3_kv_store + +Key-value store abstraction over S3/COS used by C3 grid wrappers to coordinate parallel work. 
+ +## Python API + +::: claimed.mlx.s3_kv_store diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 00000000..87521e23 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,80 @@ +site_name: CLAIMED Framework +site_url: https://claimed-framework.github.io/claimed/ +site_description: >- + The CLAIMED framework – C3 Component Compiler, MLX backend, + and a library of ready-to-use AI components. +site_author: The CLAIMED authors +repo_name: claimed-framework/claimed +repo_url: https://github.com/claimed-framework/claimed +edit_uri: edit/main/docs/ + +theme: + name: material + logo: assets/logo.png + favicon: assets/logo.png + palette: + - scheme: default + primary: indigo + accent: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + - scheme: slate + primary: indigo + accent: indigo + toggle: + icon: material/brightness-4 + name: Switch to light mode + features: + - navigation.tabs + - navigation.sections + - navigation.expand + - navigation.top + - search.highlight + - content.code.copy + - content.action.edit + +plugins: + - search + - mkdocstrings: + handlers: + python: + options: + docstring_style: google + show_source: true + show_root_heading: true + heading_level: 2 + merge_init_into_class: true + +markdown_extensions: + - admonition + - pymdownx.details + - pymdownx.superfences + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.inlinehilite + - pymdownx.snippets + - pymdownx.tabbed: + alternate_style: true + - attr_list + - md_in_html + - toc: + permalink: true + +nav: + - Home: index.md + - Getting Started: getting-started.md + - CLI Reference: cli.md + - C3 – Component Compiler: + - Overview: c3/index.md + - create_operator: c3/create-operator.md + - create_gridwrapper: c3/create-gridwrapper.md + - operator_utils: c3/operator-utils.md + - MLX Backend: + - Overview: mlx/index.md + - cos_backend: mlx/cos-backend.md + - s3_kv_store: mlx/s3-kv-store.md + - Components: + - util/cosutils: components/util/cosutils.md + - 
util/gpu_performance_test: components/util/gpu-benchmark.md + - Contributing: contributing.md diff --git a/src/claimed/components/util/gpu_performance_test.py b/src/claimed/components/util/gpu_performance_test.py index d7e40adc..2af0a811 100644 --- a/src/claimed/components/util/gpu_performance_test.py +++ b/src/claimed/components/util/gpu_performance_test.py @@ -205,83 +205,108 @@ def benchmark_gpu(matrix_size, iterations, device): # Main # ===================== -def main(): - parser = argparse.ArgumentParser() - - parser.add_argument("--mode", choices=["cpu", "single_gpu", "ddp"], required=True) - parser.add_argument("--batch_size", type=int, default=256) - parser.add_argument("--num_workers", type=int, default=4) - parser.add_argument("--dataset_size", type=int, default=100000) - parser.add_argument("--steps", type=int, default=100) - parser.add_argument("--input_dim", type=int, default=1024) - parser.add_argument("--hidden_dim", type=int, default=2048) - parser.add_argument("--num_classes", type=int, default=10) - parser.add_argument("--depth", type=int, default=3) - parser.add_argument("--materialize_dir", type=str, default=None) - parser.add_argument("--cleanup", action="store_true") - parser.add_argument("--matrix_size", type=int, default=2048) - parser.add_argument("--iterations", type=int, default=50) - - args = parser.parse_args() - - if args.mode == "cpu": - print("CPU GFLOPS:", benchmark_cpu(args.matrix_size, args.iterations)) +def run( + mode: str = 'single_gpu', + batch_size: int = 256, + num_workers: int = 4, + dataset_size: int = 100000, + steps: int = 100, + input_dim: int = 1024, + hidden_dim: int = 2048, + num_classes: int = 10, + depth: int = 3, + materialize_dir: str = None, + cleanup: bool = False, + matrix_size: int = 2048, + iterations: int = 50, +) -> None: + """ + Run the PyTorch HPC benchmark. 
+ + mode: benchmark mode: cpu | single_gpu | ddp + batch_size: dataloader batch size + num_workers: dataloader worker processes + dataset_size: total number of synthetic samples + steps: number of batches per benchmark phase + input_dim: input feature dimension of the MLP + hidden_dim: hidden layer width of the MLP + num_classes: number of output classes + depth: number of hidden layers + materialize_dir: directory to cache synthetic dataset on disk (None = lazy) + cleanup: remove materialize_dir after the benchmark + matrix_size: square matrix edge length for compute benchmarks + iterations: number of matrix-multiply iterations for compute benchmarks + """ + if mode == 'cpu': + print('CPU GFLOPS:', benchmark_cpu(matrix_size, iterations)) return - if args.mode == "single_gpu": - device = torch.device("cuda:0") - elif args.mode == "ddp": + if mode == 'single_gpu': + device = torch.device('cuda:0') + elif mode == 'ddp': local_rank = setup_ddp() - device = torch.device(f"cuda:{local_rank}") + device = torch.device(f'cuda:{local_rank}') + else: + raise ValueError(f"Unknown mode '{mode}'. 
Choose from: cpu | single_gpu | ddp") dataset = SyntheticDataset( - args.dataset_size, - args.input_dim, - args.num_classes, - materialize_dir=args.materialize_dir + dataset_size, + input_dim, + num_classes, + materialize_dir=materialize_dir, ) loader = DataLoader( dataset, - batch_size=args.batch_size, - num_workers=args.num_workers, + batch_size=batch_size, + num_workers=num_workers, pin_memory=True, - shuffle=True + shuffle=True, ) - model = SimpleMLP( - args.input_dim, - args.hidden_dim, - args.num_classes, - args.depth - ).to(device) + model = SimpleMLP(input_dim, hidden_dim, num_classes, depth).to(device) - if args.mode == "ddp": + if mode == 'ddp': model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[device.index]) - print("\n--- DataLoader throughput ---") - dl_tp = benchmark_dataloader(loader, device, args.steps) - print(f"Samples/sec: {dl_tp:.2f}") + print('\n--- DataLoader throughput ---') + print(f'Samples/sec: {benchmark_dataloader(loader, device, steps):.2f}') - print("\n--- Training throughput ---") - train_tp = benchmark_training(model, loader, device, args.steps) - print(f"Samples/sec: {train_tp:.2f}") + print('\n--- Training throughput ---') + print(f'Samples/sec: {benchmark_training(model, loader, device, steps):.2f}') - print("\n--- Inference throughput ---") - infer_tp = benchmark_inference(model, loader, device, args.steps) - print(f"Samples/sec: {infer_tp:.2f}") + print('\n--- Inference throughput ---') + print(f'Samples/sec: {benchmark_inference(model, loader, device, steps):.2f}') - print("\n--- GPU compute ---") - gpu_gflops = benchmark_gpu(args.matrix_size, args.iterations, device) - print(f"GFLOPS: {gpu_gflops:.2f}") + print('\n--- GPU compute ---') + print(f'GFLOPS: {benchmark_gpu(matrix_size, iterations, device):.2f}') - if args.cleanup and args.materialize_dir: - shutil.rmtree(args.materialize_dir, ignore_errors=True) - print("Materialized dataset removed.") + if cleanup and materialize_dir: + 
shutil.rmtree(materialize_dir, ignore_errors=True) + print('Materialized dataset removed.') - if args.mode == "ddp": + if mode == 'ddp': cleanup_ddp() -if __name__ == "__main__": +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--mode', choices=['cpu', 'single_gpu', 'ddp'], required=True) + parser.add_argument('--batch_size', type=int, default=256) + parser.add_argument('--num_workers', type=int, default=4) + parser.add_argument('--dataset_size', type=int, default=100000) + parser.add_argument('--steps', type=int, default=100) + parser.add_argument('--input_dim', type=int, default=1024) + parser.add_argument('--hidden_dim', type=int, default=2048) + parser.add_argument('--num_classes', type=int, default=10) + parser.add_argument('--depth', type=int, default=3) + parser.add_argument('--materialize_dir', type=str, default=None) + parser.add_argument('--cleanup', action='store_true') + parser.add_argument('--matrix_size', type=int, default=2048) + parser.add_argument('--iterations', type=int, default=50) + args = parser.parse_args() + run(**vars(args)) + + +if __name__ == '__main__': main()