diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..92e5956 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,16 @@ +# WIT_WORLD is consumed at COMPILE time by: +# - hyperlight_wasm_macro (proc-macro) -> std::env::var_os("WIT_WORLD").unwrap() +# - hyperlight_wasm/build.rs -> rerun-if-env-changed=WIT_WORLD +# - hyperlight_wasm_runtime/build.rs -> emits cfg(component) IFF WIT_WORLD is set +# +# Without it, hyperlight_wasm_runtime falls back to the legacy flatbuffer-based +# host-function ABI, while hyperlight-wasm-sandbox (built with WIT_WORLD via the +# proc-macro's host_bindgen!) uses the component-model ABI. Mixing the two yields +# "GuestError: Host function vector parameter missing length" at sandbox start. +# +# `just wasm test` exports WIT_WORLD via its recipe, but bare `cargo` invocations +# (e.g. from an IDE or a developer running `cargo test --manifest-path ...`) +# would otherwise miss it. Cargo looks for `.cargo/config.toml` by walking up the +# CWD tree, so we put it at the repo root to cover every workspace cargo call. +[env] +WIT_WORLD = { value = "src/wasm_sandbox/wit/sandbox-world.wasm", relative = true } diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 258e41c..a5d713a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -87,6 +87,15 @@ jobs: sudo udevadm trigger --name-match=kvm sudo chmod 666 /dev/kvm + - name: Check WIT artifact is in sync with .wit source + # sandbox-world.wasm is the compiled WIT used by host_bindgen!. It + # is consumed by transitive deps (hyperlight-wasm-runtime) before + # our crate's build scripts run, so it must live in git. Detect any + # drift between the committed artifact and the .wit source. 
+ run: | + just wasm guest-compile-wit + git diff --exit-code -- src/wasm_sandbox/wit/sandbox-world.wasm + - name: Build run: just wasm build diff --git a/.gitignore b/.gitignore index d7b3fbb..8a9146e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,11 @@ target/ .venv/ *.aot *.wasm +# Exception: the compiled WIT artifact is checked in as a build input. +# host_bindgen! reads this file *before* our build scripts can run (it is +# consumed by transitive deps like hyperlight-wasm-runtime), so the file +# must exist from `git clone` time. CI regenerates and diffs to catch drift. +!src/wasm_sandbox/wit/sandbox-world.wasm __pycache__/ *.so *.pyd diff --git a/src/hyperlight_sandbox/src/credentials.rs b/src/hyperlight_sandbox/src/credentials.rs new file mode 100644 index 0000000..b36ab8e --- /dev/null +++ b/src/hyperlight_sandbox/src/credentials.rs @@ -0,0 +1,105 @@ +//! Scoped-credential registry for outgoing HTTP requests. +//! +//! A [`CredentialEntry`] binds a logical credential identifier to the +//! metadata required to inject a token header at request time: +//! +//! * `target` — URL-prefix scope. The outgoing-handler only injects +//! the credential when the request URL starts with this prefix. +//! * `header` — HTTP header name (e.g. `"Authorization"`). +//! * `prefix` — Value prefix prepended to the resolved token +//! (e.g. `"Bearer "`). +//! * `resolver` — A host-side callback invoked on every credentialed +//! outgoing request to produce a fresh secret value. The host calls +//! the resolver synchronously from the WASI HTTP dispatch path, so +//! implementations should be fast and (where appropriate) memoise +//! internally. Errors returned by the resolver surface to the guest +//! as a request-level dispatch failure with a host-redacted message. +//! +//! The registry is populated by the host before the guest runs. +//! Guests bind a credential to a specific outgoing request via WIT +//! `attach`. 
+ +use std::collections::HashMap; +use std::fmt; +use std::sync::{Arc, Mutex}; + +/// Host-side callback that produces the secret token value for a +/// credential at request-dispatch time. +/// +/// The returned `String` is treated as the literal token; the host +/// prepends [`CredentialEntry::prefix`] to it to form the outgoing +/// header value. +/// +/// On error, the returned diagnostic string is **dropped** by the +/// outgoing-handler before any guest-visible error is produced — it +/// is neither sent to the guest nor logged by this crate. The wire +/// path surfaces only a fixed `"credential resolver failed"` +/// indication. Resolver authors who need diagnostics should record +/// them inside the resolver itself (e.g. via the host's own logger) +/// before returning the `Err`. +pub type ResolverFn = Arc Result + Send + Sync>; + +/// Metadata for a single scoped credential. +#[derive(Clone)] +pub struct CredentialEntry { + /// URL-prefix scope. Only requests whose URL starts with this + /// value are eligible for credential injection. + pub target: String, + + /// HTTP header name to set (e.g. `"Authorization"`). + pub header: String, + + /// Value prefix prepended to the resolved token + /// (e.g. `"Bearer "`). May be empty. + pub prefix: String, + + /// Resolver callback. Invoked on every credentialed outgoing + /// request; see [`ResolverFn`] for the contract. + pub resolver: ResolverFn, +} + +impl fmt::Debug for CredentialEntry { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // The resolver is a function pointer that may close over secret + // material; we never want it (or its captures) to appear in a + // log line, panic message, or `dbg!` output. 
+ f.debug_struct("CredentialEntry") + .field("target", &self.target) + .field("header", &self.header) + .field("prefix", &self.prefix) + .field("resolver", &"") + .finish() + } +} + +impl CredentialEntry { + /// Build a [`CredentialEntry`] whose resolver returns a fixed + /// token string on every invocation. + /// + /// Convenience constructor for tests, examples, and trivially + /// short-lived secrets. Production callers that need refresh + /// behaviour (managed identities, OAuth, …) should construct + /// the entry directly with a custom [`ResolverFn`]. + pub fn with_static_resolver( + target: impl Into, + header: impl Into, + prefix: impl Into, + token: impl Into, + ) -> Self { + let token = token.into(); + Self { + target: target.into(), + header: header.into(), + prefix: prefix.into(), + resolver: Arc::new(move || Ok(token.clone())), + } + } +} + +/// Shared, thread-safe credential registry keyed by credential id. +pub type CredentialRegistry = Arc>>; + +/// Creates an empty credential registry. 
+pub fn empty_registry() -> CredentialRegistry { + Arc::new(Mutex::new(HashMap::new())) +} diff --git a/src/hyperlight_sandbox/src/lib.rs b/src/hyperlight_sandbox/src/lib.rs index d00e139..8472962 100644 --- a/src/hyperlight_sandbox/src/lib.rs +++ b/src/hyperlight_sandbox/src/lib.rs @@ -3,6 +3,7 @@ extern crate alloc; pub mod cap_fs; +pub mod credentials; pub mod http; pub mod network; pub mod runtime; @@ -17,6 +18,7 @@ pub use cap_fs::{ CapFs, DescriptorFlags, DescriptorStat, DescriptorType, Dir, DirPerms, FilePerms, FsError, OpenFlags, }; +pub use credentials::{CredentialEntry, CredentialRegistry, ResolverFn}; pub use network::{HttpMethod, MethodFilter, NetworkPermission, NetworkPermissions}; use serde::{Deserialize, Serialize}; pub use tools::{ArgType, ToolRegistry, ToolSchema}; @@ -114,6 +116,7 @@ pub trait Guest: Sized { tools: ToolRegistry, network: std::sync::Arc>, fs: std::sync::Arc>, + credentials: CredentialRegistry, ) -> Result; } @@ -138,6 +141,7 @@ pub struct Sandbox { inner: G::Sandbox, network: std::sync::Arc>, fs: std::sync::Arc>, + credentials: CredentialRegistry, } impl Sandbox { @@ -145,8 +149,20 @@ impl Sandbox { pub fn new(guest: G, config: SandboxConfig, tools: ToolRegistry) -> Result { let network = std::sync::Arc::new(std::sync::Mutex::new(NetworkPermissions::new())); let fs = std::sync::Arc::new(std::sync::Mutex::new(CapFs::new())); - let inner = guest.build(config, tools, network.clone(), fs.clone())?; - Ok(Self { inner, network, fs }) + let credentials = credentials::empty_registry(); + let inner = guest.build( + config, + tools, + network.clone(), + fs.clone(), + credentials.clone(), + )?; + Ok(Self { + inner, + network, + fs, + credentials, + }) } /// Create a sandbox with a read-only input directory. 
@@ -159,8 +175,20 @@ impl Sandbox { let network = std::sync::Arc::new(std::sync::Mutex::new(NetworkPermissions::new())); let fs = CapFs::new().with_input(input_dir)?; let fs = std::sync::Arc::new(std::sync::Mutex::new(fs)); - let inner = guest.build(config, tools, network.clone(), fs.clone())?; - Ok(Self { inner, network, fs }) + let credentials = credentials::empty_registry(); + let inner = guest.build( + config, + tools, + network.clone(), + fs.clone(), + credentials.clone(), + )?; + Ok(Self { + inner, + network, + fs, + credentials, + }) } /// Execute guest code. @@ -212,6 +240,24 @@ impl Sandbox { .map_err(|_| anyhow::anyhow!("network mutex poisoned"))? .allow_domain(target, methods) } + + /// Register a scoped credential that guests can later `attach` to + /// outgoing requests. + /// + /// Must be called before `run()`. Credentials are immutable once + /// registered and persist for the lifetime of the sandbox. + pub fn register_credential(&self, id: impl Into, entry: CredentialEntry) -> Result<()> { + let id = id.into(); + let mut registry = self + .credentials + .lock() + .map_err(|_| anyhow::anyhow!("credential registry mutex poisoned"))?; + if registry.contains_key(&id) { + anyhow::bail!("credential '{}' already registered", id); + } + registry.insert(id, entry); + Ok(()) + } } // --------------------------------------------------------------------------- @@ -350,9 +396,19 @@ where None => vfs, }; let fs = std::sync::Arc::new(std::sync::Mutex::new(vfs)); - let inner = self - .guest - .build(self.config, self.tools, network.clone(), fs.clone())?; - Ok(Sandbox { inner, network, fs }) + let credentials = credentials::empty_registry(); + let inner = self.guest.build( + self.config, + self.tools, + network.clone(), + fs.clone(), + credentials.clone(), + )?; + Ok(Sandbox { + inner, + network, + fs, + credentials, + }) } } diff --git a/src/javascript_sandbox/src/lib.rs b/src/javascript_sandbox/src/lib.rs index 01f0e5c..1984418 100644 --- 
a/src/javascript_sandbox/src/lib.rs +++ b/src/javascript_sandbox/src/lib.rs @@ -125,6 +125,7 @@ impl Guest for HyperlightJs { tools: ToolRegistry, network: std::sync::Arc>, fs: std::sync::Arc>, + _credentials: hyperlight_sandbox::CredentialRegistry, ) -> Result { JsGuestSandbox::new(config, tools, network, fs) } diff --git a/src/sdk/python/core/hyperlight_sandbox/__init__.py b/src/sdk/python/core/hyperlight_sandbox/__init__.py index f8f25cc..2429e42 100644 --- a/src/sdk/python/core/hyperlight_sandbox/__init__.py +++ b/src/sdk/python/core/hyperlight_sandbox/__init__.py @@ -180,6 +180,39 @@ def output_path(self) -> str | None: def allow_domain(self, target: str, methods: list[str] | None = None) -> None: self._inner.allow_domain(target, methods) + def register_credential( + self, + id: str, + *, + target: str, + header: str = "Authorization", + prefix: str = "Bearer ", + resolver: Callable[[], str], + ) -> None: + """Register a scoped credential for outgoing HTTP requests. + + Must be called before ``run()``. Guest code can then bind the + credential to an individual request via WIT ``attach``. + + Args: + id: Unique identifier for this credential. + target: URL-prefix scope. Only requests whose URL starts + with this value are eligible for injection. + header: HTTP header name to set (default ``Authorization``). + prefix: Value prefix prepended to the resolved token + (default ``Bearer ``). + resolver: A callable invoked with no arguments on every + credentialed outgoing request to produce a fresh token + value as a ``str``. Called synchronously from the host + HTTP dispatch path, so it must be fast and thread-safe; + long-running fetches (e.g. IMDS, OAuth) should be + memoised by the caller. Any exception raised by the + callable surfaces to guest code as a host-redacted + request-level error (only the exception **type name** + is propagated; the message body is dropped). 
+ """ + self._inner.register_credential(id, target, header, prefix, resolver) + def snapshot(self): """Capture the current sandbox state. diff --git a/src/sdk/python/pyo3_common/src/lib.rs b/src/sdk/python/pyo3_common/src/lib.rs index 4176e72..e20f9d1 100644 --- a/src/sdk/python/pyo3_common/src/lib.rs +++ b/src/sdk/python/pyo3_common/src/lib.rs @@ -92,9 +92,8 @@ pub fn resolve_maybe_coroutine<'py>( } let asyncio = py.import("asyncio")?; - match asyncio.call_method1("run", (obj,)) { - Ok(result) => return Ok(result.unbind()), - Err(_) => {} + if let Ok(result) = asyncio.call_method1("run", (obj,)) { + return Ok(result.unbind()); } let resolver = PyModule::from_code( @@ -348,20 +347,14 @@ fn infer_type_from_annotation( // Handle Annotated[T, ...] — unwrap to get the base type T. // typing.get_origin(ann) is typing.Annotated → typing.get_args(ann)[0] is T. let py = annotation.py(); - if let Ok(typing) = py.import("typing") { - if let Ok(origin) = typing.call_method1("get_origin", (&annotation,)) { - // Check if origin is typing.Annotated (available as typing.Annotated since 3.9+) - if let Ok(annotated_type) = typing.getattr("Annotated") { - if origin.is(&annotated_type) { - if let Ok(args) = typing.call_method1("get_args", (&annotation,)) { - // args is a tuple; first element is the base type. 
- if let Ok(base_type) = args.get_item(0) { - return type_obj_to_arg_type(&base_type); - } - } - } - } - } + if let Ok(typing) = py.import("typing") + && let Ok(origin) = typing.call_method1("get_origin", (&annotation,)) + && let Ok(annotated_type) = typing.getattr("Annotated") + && origin.is(&annotated_type) + && let Ok(args) = typing.call_method1("get_args", (&annotation,)) + && let Ok(base_type) = args.get_item(0) + { + return type_obj_to_arg_type(&base_type); } None diff --git a/src/sdk/python/wasm_backend/.cargo/config.toml b/src/sdk/python/wasm_backend/.cargo/config.toml deleted file mode 100644 index 27b7acc..0000000 --- a/src/sdk/python/wasm_backend/.cargo/config.toml +++ /dev/null @@ -1,2 +0,0 @@ -[env] -WIT_WORLD = { value = "../../../../wasm_sandbox/wit/sandbox-world.wasm", relative = true } \ No newline at end of file diff --git a/src/sdk/python/wasm_backend/src/lib.rs b/src/sdk/python/wasm_backend/src/lib.rs index 4f862a2..b58ae2f 100644 --- a/src/sdk/python/wasm_backend/src/lib.rs +++ b/src/sdk/python/wasm_backend/src/lib.rs @@ -1,7 +1,8 @@ use std::collections::HashMap; +use std::sync::Arc; use hyperlight_sandbox::{ - DEFAULT_HEAP_SIZE, DEFAULT_STACK_SIZE, DirPerms, FilePerms, HttpMethod, Sandbox, + DEFAULT_HEAP_SIZE, DEFAULT_STACK_SIZE, DirPerms, FilePerms, HttpMethod, ResolverFn, Sandbox, SandboxBuilder, SandboxConfig, }; use hyperlight_sandbox_pyo3_common::{ @@ -16,6 +17,45 @@ type WasmSnapshotInner = hyperlight_sandbox::Snapshot<< ::Sandbox as hyperlight_sandbox::GuestSandbox >::SnapshotData>; +/// Buffered credential registration for lazy sandbox init. +struct PendingCredential { + id: String, + target: String, + header: String, + prefix: String, + resolver: ResolverFn, +} + +/// Wrap a Python callable as a [`ResolverFn`] suitable for storage in +/// the credential registry. +/// +/// On each invocation the wrapper re-acquires the Python GIL, calls +/// the supplied callable with no arguments, and extracts the result +/// as a Python `str`. 
Exceptions are mapped to a redacted Rust error +/// — only the exception **type name** is surfaced, never the message +/// (which may contain secret material assembled by user code). +fn python_callable_to_resolver(callable: Py) -> ResolverFn { + Arc::new(move || -> Result { + Python::attach(|py| { + let bound = callable.bind(py); + match bound.call0() { + Ok(result) => result + .extract::() + .map_err(|_| "credential resolver did not return a str".to_string()), + Err(err) => { + let type_name = err + .get_type(py) + .qualname() + .ok() + .and_then(|n| n.extract::().ok()) + .unwrap_or_else(|| "Exception".to_string()); + Err(format!("python resolver raised {type_name}")) + } + } + }) + }) +} + #[pyclass] pub struct PySnapshot { inner: WasmSnapshotInner, @@ -26,6 +66,7 @@ pub struct WasmSandbox { inner: Option, tools: HashMap>, pending_networks: Vec<(String, Option>)>, + pending_credentials: Vec, config: SandboxConfig, input_dir: Option, output_dir: Option, @@ -48,6 +89,7 @@ impl WasmSandbox { inner: None, tools: HashMap::new(), pending_networks: Vec::new(), + pending_credentials: Vec::new(), config: SandboxConfig { module_path: module_path.to_string(), heap_size: match heap_size { @@ -115,6 +157,19 @@ impl WasmSandbox { .allow_domain(&target, methods) .map_err(|e| PyRuntimeError::new_err(format!("{e}")))?; } + for cred in std::mem::take(&mut self.pending_credentials) { + sandbox + .register_credential( + cred.id, + hyperlight_sandbox::CredentialEntry { + target: cred.target, + header: cred.header, + prefix: cred.prefix, + resolver: cred.resolver, + }, + ) + .map_err(|e| PyRuntimeError::new_err(format!("{e}")))?; + } self.inner = Some(sandbox); } let sandbox = self.inner.as_mut().unwrap(); @@ -164,6 +219,52 @@ impl WasmSandbox { Ok(()) } + /// Register a scoped credential for outgoing HTTP requests. + /// + /// Must be called before `run()`. The credential can then be + /// attached to individual requests by guest code via WIT `attach`. 
+ /// + /// `resolver` is a Python callable that takes no arguments and + /// returns the secret token as a `str`. It is invoked synchronously + /// from the WASI HTTP dispatch path on every credentialed request, + /// so it must be fast and thread-safe; long-running token fetches + /// should be memoised by the caller. + #[pyo3(signature = (id, target, header, prefix, resolver))] + fn register_credential( + &mut self, + id: &str, + target: &str, + header: &str, + prefix: &str, + resolver: Py, + ) -> PyResult<()> { + let resolver_fn = python_callable_to_resolver(resolver); + if let Some(sandbox) = self.inner.as_ref() { + // Register directly on the live sandbox. + sandbox + .register_credential( + id, + hyperlight_sandbox::CredentialEntry { + target: target.to_string(), + header: header.to_string(), + prefix: prefix.to_string(), + resolver: resolver_fn, + }, + ) + .map_err(|e| PyRuntimeError::new_err(format!("{e}")))?; + } else { + // Buffer for later — will be applied when sandbox initialises. + self.pending_credentials.push(PendingCredential { + id: id.to_string(), + target: target.to_string(), + header: header.to_string(), + prefix: prefix.to_string(), + resolver: resolver_fn, + }); + } + Ok(()) + } + fn get_output_files(&self) -> PyResult> { let sandbox = self .inner diff --git a/src/wasm_sandbox/Justfile b/src/wasm_sandbox/Justfile index 2a47213..5c97398 100644 --- a/src/wasm_sandbox/Justfile +++ b/src/wasm_sandbox/Justfile @@ -53,14 +53,38 @@ invalidate-wit-caches: -{{rmrf}} {{repo-root}}/src/sdk/python/wasm_backend/target/release/build/hyperlight-wasm-* -{{rmrf}} {{repo-root}}/src/sdk/python/wasm_backend/target/release/.fingerprint/hyperlight-wasm-* -# Only clean when rust-cache left stale state (fingerprints exist but runtime binary missing). +# Clean stale hyperlight-wasm caches when ANY of: +# 1. 
fingerprints exist but the guest runtime binary is missing (rust-cache restored +# .fingerprint/ without the corresponding target/hyperlight-wasm-runtime/ output), OR +# 2. the WIT source file is newer than the cached host bindings/fingerprints. +# hyperlight-wasm-sandbox calls `hyperlight_component_macro::host_bindgen!("wit/sandbox-world.wasm")` +# at compile time. Proc-macros cannot emit `cargo:rerun-if-changed`, so when the WIT +# changes cargo doesn't notice and the SDKs fail at runtime with +# "Host function vector parameter missing length". [unix] _clean-stale-wasm: #!/usr/bin/env bash + wit_src="{{repo-root}}/src/wasm_sandbox/wit/hyperlight-sandbox.wit" for profile in debug release; do - if compgen -G "{{repo-root}}/target/$profile/build/hyperlight-wasm-*" > /dev/null && \ - [ ! -f "{{repo-root}}/target/hyperlight-wasm-runtime/x86_64-hyperlight-none/$profile/hyperlight-wasm-runtime" ]; then - echo "Cleaning stale hyperlight-wasm $profile build artifacts..." + if ! compgen -G "{{repo-root}}/target/$profile/build/hyperlight-wasm-*" > /dev/null; then + continue + fi + reason="" + runtime="{{repo-root}}/target/hyperlight-wasm-runtime/x86_64-hyperlight-none/$profile/hyperlight-wasm-runtime" + if [ ! -f "$runtime" ]; then + reason="runtime binary missing" + elif [ -f "$wit_src" ]; then + for fp in "{{repo-root}}/target/$profile/.fingerprint/hyperlight-wasm-"* \ + "{{repo-root}}/target/$profile/.fingerprint/hyperlight-host-"*; do + [ -e "$fp" ] || continue + if [ "$wit_src" -nt "$fp" ]; then + reason="WIT source newer than cached host bindings" + break + fi + done + fi + if [ -n "$reason" ]; then + echo "Cleaning stale hyperlight-wasm $profile build artifacts ($reason)..." 
rm -rf {{repo-root}}/target/hyperlight-wasm-runtime {{repo-root}}/target/hls \ {{repo-root}}/target/*/build/hyperlight-wasm-* {{repo-root}}/target/*/.fingerprint/hyperlight-wasm-* \ {{repo-root}}/target/*/build/hyperlight-host-* {{repo-root}}/target/*/.fingerprint/hyperlight-host-* \ @@ -73,12 +97,26 @@ _clean-stale-wasm: [windows] _clean-stale-wasm: + $witSrc = '{{repo-root}}/src/wasm_sandbox/wit/hyperlight-sandbox.wit'; \ + $witMtime = if (Test-Path $witSrc) { (Get-Item $witSrc).LastWriteTimeUtc } else { $null }; \ foreach ($profile in @('debug', 'release')) { \ + $fpDir = '{{repo-root}}/target/' + $profile + '/.fingerprint'; \ $bp = '{{repo-root}}/target/' + $profile + '/build'; \ $rp = '{{repo-root}}/target/hyperlight-wasm-runtime/x86_64-hyperlight-none/' + $profile + '/hyperlight-wasm-runtime'; \ - if ((Get-ChildItem -Path $bp -Filter 'hyperlight-wasm-*' -ErrorAction SilentlyContinue) -and \ - -not (Test-Path $rp)) { \ - Write-Host ('Cleaning stale hyperlight-wasm ' + $profile + ' build artifacts...'); \ + $hlwBuild = @(Get-ChildItem -Path $bp -Filter 'hyperlight-wasm-*' -ErrorAction SilentlyContinue); \ + if ($hlwBuild.Count -eq 0) { continue }; \ + $reason = $null; \ + if (-not (Test-Path $rp)) { \ + $reason = 'runtime binary missing'; \ + } elseif ($witMtime) { \ + $hostFps = @(Get-ChildItem -Path $fpDir -Filter 'hyperlight-wasm-*' -ErrorAction SilentlyContinue) + \ + @(Get-ChildItem -Path $fpDir -Filter 'hyperlight-host-*' -ErrorAction SilentlyContinue); \ + foreach ($fp in $hostFps) { \ + if ($witMtime -gt $fp.LastWriteTimeUtc) { $reason = 'WIT source newer than cached host bindings'; break }; \ + }; \ + }; \ + if ($reason) { \ + Write-Host ('Cleaning stale hyperlight-wasm ' + $profile + ' build artifacts (' + $reason + ')...'); \ Remove-Item -Recurse -Force -ErrorAction SilentlyContinue '{{repo-root}}/target/hyperlight-wasm-runtime','{{repo-root}}/target/hls'; \ foreach ($t in @('{{repo-root}}/target', '{{repo-root}}/src/sdk/python/wasm_backend/target')) 
{ \ foreach ($p in @('debug', 'release')) { \ @@ -91,7 +129,8 @@ _clean-stale-wasm: }; \ break; \ } \ - } + }; \ + exit 0 guest-build-aot target=default-target: guest-build-wasm ensure-tools hyperlight-wasm-aot compile --component \ diff --git a/src/wasm_sandbox/guests/javascript/package-lock.json b/src/wasm_sandbox/guests/javascript/package-lock.json index 817f597..a1f070b 100644 --- a/src/wasm_sandbox/guests/javascript/package-lock.json +++ b/src/wasm_sandbox/guests/javascript/package-lock.json @@ -1,12 +1,12 @@ { "name": "hyperlight-sandbox-js-guest", - "version": "0.3.0", + "version": "0.4.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "hyperlight-sandbox-js-guest", - "version": "0.3.0", + "version": "0.4.0", "devDependencies": { "@bytecodealliance/componentize-js": "^0.20.0", "@bytecodealliance/jco": "^1.19.0" @@ -216,6 +216,29 @@ "wizer-win32-x64": "wizer" } }, + "node_modules/@emnapi/core": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.10.0.tgz", + "integrity": "sha512-yq6OkJ4p82CAfPl0u9mQebQHKPJkY7WrIuk205cTYnYe+k2Z8YBh11FrbRG/H6ihirqcacOgl2BIO8oyMQLeXw==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@emnapi/wasi-threads": "1.2.1", + "tslib": "^2.4.0" + } + }, + "node_modules/@emnapi/runtime": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.10.0.tgz", + "integrity": "sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA==", + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, "node_modules/@emnapi/wasi-threads": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.2.1.tgz", diff --git a/src/wasm_sandbox/guests/python/hyperlight.py b/src/wasm_sandbox/guests/python/hyperlight.py index a225fda..57da675 100644 --- a/src/wasm_sandbox/guests/python/hyperlight.py +++ 
b/src/wasm_sandbox/guests/python/hyperlight.py @@ -1,6 +1,6 @@ """Hyperlight guest-side helpers, available to user code via `from hyperlight import call_tool`.""" from sandbox_executor import _call_tool as call_tool -from sandbox_executor import http_get, http_post +from sandbox_executor import attach_credential, http_get, http_post -__all__ = ["call_tool", "http_get", "http_post"] +__all__ = ["call_tool", "http_get", "http_post", "attach_credential"] diff --git a/src/wasm_sandbox/guests/python/sandbox_executor.py b/src/wasm_sandbox/guests/python/sandbox_executor.py index 83b30c9..3733ec9 100644 --- a/src/wasm_sandbox/guests/python/sandbox_executor.py +++ b/src/wasm_sandbox/guests/python/sandbox_executor.py @@ -15,6 +15,7 @@ import wit_world.imports.tools as tools import wit_world.imports.outgoing_handler as outgoing_handler import wit_world.imports.wasi_http_types as http_types +import wit_world.imports.credentials as credentials def _call_tool(tool_name: str, **kwargs): @@ -26,17 +27,30 @@ def _call_tool(tool_name: str, **kwargs): return json.loads(result_json) -def http_get(url: str) -> dict: - """Make an HTTP GET request via WASI-HTTP. Returns {"status": int, "body": str}.""" - return _http_request("GET", url) +def http_get(url: str, credential: str = None) -> dict: + """Make an HTTP GET request via WASI-HTTP. Returns {"status": int, "body": str}. + If credential is set, attaches the named credential to the request. + The host will inject the credential's header at dispatch time — the + guest never sees the secret value. + """ + return _http_request("GET", url, credential=credential) -def http_post(url: str, body: str = "", content_type: str = "application/json") -> dict: - """Make an HTTP POST request via WASI-HTTP. 
Returns {"status": int, "body": str}.""" - return _http_request("POST", url, body=body, content_type=content_type) +def http_post(url: str, body: str = "", content_type: str = "application/json", + credential: str = None) -> dict: + """Make an HTTP POST request via WASI-HTTP. Returns {"status": int, "body": str}. -def _http_request(method: str, url: str, body: str = "", content_type: str = "") -> dict: + If credential is set, attaches the named credential to the request. + The host will inject the credential's header at dispatch time — the + guest never sees the secret value. + """ + return _http_request("POST", url, body=body, content_type=content_type, + credential=credential) + + +def _http_request(method: str, url: str, body: str = "", content_type: str = "", + credential: str = None) -> dict: """Internal: make an HTTP request via WASI-HTTP outgoing-handler.""" # Parse URL into scheme, authority, path scheme_str, rest = url.split("://", 1) if "://" in url else ("https", url) @@ -81,6 +95,16 @@ def _http_request(method: str, url: str, body: str = "", content_type: str = "") req.set_authority(authority) req.set_path_with_query(path) + # Attach credential if specified — the host resolves the secret + # and injects the header at dispatch time. + if credential is not None: + try: + credentials.attach(req, credential) + except Exception as e: + raise RuntimeError( + f"Failed to attach credential '{credential}': {e}" + ) from e + # Write body if present if body: outgoing_body = req.body() @@ -138,8 +162,55 @@ def _http_request(method: str, url: str, body: str = "", content_type: str = "") return {"status": status, "body": body_text} +def attach_credential(request, credential_id: str): + """Attach a registered credential to an outgoing-request by name. + + Low-level helper for guests building WASI-HTTP requests manually. + Most callers should use the `credential=` kwarg on `http_get`/`http_post` + instead. 
+ + Raises RuntimeError if the credential is unknown or already attached. + """ + try: + credentials.attach(request, credential_id) + except Exception as e: + raise RuntimeError( + f"Failed to attach credential '{credential_id}': {e}" + ) from e + + class Executor: - """Implements the WIT executor interface for componentize-py.""" + """Implements the WIT executor interface for componentize-py. + + The executor keeps a single, persistent module-level namespace + (``self._globals``) that is reused across every call to :py:meth:`run`. + Names defined by guest code (``x = 1``, ``def foo(): ...``, + ``class C: ...``) therefore remain visible to subsequent runs on + the same sandbox instance, matching: + + * the snapshot/restore contract documented on ``WasmSandbox`` — + ``snapshot``/``restore`` is the mechanism for rewinding state, + not bare back-to-back ``run`` calls; + * the JavaScript guest's ``globalThis`` persistence story for + explicit global writes; + * the ``python_basics`` example, which sets ``counter = 100`` + and treats ``restore`` (not the next ``run``) as the action + that makes ``counter`` undefined. + + Host-provided helpers (``call_tool``, ``http_get``, ``http_post``, + ``attach_credential``) are seeded once on construction. Guest code + may shadow them locally, but the originals are restored by + ``snapshot``/``restore`` along with the rest of the namespace. 
+ """ + + def __init__(self) -> None: + self._globals: dict = { + "__builtins__": __builtins__, + "call_tool": _call_tool, + "http_get": http_get, + "http_post": http_post, + "attach_credential": attach_credential, + } def run(self, code: str) -> ExecutionResult: """Execute Python code and capture output.""" @@ -152,7 +223,7 @@ def run(self, code: str) -> ExecutionResult: exit_code = 0 try: - exec(code, {"__builtins__": __builtins__, "call_tool": _call_tool, "http_get": http_get, "http_post": http_post}) + exec(code, self._globals) except SystemExit as e: exit_code = e.code if isinstance(e.code, int) else 1 except Exception as e: diff --git a/src/wasm_sandbox/src/credentials.rs b/src/wasm_sandbox/src/credentials.rs new file mode 100644 index 0000000..4831777 --- /dev/null +++ b/src/wasm_sandbox/src/credentials.rs @@ -0,0 +1,4 @@ +//! Re-exports from [hyperlight_sandbox::credentials] for use within +//! the Wasm backend. + +pub use hyperlight_sandbox::credentials::*; diff --git a/src/wasm_sandbox/src/lib.rs b/src/wasm_sandbox/src/lib.rs index 4d5b681..10a8015 100644 --- a/src/wasm_sandbox/src/lib.rs +++ b/src/wasm_sandbox/src/lib.rs @@ -14,6 +14,7 @@ use hyperlight_wasm::{ LoadedWasmSandbox, SandboxBuilder as HyperlightSandboxBuilder, Snapshot as WasmSnapshot, }; +pub mod credentials; mod wasi_impl; pub(crate) mod bindings { @@ -31,8 +32,9 @@ impl Guest for Wasm { tools: ToolRegistry, network: std::sync::Arc>, fs: std::sync::Arc>, + credentials: credentials::CredentialRegistry, ) -> Result { - WasmComponentSandbox::with_tools(config, tools, network, fs) + WasmComponentSandbox::with_tools(config, tools, network, fs, credentials) } } @@ -41,6 +43,7 @@ pub struct HostState { pub(crate) fs: Arc>, pub(crate) network: Arc>, pub(crate) active_requests: Arc, + pub(crate) credential_registry: credentials::CredentialRegistry, } #[allow(refining_impl_trait)] @@ -50,6 +53,11 @@ impl bindings::root::component::RootImports for HostState { self } + type Credentials = HostState; + 
fn credentials(&mut self) -> &mut Self { + self + } + type Environment = HostState; fn environment(&mut self) -> &mut Self { self @@ -216,6 +224,68 @@ impl bindings::hyperlight::sandbox::Tools for HostState { } } +// --------------------------------------------------------------------------- +// Scoped credentials — `attach` implementation (commit C). +// +// The guest calls `attach(request, credential_id)` to bind a +// previously-registered credential to an outgoing HTTP request. +// This records the binding inside the `OutgoingRequest` itself so +// the outgoing-handler dispatch path can later resolve and inject +// the token header. +// +// Error semantics: +// * `Unknown` — credential id not found in the registry. +// * `AlreadyAttached` — the request already has a credential. +// * `ScopeMismatch` / `ResolverFailed` — reserved for the +// outgoing-handler (commit D/E). +// --------------------------------------------------------------------------- +impl + bindings::hyperlight::sandbox::Credentials< + crate::wasi_impl::resource::Resource< + crate::wasi_impl::types::http_outgoing_request::OutgoingRequest, + >, + > for HostState +{ + fn attach( + &mut self, + request: hyperlight_common::resource::BorrowedResourceGuard< + '_, + crate::wasi_impl::resource::Resource< + crate::wasi_impl::types::http_outgoing_request::OutgoingRequest, + >, + >, + credential: String, + ) -> Result< + Result<(), bindings::hyperlight::sandbox::credentials::CredentialError>, + hyperlight_host::HyperlightError, + > { + use bindings::hyperlight::sandbox::credentials::CredentialError; + + use crate::wasi_impl::resource::BlockOn; + + // Verify the credential exists in the host-side registry. 
+ { + let registry = self.credential_registry.lock().map_err(|_| { + hyperlight_host::HyperlightError::Error( + "credential registry mutex poisoned".to_string(), + ) + })?; + if !registry.contains_key(&credential) { + return Ok(Err(CredentialError::Unknown)); + } + } + + // Write the credential binding into the request. + let mut guard = request.write().block_on(); + if guard.attached_credential.is_some() { + return Ok(Err(CredentialError::AlreadyAttached)); + } + guard.attached_credential = Some(credential); + + Ok(Ok(())) + } +} + pub struct WasmComponentSandbox { sandbox: bindings::RootSandbox, fs: Arc>, @@ -227,6 +297,7 @@ impl WasmComponentSandbox { tools: ToolRegistry, network: Arc>, fs: Arc>, + credentials: credentials::CredentialRegistry, ) -> Result { // Verify the shared tokio runtime is available before proceeding. hyperlight_sandbox::runtime::RUNTIME @@ -244,6 +315,7 @@ impl WasmComponentSandbox { fs: fs.clone(), network: network.clone(), active_requests: Arc::new(AtomicUsize::new(0)), + credential_registry: credentials, }; let mut proto = HyperlightSandboxBuilder::new() diff --git a/src/wasm_sandbox/src/wasi_impl/http_handler.rs b/src/wasm_sandbox/src/wasi_impl/http_handler.rs index 47df79a..375727b 100644 --- a/src/wasm_sandbox/src/wasi_impl/http_handler.rs +++ b/src/wasm_sandbox/src/wasi_impl/http_handler.rs @@ -80,6 +80,67 @@ impl let request_url = url::Url::parse(&format!("{scheme_str}://{authority}{path}")) .map_err(|e| HyperlightError::Error(format!("invalid request URL: {e}")))?; + // ----------------------------------------------------------------- + // Scoped credential — scope check & token resolution. + // + // If a credential is attached to this request, verify that the + // request URL falls within the credential's target scope and + // resolve the token now so we can inject the header later. 
+ // + // Scope is enforced BEFORE the network permission check so a + // mis-scoped credential is rejected immediately with a clear + // error rather than leaking through to the allow-list gate. + // + // The credential-registry mutex is dropped before the resolver + // is invoked — resolvers may perform slow I/O (e.g. an IMDS + // round-trip on cache miss) and we do not want to serialise + // unrelated credentialed requests behind one slow resolver. + // ----------------------------------------------------------------- + let credential_header: Option<(String, String)> = + if let Some(ref cred_id) = request_data.attached_credential { + let entry = { + let registry = self.credential_registry.lock().map_err(|_| { + HyperlightError::Error("credential registry mutex poisoned".to_string()) + })?; + match registry.get(cred_id) { + Some(e) => e.clone(), + // Defensive: attach() validated this, but the registry + // could have been cleared between attach and dispatch. + None => { + return Ok(Err(ErrorCode::InternalError(Some( + "attached credential not found in registry".to_string(), + )))); + } + } + }; + + // Scope check: request URL must start with the + // credential's target prefix. + if !request_url.as_str().starts_with(&entry.target) { + return Ok(Err(ErrorCode::HTTPRequestDenied)); + } + + // Resolve the token by invoking the registered + // callback. The resolver returns the literal secret + // value on success; on failure we surface a fixed, + // host-redacted message so the guest never sees the + // resolver's diagnostic text (which could contain + // secret material). 
+ let token = match (entry.resolver)() { + Ok(t) => t, + Err(_diag) => { + return Ok(Err(ErrorCode::InternalError(Some( + "credential resolver failed".to_string(), + )))); + } + }; + + let header_value = format!("{}{}", entry.prefix, token); + Some((entry.header.clone(), header_value)) + } else { + None + }; + { let Ok(network) = self.network.lock() else { return Ok(Err(ErrorCode::HTTPRequestDenied)); @@ -100,7 +161,7 @@ impl let active_requests = self.active_requests.clone(); // Collect guest headers eagerly in sync context. - let guest_headers: Vec<(String, String)> = request_data + let mut guest_headers: Vec<(String, String)> = request_data .headers .read() .block_on() @@ -109,6 +170,14 @@ impl .map(|(k, v)| (k, String::from_utf8_lossy(&v).into_owned())) .collect(); + // Inject the credential header, replacing any guest-set header + // with the same name so the guest cannot override the + // host-injected token. + if let Some((ref name, ref value)) = credential_header { + guest_headers.retain(|(k, _)| !k.eq_ignore_ascii_case(name)); + guest_headers.push((name.clone(), value.clone())); + } + let future_response = Resource::new(FutureIncomingResponse::default()); let future_response_clone = future_response.clone(); let future_response_panic = future_response.clone(); diff --git a/src/wasm_sandbox/src/wasi_impl/types/http_outgoing_request.rs b/src/wasm_sandbox/src/wasi_impl/types/http_outgoing_request.rs index 621e8f3..701aab5 100644 --- a/src/wasm_sandbox/src/wasi_impl/types/http_outgoing_request.rs +++ b/src/wasm_sandbox/src/wasi_impl/types/http_outgoing_request.rs @@ -11,6 +11,9 @@ pub struct OutgoingRequest { pub headers: Resource, pub body: Resource, body_taken: bool, + /// Credential identifier bound via `attach()`. Consumed by the + /// outgoing-handler dispatch path to inject the resolved token. 
+ pub attached_credential: Option, } impl OutgoingRequest { @@ -23,6 +26,7 @@ impl OutgoingRequest { headers, body: Resource::default(), body_taken: false, + attached_credential: None, } } diff --git a/src/wasm_sandbox/tests/credential_integration.rs b/src/wasm_sandbox/tests/credential_integration.rs new file mode 100644 index 0000000..6423275 --- /dev/null +++ b/src/wasm_sandbox/tests/credential_integration.rs @@ -0,0 +1,721 @@ +//! Integration tests: scoped-credential injection end-to-end. +//! +//! Each test spins up a local [`EchoServer`], registers one or more +//! credentials, then runs guest Python code that exercises the +//! `credential=` kwarg on `http_get`/`http_post`. The echo server +//! reflects all received headers so we can assert exactly what the +//! host injected (or blocked). + +use std::path::Path; +use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, Ordering}; + +use hyperlight_sandbox::test_utils::EchoServer; +use hyperlight_sandbox::{CredentialEntry, HttpMethod, ResolverFn, SandboxBuilder}; +use hyperlight_wasm_sandbox::Wasm; + +fn python_guest_path() -> String { + Path::new(env!("CARGO_MANIFEST_DIR")) + .join("guests/python/python-sandbox.aot") + .display() + .to_string() +} + +/// Helper to build a [`CredentialEntry`] with sensible defaults and a +/// static token value. Tests that need rotation or fault injection +/// build a custom [`ResolverFn`] inline instead. 
+fn cred(target: &str, token: &str) -> CredentialEntry { + CredentialEntry::with_static_resolver(target, "authorization", "Bearer ", token) +} + +// ----------------------------------------------------------------------- +// Happy path: credential header is injected +// ----------------------------------------------------------------------- + +#[tokio::test] +async fn credential_header_injected_on_get() { + let server = EchoServer::start().await; + let base_url = server.url(""); + + let result = tokio::task::spawn_blocking(move || { + let mut sandbox = SandboxBuilder::new() + .guest(Wasm) + .module_path(python_guest_path()) + .build() + .expect("failed to create sandbox"); + + sandbox + .register_credential("test_cred", cred(&base_url, "secret-token-42")) + .expect("register_credential failed"); + + sandbox + .allow_domain(&base_url, vec![HttpMethod::Get]) + .expect("allow_domain failed"); + + let code = format!( + r#" +resp = http_get("{base_url}/api/data", credential="test_cred") +print(resp["body"]) +"#, + base_url = base_url.trim_end_matches('/') + ); + + sandbox.run(&code).expect("sandbox run failed") + }) + .await + .unwrap(); + + assert_eq!(result.exit_code, 0, "stderr: {}", result.stderr); + + let echo: serde_json::Value = + serde_json::from_str(result.stdout.trim()).expect("failed to parse echo response"); + let headers = echo["headers"].as_object().expect("missing headers"); + assert_eq!( + headers.get("authorization").and_then(|v| v.as_str()), + Some("Bearer secret-token-42"), + "credential header not injected or has wrong value" + ); +} + +#[tokio::test] +async fn credential_header_injected_on_post() { + let server = EchoServer::start().await; + let base_url = server.url(""); + + let result = tokio::task::spawn_blocking(move || { + let mut sandbox = SandboxBuilder::new() + .guest(Wasm) + .module_path(python_guest_path()) + .build() + .expect("failed to create sandbox"); + + sandbox + .register_credential("post_cred", cred(&base_url, "post-token-99")) + 
.expect("register_credential failed"); + + sandbox + .allow_domain(&base_url, vec![HttpMethod::Post]) + .expect("allow_domain failed"); + + let code = format!( + r#" +resp = http_post("{base_url}/submit", body='{{"key": "val"}}', credential="post_cred") +print(resp["body"]) +"#, + base_url = base_url.trim_end_matches('/') + ); + + sandbox.run(&code).expect("sandbox run failed") + }) + .await + .unwrap(); + + assert_eq!(result.exit_code, 0, "stderr: {}", result.stderr); + + let echo: serde_json::Value = + serde_json::from_str(result.stdout.trim()).expect("failed to parse echo response"); + let headers = echo["headers"].as_object().expect("missing headers"); + assert_eq!( + headers.get("authorization").and_then(|v| v.as_str()), + Some("Bearer post-token-99"), + ); +} + +// ----------------------------------------------------------------------- +// Error: unknown credential name → guest RuntimeError +// ----------------------------------------------------------------------- + +#[tokio::test] +async fn unknown_credential_raises_error() { + let server = EchoServer::start().await; + let base_url = server.url(""); + + let result = tokio::task::spawn_blocking(move || { + let mut sandbox = SandboxBuilder::new() + .guest(Wasm) + .module_path(python_guest_path()) + .build() + .expect("failed to create sandbox"); + + sandbox + .allow_domain(&base_url, vec![HttpMethod::Get]) + .expect("allow_domain failed"); + + // Note: no credential registered — guest tries to attach "ghost" + let code = format!( + r#" +resp = http_get("{base_url}/api", credential="ghost") +print(resp["body"]) +"#, + base_url = base_url.trim_end_matches('/') + ); + + sandbox.run(&code).expect("sandbox run failed") + }) + .await + .unwrap(); + + assert_ne!(result.exit_code, 0, "expected non-zero exit code"); + assert!( + result.stderr.contains("credential") || result.stderr.contains("RuntimeError"), + "stderr should mention credential error, got: {}", + result.stderr + ); +} + +// 
----------------------------------------------------------------------- +// Error: scope mismatch — credential bound to different URL prefix +// ----------------------------------------------------------------------- + +#[tokio::test] +async fn scope_mismatch_denied() { + let server = EchoServer::start().await; + let base_url = server.url(""); + + let result = tokio::task::spawn_blocking(move || { + let mut sandbox = SandboxBuilder::new() + .guest(Wasm) + .module_path(python_guest_path()) + .build() + .expect("failed to create sandbox"); + + // Credential scoped to https://example.com — won't match the local server + sandbox + .register_credential("wrong_scope", cred("https://example.com/api", "nope")) + .expect("register_credential failed"); + + sandbox + .allow_domain(&base_url, vec![HttpMethod::Get]) + .expect("allow_domain failed"); + + let code = format!( + r#" +resp = http_get("{base_url}/api/data", credential="wrong_scope") +print(resp["body"]) +"#, + base_url = base_url.trim_end_matches('/') + ); + + sandbox.run(&code).expect("sandbox run failed") + }) + .await + .unwrap(); + + assert_ne!( + result.exit_code, 0, + "expected non-zero exit code for scope mismatch" + ); + let stderr_lc = result.stderr.to_ascii_lowercase(); + assert!( + stderr_lc.contains("denied") || stderr_lc.contains("failed"), + "stderr should indicate request was denied, got: {}", + result.stderr + ); +} + +// ----------------------------------------------------------------------- +// Error: double-attach — attaching a second credential to the same req +// ----------------------------------------------------------------------- + +#[tokio::test] +async fn double_attach_rejected() { + let server = EchoServer::start().await; + let base_url = server.url(""); + + let result = tokio::task::spawn_blocking(move || { + let mut sandbox = SandboxBuilder::new() + .guest(Wasm) + .module_path(python_guest_path()) + .build() + .expect("failed to create sandbox"); + + sandbox + 
.register_credential("cred_a", cred(&base_url, "token-a")) + .expect("register_credential failed"); + + sandbox + .register_credential("cred_b", cred(&base_url, "token-b")) + .expect("register_credential failed"); + + sandbox + .allow_domain(&base_url, vec![HttpMethod::Get]) + .expect("allow_domain failed"); + + // Guest code uses attach_credential directly to attempt double-attach + let code = format!( + r#" +import wit_world.imports.wasi_http_types as http_types +import wit_world.imports.credentials as creds + +fields = http_types.Fields.from_list([("user-agent", b"test")]) +req = http_types.OutgoingRequest(fields) +req.set_method(http_types.Method_Get()) +req.set_scheme(http_types.Scheme_Http()) +req.set_authority("{authority}") +req.set_path_with_query("/double") + +# First attach — should succeed +creds.attach(req, "cred_a") + +# Second attach — should fail with already-attached +try: + creds.attach(req, "cred_b") + print("ERROR: second attach did not raise") +except Exception as e: + print(f"OK: {{e}}") +"#, + authority = base_url.trim_start_matches("http://").trim_end_matches('/') + ); + + sandbox.run(&code).expect("sandbox run failed") + }) + .await + .unwrap(); + + assert_eq!(result.exit_code, 0, "stderr: {}", result.stderr); + assert!( + result.stdout.contains("OK:"), + "expected OK from double-attach rejection, got stdout: {}", + result.stdout + ); +} + +// ----------------------------------------------------------------------- +// Security: guest-set Authorization header is replaced by credential +// ----------------------------------------------------------------------- + +#[tokio::test] +async fn guest_cannot_override_credential_header() { + let server = EchoServer::start().await; + let base_url = server.url(""); + + let result = tokio::task::spawn_blocking(move || { + let mut sandbox = SandboxBuilder::new() + .guest(Wasm) + .module_path(python_guest_path()) + .build() + .expect("failed to create sandbox"); + + sandbox + .register_credential("legit", 
cred(&base_url, "real-token")) + .expect("register_credential failed"); + + sandbox + .allow_domain(&base_url, vec![HttpMethod::Get]) + .expect("allow_domain failed"); + + // Guest manually sets Authorization header, then also attaches a + // credential. The host must strip the guest's header and inject + // the credential's value instead. + let code = format!( + r#" +import wit_world.imports.wasi_http_types as http_types +import wit_world.imports.outgoing_handler as outgoing_handler +import wit_world.imports.credentials as creds + +fields = http_types.Fields.from_list([ + ("user-agent", b"test"), + ("authorization", b"Bearer evil-guest-token"), +]) +req = http_types.OutgoingRequest(fields) +req.set_method(http_types.Method_Get()) +req.set_scheme(http_types.Scheme_Http()) +req.set_authority("{authority}") +req.set_path_with_query("/sneaky") + +creds.attach(req, "legit") + +future_resp = outgoing_handler.handle(req, None) +pollable = future_resp.subscribe() +pollable.block() +resp_result = future_resp.get() +resp = resp_result +if hasattr(resp, 'value'): + resp = resp.value +if hasattr(resp, 'value'): + resp = resp.value +import json +body_stream = resp.consume().stream() +chunks = [] +while True: + try: + chunk = body_stream.read(65536) + if chunk: + chunks.append(chunk) + else: + break + except: + break +print(b"".join(chunks).decode()) +"#, + authority = base_url.trim_start_matches("http://").trim_end_matches('/') + ); + + sandbox.run(&code).expect("sandbox run failed") + }) + .await + .unwrap(); + + assert_eq!(result.exit_code, 0, "stderr: {}", result.stderr); + + let echo: serde_json::Value = + serde_json::from_str(result.stdout.trim()).expect("failed to parse echo response"); + let headers = echo["headers"].as_object().expect("missing headers"); + let auth_val = headers + .get("authorization") + .and_then(|v| v.as_str()) + .expect("authorization header missing"); + + assert_eq!( + auth_val, "Bearer real-token", + "credential header must be the host-injected 
value, not the guest's" + ); + assert!( + !auth_val.contains("evil"), + "guest's fake authorization header must be stripped" + ); +} + +// ----------------------------------------------------------------------- +// No credential attached — request goes through without auth header +// ----------------------------------------------------------------------- + +#[tokio::test] +async fn no_credential_means_no_auth_header() { + let server = EchoServer::start().await; + let base_url = server.url(""); + + let result = tokio::task::spawn_blocking(move || { + let mut sandbox = SandboxBuilder::new() + .guest(Wasm) + .module_path(python_guest_path()) + .build() + .expect("failed to create sandbox"); + + sandbox + .allow_domain(&base_url, vec![HttpMethod::Get]) + .expect("allow_domain failed"); + + let code = format!( + r#" +resp = http_get("{base_url}/open") +print(resp["body"]) +"#, + base_url = base_url.trim_end_matches('/') + ); + + sandbox.run(&code).expect("sandbox run failed") + }) + .await + .unwrap(); + + assert_eq!(result.exit_code, 0, "stderr: {}", result.stderr); + + let echo: serde_json::Value = + serde_json::from_str(result.stdout.trim()).expect("failed to parse echo response"); + let headers = echo["headers"].as_object().expect("missing headers"); + assert!( + headers.get("authorization").is_none(), + "no credential attached — authorization header should be absent" + ); +} + +// ----------------------------------------------------------------------- +// Host-side duplicate registration is rejected +// ----------------------------------------------------------------------- + +#[tokio::test] +async fn duplicate_credential_registration_rejected() { + let sandbox = SandboxBuilder::new() + .guest(Wasm) + .module_path(python_guest_path()) + .build() + .expect("failed to create sandbox"); + + sandbox + .register_credential("dup", cred("https://example.com", "tok")) + .expect("first registration should succeed"); + + let err = sandbox + .register_credential("dup", 
cred("https://example.com", "tok2")) + .expect_err("second registration should fail"); + + assert!( + format!("{err}").contains("already registered"), + "error should mention 'already registered', got: {err}" + ); +} + +// ----------------------------------------------------------------------- +// Resolver is invoked per-request — proves token refresh contract +// ----------------------------------------------------------------------- + +#[tokio::test] +async fn resolver_invoked_per_request() { + let server = EchoServer::start().await; + let base_url = server.url(""); + + let result = tokio::task::spawn_blocking(move || { + let mut sandbox = SandboxBuilder::new() + .guest(Wasm) + .module_path(python_guest_path()) + .build() + .expect("failed to create sandbox"); + + // Resolver that returns a different token on each invocation. + // This is the canonical proof that the host calls the resolver + // for every outgoing credentialed request, not just once at + // registration time. + let counter = Arc::new(AtomicUsize::new(0)); + let counter_for_resolver = Arc::clone(&counter); + let resolver: ResolverFn = Arc::new(move || { + let n = counter_for_resolver.fetch_add(1, Ordering::SeqCst); + Ok(format!("rotating-token-{n}")) + }); + + sandbox + .register_credential( + "rotating", + CredentialEntry { + target: base_url.clone(), + header: "authorization".to_string(), + prefix: "Bearer ".to_string(), + resolver, + }, + ) + .expect("register_credential failed"); + + sandbox + .allow_domain(&base_url, vec![HttpMethod::Get]) + .expect("allow_domain failed"); + + let code = format!( + r#" +import json +r1 = http_get("{base_url}/api/one", credential="rotating") +r2 = http_get("{base_url}/api/two", credential="rotating") +print(json.dumps([json.loads(r1["body"]), json.loads(r2["body"])])) +"#, + base_url = base_url.trim_end_matches('/') + ); + + (sandbox.run(&code).expect("sandbox run failed"), counter) + }) + .await + .unwrap(); + + let (exec, counter) = result; + 
assert_eq!(exec.exit_code, 0, "stderr: {}", exec.stderr); + + let echoes: Vec = + serde_json::from_str(exec.stdout.trim()).expect("failed to parse echo array"); + assert_eq!(echoes.len(), 2, "expected two echoed responses"); + assert_eq!( + echoes[0]["headers"]["authorization"].as_str(), + Some("Bearer rotating-token-0"), + "first request should see token-0" + ); + assert_eq!( + echoes[1]["headers"]["authorization"].as_str(), + Some("Bearer rotating-token-1"), + "second request should see token-1 — resolver MUST be called per request" + ); + assert_eq!( + counter.load(Ordering::SeqCst), + 2, + "resolver should have been invoked exactly twice" + ); +} + +// ----------------------------------------------------------------------- +// Resolver failure surfaces as a request error with no token leakage +// ----------------------------------------------------------------------- + +#[tokio::test] +async fn resolver_failure_surfaces_as_error() { + let server = EchoServer::start().await; + let base_url = server.url(""); + + let result = tokio::task::spawn_blocking(move || { + let mut sandbox = SandboxBuilder::new() + .guest(Wasm) + .module_path(python_guest_path()) + .build() + .expect("failed to create sandbox"); + + // Resolver that always fails. The diagnostic string MUST NOT + // appear in any guest-visible error — the host redacts it to a + // fixed message. 
+ let resolver: ResolverFn = + Arc::new(|| Err("secret-bearing diagnostic that must not leak".to_string())); + + sandbox + .register_credential( + "broken", + CredentialEntry { + target: base_url.clone(), + header: "authorization".to_string(), + prefix: "Bearer ".to_string(), + resolver, + }, + ) + .expect("register_credential failed"); + + sandbox + .allow_domain(&base_url, vec![HttpMethod::Get]) + .expect("allow_domain failed"); + + let code = format!( + r#" +try: + resp = http_get("{base_url}/api/data", credential="broken") + print("UNEXPECTED_OK:" + resp["body"]) +except Exception as e: + print("ERR:" + repr(e)) +"#, + base_url = base_url.trim_end_matches('/') + ); + + sandbox.run(&code).expect("sandbox run failed") + }) + .await + .unwrap(); + + assert_eq!(result.exit_code, 0, "stderr: {}", result.stderr); + assert!( + result.stdout.starts_with("ERR:"), + "guest should have raised an exception, got stdout: {}", + result.stdout + ); + // The host-redacted message is the only thing the guest may see. + assert!( + !result.stdout.contains("secret-bearing"), + "resolver diagnostic must NOT leak to guest, stdout was: {}", + result.stdout + ); + assert!( + !result.stdout.contains("must not leak"), + "resolver diagnostic must NOT leak to guest, stdout was: {}", + result.stdout + ); +} + +// ----------------------------------------------------------------------- +// Multi-tenant isolation: two sandboxes that register the same +// credential id with DIFFERENT tokens must each see only their own +// token. Proves the credential registry is per-`Sandbox` instance +// — there is no global key table, no shared `Arc`, no cross-instance +// lookup path. +// +// If this test ever fails it means the host has acquired a shared +// registry by mistake (e.g. a `lazy_static!`, a `OnceCell`, or a +// stray `Arc::clone` between sandboxes). Treat as critical. 
+// ----------------------------------------------------------------------- + +#[tokio::test] +async fn isolated_registries_across_sandboxes() { + let server = EchoServer::start().await; + let base_url = server.url(""); + + let (result_a, result_b) = tokio::task::spawn_blocking(move || { + // ---- Sandbox A: id="shared" → token-tenant-A ---- + let result_a = { + let mut sandbox = SandboxBuilder::new() + .guest(Wasm) + .module_path(python_guest_path()) + .build() + .expect("failed to create sandbox A"); + + sandbox + .register_credential("shared", cred(&base_url, "token-tenant-A")) + .expect("register_credential on sandbox A failed"); + + sandbox + .allow_domain(&base_url, vec![HttpMethod::Get]) + .expect("allow_domain on sandbox A failed"); + + let code = format!( + r#" +resp = http_get("{base_url}/tenant-a", credential="shared") +print(resp["body"]) +"#, + base_url = base_url.trim_end_matches('/') + ); + + sandbox.run(&code).expect("sandbox A run failed") + }; + + // ---- Sandbox B: same id="shared" → token-tenant-B ---- + let result_b = { + let mut sandbox = SandboxBuilder::new() + .guest(Wasm) + .module_path(python_guest_path()) + .build() + .expect("failed to create sandbox B"); + + sandbox + .register_credential("shared", cred(&base_url, "token-tenant-B")) + .expect("register_credential on sandbox B failed"); + + sandbox + .allow_domain(&base_url, vec![HttpMethod::Get]) + .expect("allow_domain on sandbox B failed"); + + let code = format!( + r#" +resp = http_get("{base_url}/tenant-b", credential="shared") +print(resp["body"]) +"#, + base_url = base_url.trim_end_matches('/') + ); + + sandbox.run(&code).expect("sandbox B run failed") + }; + + (result_a, result_b) + }) + .await + .unwrap(); + + assert_eq!( + result_a.exit_code, 0, + "sandbox A stderr: {}", + result_a.stderr + ); + assert_eq!( + result_b.exit_code, 0, + "sandbox B stderr: {}", + result_b.stderr + ); + + let echo_a: serde_json::Value = + serde_json::from_str(result_a.stdout.trim()).expect("failed 
to parse echo response A"); + let echo_b: serde_json::Value = + serde_json::from_str(result_b.stdout.trim()).expect("failed to parse echo response B"); + + assert_eq!( + echo_a["headers"]["authorization"].as_str(), + Some("Bearer token-tenant-A"), + "sandbox A must see ONLY its own token" + ); + assert_eq!( + echo_b["headers"]["authorization"].as_str(), + Some("Bearer token-tenant-B"), + "sandbox B must see ONLY its own token" + ); + + // Belt-and-braces: neither sandbox's stdout contains the other's + // token. Catches any future regression where, say, a debug log + // path or a shared registry accidentally surfaces the foreign + // value to the guest. + assert!( + !result_a.stdout.contains("token-tenant-B"), + "sandbox A leaked sandbox B's token: {}", + result_a.stdout + ); + assert!( + !result_b.stdout.contains("token-tenant-A"), + "sandbox B leaked sandbox A's token: {}", + result_b.stdout + ); +} diff --git a/src/wasm_sandbox/tests/python_state_persistence.rs b/src/wasm_sandbox/tests/python_state_persistence.rs new file mode 100644 index 0000000..4002e27 --- /dev/null +++ b/src/wasm_sandbox/tests/python_state_persistence.rs @@ -0,0 +1,114 @@ +//! Integration test: Python guest module globals persist across `run()`. +//! +//! This test pins the documented contract that the Python `Executor` +//! reuses one module-level namespace for every call to `run()` on the +//! same sandbox instance. The previous implementation built a fresh +//! `globals` dict on every call (`exec(code, {...})`), which silently +//! discarded any `def`, `class`, or top-level assignment between runs. +//! That contradicted: +//! +//! * the `WasmSandbox` `snapshot`/`restore` contract — the documented +//! mechanism for rewinding guest state — which only makes sense +//! if state otherwise survives a `run()` boundary; +//! * the `python_basics` example's "state was rolled back" narrative; +//! * the JavaScript guest, which preserves `globalThis` across runs. +//! +//! 
The tests below would have failed on the prior implementation; they +//! pass once `Executor` stores its globals on the instance and reuses +//! them across `run()` calls. + +use std::path::Path; + +use hyperlight_sandbox::SandboxBuilder; +use hyperlight_wasm_sandbox::Wasm; + +fn python_guest_path() -> String { + Path::new(env!("CARGO_MANIFEST_DIR")) + .join("guests/python/python-sandbox.aot") + .display() + .to_string() +} + +/// A `def` at module top level in `run()` #1 must be callable in `run()` #2. +#[tokio::test] +async fn python_function_definition_persists_across_runs() { + let result = tokio::task::spawn_blocking(|| { + let mut sandbox = SandboxBuilder::new() + .guest(Wasm) + .module_path(python_guest_path()) + .build() + .expect("failed to create sandbox"); + + sandbox + .run("def word_count(text): return len(text.split())") + .expect("first run failed"); + + sandbox + .run("print(word_count('hello world from hyperlight'))") + .expect("second run failed") + }) + .await + .unwrap(); + + assert_eq!(result.exit_code, 0, "stderr: {}", result.stderr); + assert_eq!(result.stdout.trim(), "4"); +} + +/// A bare module-level assignment in `run()` #1 must be readable in `run()` #2. +#[tokio::test] +async fn python_top_level_assignment_persists_across_runs() { + let result = tokio::task::spawn_blocking(|| { + let mut sandbox = SandboxBuilder::new() + .guest(Wasm) + .module_path(python_guest_path()) + .build() + .expect("failed to create sandbox"); + + sandbox.run("counter = 100").expect("first run failed"); + sandbox + .run("print(f'counter = {counter}')") + .expect("second run failed") + }) + .await + .unwrap(); + + assert_eq!(result.exit_code, 0, "stderr: {}", result.stderr); + assert_eq!(result.stdout.trim(), "counter = 100"); +} + +/// `snapshot` + `restore` must continue to rewind the persistent +/// namespace, undoing any names defined since the snapshot. This is +/// the contract documented on `WasmSandbox`; the persistence fix must +/// not regress it. 
+#[tokio::test] +async fn python_restore_rewinds_module_globals() { + let result = tokio::task::spawn_blocking(|| { + let mut sandbox = SandboxBuilder::new() + .guest(Wasm) + .module_path(python_guest_path()) + .build() + .expect("failed to create sandbox"); + + let snap = sandbox.snapshot().expect("snapshot failed"); + sandbox + .run("rolled_back = 'still here'") + .expect("set failed"); + sandbox.restore(&snap).expect("restore failed"); + + sandbox + .run( + r#" +try: + print(rolled_back) +except NameError: + print("rolled_back is undefined") +"#, + ) + .expect("post-restore run failed") + }) + .await + .unwrap(); + + assert_eq!(result.exit_code, 0, "stderr: {}", result.stderr); + assert_eq!(result.stdout.trim(), "rolled_back is undefined"); +} diff --git a/src/wasm_sandbox/wit/hyperlight-sandbox.wit b/src/wasm_sandbox/wit/hyperlight-sandbox.wit index 5adc2cf..345b4e7 100644 --- a/src/wasm_sandbox/wit/hyperlight-sandbox.wit +++ b/src/wasm_sandbox/wit/hyperlight-sandbox.wit @@ -35,6 +35,7 @@ world root { import wasi:http/types@0.2.0; import wasi:http/outgoing-handler@0.2.0; import hyperlight:sandbox/tools; + import hyperlight:sandbox/credentials; export hyperlight:sandbox/executor; } @@ -54,9 +55,54 @@ package hyperlight:sandbox { dispatch: func(name: string, args-json: string) -> result; } + /// Scoped credential injection for outgoing HTTP requests. + /// + /// A credential is registered host-side with: + /// * a name (opaque string the guest uses to reference it), + /// * a target scope (URL prefix the credential is bound to), + /// * a header name + value prefix, + /// * a resolver callback that produces the secret value. + /// + /// The guest never sees the secret. It attaches a credential by name to an + /// `outgoing-request` before dispatching it through + /// `wasi:http/outgoing-handler`. At dispatch time the host: + /// 1. resolves the credential value (resolver callback), + /// 2. 
verifies the request URL is within the credential's target scope, + /// 3. injects `<header>: <prefix><value>` into the request, + /// 4. then proceeds with the normal `allow_domain` network gate. + /// + /// Both the credential scope check AND the allow_domain check must pass. + /// Credential lookup, scope match, resolver errors and unknown credentials + /// all surface as the `credential-error` variant. Secret values are never + /// included in error messages. + interface credentials { + use wasi:http/types@0.2.0.{outgoing-request}; + + variant credential-error { + /// No credential is registered under this name. + unknown, + /// The request URL does not fall within the credential's target scope. + scope-mismatch, + /// The host resolver callback failed. The inner string is a + /// host-redacted message safe for logging. + resolver-failed(string), + /// A credential is already attached to this request. + already-attached, + } + + /// Attach a registered credential to an outgoing-request. + /// The host will inject the credential's header at dispatch time. + /// Returns ok on success, credential-error on failure. + attach: func( + request: borrow, + credential: string, + ) -> result<_, credential-error>; + } + /// Minimal world for guests that bring their own WASI shim (e.g. ComponentizeJS). world sandbox { import tools; + import credentials; import wasi:filesystem/types@0.2.0; import wasi:filesystem/preopens@0.2.0; import wasi:http/types@0.2.0; diff --git a/src/wasm_sandbox/wit/sandbox-world.wasm b/src/wasm_sandbox/wit/sandbox-world.wasm new file mode 100644 index 0000000..a8a3e89 Binary files /dev/null and b/src/wasm_sandbox/wit/sandbox-world.wasm differ