diff --git a/.mise.toml b/.mise.toml index ea40fef..9cb1ae4 100644 --- a/.mise.toml +++ b/.mise.toml @@ -21,6 +21,7 @@ java = "latest" maven = "latest" mprocs = "latest" "npm:onnxruntime-web" = "latest" +"npm:pyodide" = "0.29.3" ollama = "latest" osv-scanner = "latest" pipx = "latest" @@ -236,6 +237,15 @@ uv build --wheel --out-dir pkg cargo run -p pyproject-to-package-json """ +[tasks.build-ws-pyface1-module] +description = "Build the pyface1 Python face detection workflow module" +dir = "services/ws-modules/pyface1" +run = """ +uv build --wheel --out-dir pkg +cargo run -p pyproject-to-package-json +yq eval-all -i 'select(fileIndex == 0) * select(fileIndex == 1)' pkg/package.json package.json +""" + [tasks.build-ws-java-data1-module] description = "Build the java-data1 workflow module" run = "mvn package" @@ -259,6 +269,7 @@ depends = [ "build-ws-java-data1-module", "build-ws-nfc-module", "build-ws-pydata1-module", + "build-ws-pyface1-module", "build-ws-sensor1-module", "build-ws-speech-recognition-module", "build-ws-video1-module", diff --git a/libs/edge-toolkit/src/config.rs b/libs/edge-toolkit/src/config.rs index 9aa44d3..4149b42 100644 --- a/libs/edge-toolkit/src/config.rs +++ b/libs/edge-toolkit/src/config.rs @@ -43,6 +43,9 @@ pub fn default_modules_folders() -> Vec { if let Some(p) = mise_npm_modules_path("onnxruntime-web") { paths.push(p); } + if let Some(p) = mise_npm_modules_path("pyodide") { + paths.push(p); + } paths } diff --git a/services/ws-modules/pyface1/package.json b/services/ws-modules/pyface1/package.json new file mode 100644 index 0000000..04e31e0 --- /dev/null +++ b/services/ws-modules/pyface1/package.json @@ -0,0 +1,7 @@ +{ + "dependencies": { + "et-model-face1": "*", + "onnxruntime-web": "*", + "pyodide": "*" + } +} diff --git a/services/ws-modules/pyface1/pkg/.gitignore b/services/ws-modules/pyface1/pkg/.gitignore new file mode 100644 index 0000000..d415af9 --- /dev/null +++ b/services/ws-modules/pyface1/pkg/.gitignore @@ -0,0 +1,2 @@ +*.whl +package.json diff --git a/services/ws-modules/pyface1/pkg/et_ws_pyface1.js b/services/ws-modules/pyface1/pkg/et_ws_pyface1.js new file mode 100644 index 0000000..9b74a0f --- /dev/null +++ b/services/ws-modules/pyface1/pkg/et_ws_pyface1.js @@ -0,0 +1,215 @@ +// et_ws_pyface1.js - Browser adapter for the Pyodide workflow. +// Interface: default(), run(), start(), stop(), is_running() + +const PYODIDE_BASE_URL = "/modules/pyodide/"; + +let pyodide; +let py; +let cfg; +let runtime = null; +let workCanvas = null; +let tensorData = null; + +export default async function init() { + if (!globalThis.loadPyodide) { + await new Promise((resolve, reject) => { + const script = document.createElement("script"); + script.src = `${PYODIDE_BASE_URL}pyodide.js`; + script.onload = resolve; + script.onerror = reject; + document.head.appendChild(script); + }); + } + + pyodide = await globalThis.loadPyodide({ indexURL: PYODIDE_BASE_URL }); + const pkg = await fetch(new URL("package.json", import.meta.url)).then((r) => r.json()); + const wheel = `${pkg.name.replace(/-/g, "_")}-${pkg.version}-py3-none-any.whl`; + const wheelBytes = new Uint8Array(await fetch(new URL(wheel, import.meta.url)).then((r) => r.arrayBuffer())); + pyodide.FS.writeFile(`/tmp/${wheel}`, wheelBytes); + pyodide.runPython(`import sys\nsys.path.insert(0, "/tmp/${wheel}")`); + py = pyodide.pyimport("pyface1"); + cfg = py.config().toJs({ dict_converter: Object.fromEntries }); +} + +export const is_running = () => runtime !== null; +export const start = () => run(); + +export async function run() { + if (!py) throw new Error("pyface1: not initialized"); + if (runtime) return; + + setStatus(py.starting_status()); + log(py.model_log_message()); + + let client = null; + let stream = null; + let state = null; + + try { + const { WsClient, WsClientConfig } = await import("/modules/et-ws-wasm-agent/et_ws_wasm_agent.js"); + const protocol = window.location.protocol === "https:" ? "wss:" : "ws:"; + client = new WsClient(new WsClientConfig(`${protocol}//${window.location.host}/ws`)); + client.connect(); + for (let i = 0; client.get_state() !== "connected" && i < 100; i++) await sleep(100); + if (client.get_state() !== "connected") throw new Error("Timed out waiting for websocket connection"); + log(`websocket connected with agent_id=${client.get_client_id()}`); + + stream = await navigator.mediaDevices.getUserMedia({ audio: false, video: true }); + const video = element("video-preview", HTMLVideoElement); + video.srcObject = stream; + video.hidden = false; + for (let i = 0; video.videoWidth === 0 && i < 50; i++) await sleep(100); + if (video.videoWidth === 0 || video.videoHeight === 0) throw new Error("Video stream metadata did not load"); + await video.play(); + + const wasm = globalThis.ort?.env?.wasm; + const version = globalThis.ort?.env?.versions?.web; + if (!wasm || !version) throw new Error("onnxruntime-web environment is unavailable"); + const base = "/modules/onnxruntime-web/dist"; + wasm.numThreads = globalThis.crossOriginIsolated && globalThis.SharedArrayBuffer ? 0 : 1; + wasm.wasmPaths = { mjs: `${base}/ort-wasm-simd-threaded.mjs`, wasm: `${base}/ort-wasm-simd-threaded.wasm` }; + + const session = await globalThis.ort.InferenceSession.create(cfg.model_path, { executionProviders: ["wasm"] }); + const outputNames = py.validate_output_names(pyodide.toPy(Array.from(session.outputNames))).toJs(); + state = { client, stream, session, inputName: session.inputNames[0], outputNames }; + runtime = state; + + await py.run( + state.inputName, + pyodide.toPy(outputNames), + pyodide.toPy(() => infer(state)), + pyodide.toPy((message) => client.send(message)), + pyodide.toPy(render), + pyodide.toPy(sleep), + pyodide.toPy(log), + pyodide.toPy(setStatus), + pyodide.toPy(() => runtime !== state), + ); + } finally { + cleanup(state ?? { client, stream }); + } +} + +export function stop() { + if (!runtime) return; + cleanup(runtime); + log("pyface1 face detection demo stopped"); +} + +async function infer(state) { + const video = element("video-preview", HTMLVideoElement); + if (video.videoWidth <= 0 || video.videoHeight <= 0) throw new Error("Video stream is not ready yet."); + + const geometry = py.preprocess_geometry(video.videoWidth, video.videoHeight).toJs({ + dict_converter: Object.fromEntries, + }); + const canvas = workCanvas ??= document.createElement("canvas"); + canvas.width = cfg.input_width; + canvas.height = cfg.input_height; + + const ctx = canvas.getContext("2d"); + ctx.clearRect(0, 0, canvas.width, canvas.height); + ctx.drawImage(video, 0, 0, geometry.resized_width, geometry.resized_height); + + const tensor = imageDataToTensor(ctx.getImageData(0, 0, canvas.width, canvas.height).data); + const outputs = await state.session.run({ + [state.inputName]: new globalThis.ort.Tensor("float32", tensor, [ + 1, + cfg.input_height, + cfg.input_width, + 3, + ]), + }); + + return pyodide.toPy({ + loc: Array.from(outputs[state.outputNames[0]].data), + conf: Array.from(outputs[state.outputNames[1]].data), + landm: Array.from(outputs[state.outputNames[2]].data), + resize_ratio: geometry.resize_ratio, + source_width: video.videoWidth, + source_height: video.videoHeight, + }); +} + +function render(detectionsJson) { + const video = element("video-preview", HTMLVideoElement); + if (video.videoWidth === 0 || video.videoHeight === 0) return; + + const canvas = element("video-output-canvas", HTMLCanvasElement); + const ctx = canvas.getContext("2d"); + canvas.width = video.videoWidth; + canvas.height = video.videoHeight; + canvas.hidden = false; + ctx.drawImage(video, 0, 0, canvas.width, canvas.height); + ctx.lineWidth = 3; + ctx.font = "16px ui-monospace, monospace"; + + for (const detection of JSON.parse(detectionsJson)) { + const [left, top, right, bottom] = detection.box; + const label = `${detection.label} ${(detection.score * 100).toFixed(1)}%`; + ctx.strokeStyle = "#ef8f35"; + ctx.strokeRect(left, top, Math.max(right - left, 1), Math.max(bottom - top, 1)); + ctx.fillStyle = "#182028"; + ctx.fillRect(left, Math.max(top - 24, 0), ctx.measureText(label).width + 10, 22); + ctx.fillStyle = "#fffdfa"; + ctx.fillText(label, left + 5, Math.max(top - 8, 16)); + } +} + +function cleanup(state) { + if (runtime === state) runtime = null; + for (const track of state?.stream?.getTracks?.() ?? []) track.stop(); + state?.client?.disconnect?.(); + + const video = document.getElementById("video-preview"); + if (video) { + video.pause(); + video.srcObject = null; + video.hidden = true; + } + + const canvas = document.getElementById("video-output-canvas"); + if (canvas) { + canvas.hidden = true; + canvas.getContext("2d")?.clearRect(0, 0, canvas.width, canvas.height); + } +} + +function setStatus(message) { + const element = document.getElementById("module-output"); + if (element) element.value = message; +} + +function log(message) { + const line = `[pyface1] ${message}`; + console.log(line); + const element = document.getElementById("log"); + if (element) element.textContent = element.textContent ? `${element.textContent}\n${line}` : line; +} + +function sleep(ms) { + return new Promise((resolve) => window.setTimeout(resolve, ms)); +} + +function imageDataToTensor(rgba) { + const pixelCount = cfg.input_width * cfg.input_height; + if (!tensorData || tensorData.length !== pixelCount * 3) { + tensorData = new Float32Array(pixelCount * 3); + } + + for (let pixel = 0; pixel < pixelCount; pixel++) { + const rgbaIndex = pixel * 4; + const tensorIndex = pixel * 3; + tensorData[tensorIndex] = rgba[rgbaIndex + 2] - 104; + tensorData[tensorIndex + 1] = rgba[rgbaIndex + 1] - 117; + tensorData[tensorIndex + 2] = rgba[rgbaIndex] - 123; + } + + return tensorData; +} + +function element(id, type) { + const found = document.getElementById(id); + if (!(found instanceof type)) throw new Error(`Missing #${id} element`); + return found; +} diff --git a/services/ws-modules/pyface1/pyface1/__init__.py b/services/ws-modules/pyface1/pyface1/__init__.py new file mode 100644 index 0000000..4d2f480 --- /dev/null +++ b/services/ws-modules/pyface1/pyface1/__init__.py @@ -0,0 +1,29 @@ +"""pyface1: Python support code for the face detection workflow.""" + +from .face_detection import ( + FACE_MODEL_PATH, + config, + decode_outputs, + event_payload, + model_log_message, + preprocess_geometry, + run, + starting_status, + stopped_status, + status_text, + validate_output_names, +) + +__all__ = [ + "FACE_MODEL_PATH", + "config", + "decode_outputs", + "event_payload", + "model_log_message", + "preprocess_geometry", + "run", + "starting_status", + "stopped_status", + "status_text", + "validate_output_names", +] diff --git a/services/ws-modules/pyface1/pyface1/face_detection.py b/services/ws-modules/pyface1/pyface1/face_detection.py new file mode 100644 index 0000000..cf9b637 --- /dev/null +++ b/services/ws-modules/pyface1/pyface1/face_detection.py @@ -0,0 +1,431 @@ +"""RetinaFace post-processing helpers for the pyface1 workflow.""" + +from __future__ import annotations + +import math +import json +import time +from datetime import datetime +from functools import lru_cache +from typing import Iterable, Sequence, TypedDict + +FACE_MODEL_PATH = "/modules/et-model-face1/video_cv.onnx" +FACE_INPUT_WIDTH = 640 +FACE_INPUT_HEIGHT = 608 +FACE_INFERENCE_INTERVAL_MS = 750 +FACE_RENDER_INTERVAL_MS = 60 +FACE_MAX_INFERENCES = 20 +FACE_MAX_RUNTIME_MS = 30_000 +RETINAFACE_CONFIDENCE_THRESHOLD = 0.75 +RETINAFACE_NMS_THRESHOLD = 0.4 +RETINAFACE_VARIANCES = (0.1, 0.2) +RETINAFACE_MIN_SIZES = ((16.0, 32.0), (64.0, 128.0), (256.0, 512.0)) +RETINAFACE_STEPS = (8.0, 16.0, 32.0) + + +Box = list[float] +Prior = tuple[float, float, float, float] +DecodedBox = tuple[float, float, float, float] + + +class Detection(TypedDict): + label: str + class_index: int + score: float + box: Box + + +class DetectionSummary(TypedDict): + detections: list[Detection] + confidence: float + processed_at: str + + +async def run( + input_name, + output_names, + infer_once, + send_event, + render, + sleep_ms, + log, + set_status, + should_stop, +) -> None: + """Run the browser face detection workflow using JS platform callbacks.""" + output_names = [str(name) for name in output_names] + last_has_detection = False + inference_count = 0 + started_at = time.monotonic() + detections: list[Detection] = [] + + startup_summary = initial_summary() + set_status(status_text(str(input_name), output_names, startup_summary)) + + while not should_stop(): + elapsed_ms = (time.monotonic() - started_at) * 1000.0 + if inference_count >= FACE_MAX_INFERENCES or elapsed_ms >= FACE_MAX_RUNTIME_MS: + break + + try: + capture = await infer_once() + summary = decode_outputs( + capture["loc"], + capture["conf"], + capture["landm"], + capture["resize_ratio"], + capture["source_width"], + capture["source_height"], + ) + inference_count += 1 + detections = summary["detections"] + has_detection = bool(detections) + changed = last_has_detection != has_detection + last_has_detection = has_detection + + set_status(status_text(str(input_name), output_names, summary)) + render(detections_json(detections)) + send_event( + client_event_json( + event_payload( + summary, + changed, + str(input_name), + output_names, + capture["source_width"], + capture["source_height"], + ) + ) + ) + except Exception as exc: + message = f"pyface1 face detection: inference error\n{exc}" + set_status(message) + log(f"inference error: {exc}") + + remaining_ms = FACE_INFERENCE_INTERVAL_MS + while remaining_ms > 0 and not should_stop(): + render(detections_json(detections)) + delay = min(FACE_RENDER_INTERVAL_MS, remaining_ms) + await sleep_ms(delay) + remaining_ms -= delay + + if inference_count >= FACE_MAX_INFERENCES: + log(f"workflow finished automatically after {FACE_MAX_INFERENCES} inferences") + elif (time.monotonic() - started_at) * 1000.0 >= FACE_MAX_RUNTIME_MS: + log("workflow finished automatically after 30 seconds") + set_status(stopped_status()) + + +def config() -> dict[str, object]: + """Return browser-facing constants for this workflow.""" + return { + "model_path": FACE_MODEL_PATH, + "input_width": FACE_INPUT_WIDTH, + "input_height": FACE_INPUT_HEIGHT, + } + + +def starting_status() -> str: + return "pyface1 face detection: starting" + + +def stopped_status() -> str: + return "pyface1 face detection demo stopped." + + +def model_log_message() -> str: + return f"loading RetinaFace model from {FACE_MODEL_PATH}" + + +def validate_output_names(output_names: Iterable[object]) -> list[str]: + output_names = [str(name) for name in output_names] + if len(output_names) < 3: + raise ValueError("RetinaFace session did not expose the expected outputs") + return output_names + + +def initial_summary() -> DetectionSummary: + return { + "detections": [], + "confidence": 0.0, + "processed_at": "waiting for first inference", + } + + +def preprocess_geometry(source_width: float, source_height: float) -> dict[str, float]: + source_width = require_positive_finite(source_width, "source_width") + source_height = require_positive_finite(source_height, "source_height") + target_ratio = FACE_INPUT_HEIGHT / FACE_INPUT_WIDTH + resize_ratio = ( + FACE_INPUT_WIDTH / source_width + if source_height / source_width <= target_ratio + else FACE_INPUT_HEIGHT / source_height + ) + return { + "resize_ratio": resize_ratio, + "resized_width": float( + int(clamp(round(source_width * resize_ratio), 1, FACE_INPUT_WIDTH)) + ), + "resized_height": float( + int(clamp(round(source_height * resize_ratio), 1, FACE_INPUT_HEIGHT)) + ), + } + + +def detections_json(detections: list[Detection]) -> str: + return json.dumps(detections) + + +def client_event_json(details: dict[str, object]) -> str: + return json.dumps( + { + "type": "client_event", + "capability": "face_detection", + "action": "inference", + "details": details, + } + ) + + +def decode_outputs( + loc_values: Iterable[object], + conf_values: Iterable[object], + landm_values: Iterable[object], + resize_ratio: float, + source_width: float, + source_height: float, +) -> DetectionSummary: + """Decode RetinaFace ONNX outputs into detections and summary metadata.""" + resize_ratio = require_positive_finite(resize_ratio, "resize_ratio") + source_width = require_non_negative_finite(source_width, "source_width") + source_height = require_non_negative_finite(source_height, "source_height") + + loc = output_values(loc_values, "loc", 4) + conf = output_values(conf_values, "conf", 2) + landm = output_values(landm_values, "landm", 10) + prior_count = len(loc) // 4 + + if ( + prior_count == 0 + or len(conf) != prior_count * 2 + or len(landm) != prior_count * 10 + ): + raise ValueError("RetinaFace outputs had unexpected shapes") + + priors = model_priors() + if len(priors) != prior_count: + raise ValueError("RetinaFace priors did not match output count") + + detections: list[Detection] = [] + for index in range(prior_count): + score = softmax((conf[index * 2], conf[index * 2 + 1]))[1] + if score < RETINAFACE_CONFIDENCE_THRESHOLD: + continue + + decoded = decode_box( + ( + loc[index * 4], + loc[index * 4 + 1], + loc[index * 4 + 2], + loc[index * 4 + 3], + ), + priors[index], + ) + box: Box = [ + clamp((decoded[0] * FACE_INPUT_WIDTH) / resize_ratio, 0.0, source_width), + clamp((decoded[1] * FACE_INPUT_HEIGHT) / resize_ratio, 0.0, source_height), + clamp((decoded[2] * FACE_INPUT_WIDTH) / resize_ratio, 0.0, source_width), + clamp((decoded[3] * FACE_INPUT_HEIGHT) / resize_ratio, 0.0, source_height), + ] + + detections.append( + { + "label": "face", + "class_index": 0, + "score": score, + "box": box, + } + ) + + detections = apply_nms(detections, RETINAFACE_NMS_THRESHOLD) + confidence = detections[0]["score"] if detections else 0.0 + return { + "detections": detections, + "confidence": float(confidence), + "processed_at": datetime.now().strftime("%X"), + } + + +def status_text( + input_name: str, output_names: Iterable[object], summary: DetectionSummary +) -> str: + """Render the browser status text used by the face detection demo.""" + outputs = ", ".join(str(name) for name in output_names) + lines = [ + "pyface1 face detection demo", + f"model file: {FACE_MODEL_PATH}", + f"input: {input_name}", + f"outputs: {outputs}", + f"detections: {len(summary['detections'])}", + f"best confidence: {summary['confidence']:.4f}", + f"processed at: {summary['processed_at']}", + ] + + if summary["detections"]: + box = summary["detections"][0]["box"] + lines.extend( + [ + "", + f"best box: {box[0]:.1f}, {box[1]:.1f}, {box[2]:.1f}, {box[3]:.1f}", + ] + ) + + return "\n".join(lines) + + +def event_payload( + summary: DetectionSummary, + changed: bool, + input_name: str, + output_names: Iterable[object], + source_width: float, + source_height: float, +) -> dict[str, object]: + """Build the WebSocket client event payload.""" + source_width = require_non_negative_finite(source_width, "source_width") + source_height = require_non_negative_finite(source_height, "source_height") + + has_detection = bool(summary["detections"]) + return { + "mode": "detection", + "detected_class": "face" if has_detection else "no_detection", + "class_index": 0 if has_detection else -1, + "confidence": summary["confidence"], + "detections": summary["detections"], + "changed": changed, + "processed_at": summary["processed_at"], + "model_path": FACE_MODEL_PATH, + "input_name": input_name, + "output_names": list(output_names), + "source_resolution": { + "width": float(source_width), + "height": float(source_height), + }, + } + + +def build_priors(image_height: float, image_width: float) -> list[Prior]: + image_height = require_positive_finite(image_height, "image_height") + image_width = require_positive_finite(image_width, "image_width") + + priors: list[Prior] = [] + for index, step in enumerate(RETINAFACE_STEPS): + feature_map_height = math.ceil(image_height / step) + feature_map_width = math.ceil(image_width / step) + for row in range(feature_map_height): + for column in range(feature_map_width): + for min_size in RETINAFACE_MIN_SIZES[index]: + priors.append( + ( + ((column + 0.5) * step) / image_width, + ((row + 0.5) * step) / image_height, + min_size / image_width, + min_size / image_height, + ) + ) + return priors + + +@lru_cache(maxsize=1) +def model_priors() -> tuple[Prior, ...]: + return tuple(build_priors(float(FACE_INPUT_HEIGHT), float(FACE_INPUT_WIDTH))) + + +def decode_box(loc: Sequence[float], prior: Sequence[float]) -> DecodedBox: + if len(loc) != 4: + raise ValueError("loc must contain exactly 4 values") + if len(prior) != 4: + raise ValueError("prior must contain exactly 4 values") + + center_x = prior[0] + loc[0] * RETINAFACE_VARIANCES[0] * prior[2] + center_y = prior[1] + loc[1] * RETINAFACE_VARIANCES[0] * prior[3] + width = prior[2] * math.exp(loc[2] * RETINAFACE_VARIANCES[1]) + height = prior[3] * math.exp(loc[3] * RETINAFACE_VARIANCES[1]) + return ( + center_x - width / 2.0, + center_y - height / 2.0, + center_x + width / 2.0, + center_y + height / 2.0, + ) + + +def apply_nms(detections: list[Detection], threshold: float) -> list[Detection]: + threshold = require_non_negative_finite(threshold, "threshold") + kept: list[Detection] = [] + for candidate in sorted(detections, key=lambda item: item["score"], reverse=True): + if all(compute_iou(candidate, accepted) <= threshold for accepted in kept): + kept.append(candidate) + return kept + + +def compute_iou(left: Detection, right: Detection) -> float: + left_box = left["box"] + right_box = right["box"] + x1 = max(left_box[0], right_box[0]) + y1 = max(left_box[1], right_box[1]) + x2 = min(left_box[2], right_box[2]) + y2 = min(left_box[3], right_box[3]) + width = max(x2 - x1 + 1.0, 0.0) + height = max(y2 - y1 + 1.0, 0.0) + intersection = width * height + left_area = max(left_box[2] - left_box[0] + 1.0, 0.0) * max( + left_box[3] - left_box[1] + 1.0, + 0.0, + ) + right_area = max(right_box[2] - right_box[0] + 1.0, 0.0) * max( + right_box[3] - right_box[1] + 1.0, + 0.0, + ) + return intersection / max(left_area + right_area - intersection, 1e-6) + + +def softmax(values: Iterable[object]) -> list[float]: + values = [float(value) for value in values] + if not values: + return [] + max_value = max(values) + exps = [math.exp(value - max_value) for value in values] + total = sum(exps) + return [value / total for value in exps] + + +def clamp(value: float, minimum: float, maximum: float) -> float: + return max(minimum, min(value, maximum)) + + +def output_values(values: Iterable[object], name: str, stride: int) -> list[float]: + if stride <= 0: + raise ValueError("stride must be positive") + + try: + output = [float(value) for value in values] + except (TypeError, ValueError) as exc: + raise ValueError(f"{name} output contained non-numeric values") from exc + + if len(output) % stride != 0: + raise ValueError("RetinaFace outputs had unexpected shapes") + return output + + +def require_positive_finite(value: float, name: str) -> float: + value = float(value) + if not math.isfinite(value) or value <= 0.0: + raise ValueError(f"{name} must be a positive finite number") + return value + + +def require_non_negative_finite(value: float, name: str) -> float: + value = float(value) + if not math.isfinite(value) or value < 0.0: + raise ValueError(f"{name} must be a non-negative finite number") + return value diff --git a/services/ws-modules/pyface1/pyproject.toml b/services/ws-modules/pyface1/pyproject.toml new file mode 100644 index 0000000..919a929 --- /dev/null +++ b/services/ws-modules/pyface1/pyproject.toml @@ -0,0 +1,18 @@ +[project] +dependencies = [] +description = "Python face detection" +license = "Apache-2.0 OR MIT" +name = "et-ws-pyface1" +requires-python = ">=3.10" +version = "0.1.0" + +[build-system] +build-backend = "uv_build" +requires = ["uv_build>=0.10.2,<0.11.0"] + +[tool.uv.build-backend] +module-name = "pyface1" +module-root = "" + +[tool.ws-module] +js-main = "et_ws_pyface1.js" diff --git a/services/ws-modules/pyface1/tests/test_face_detection.py b/services/ws-modules/pyface1/tests/test_face_detection.py new file mode 100644 index 0000000..7007ce8 --- /dev/null +++ b/services/ws-modules/pyface1/tests/test_face_detection.py @@ -0,0 +1,71 @@ +import math +import unittest + +from pyface1.face_detection import ( + FACE_INPUT_HEIGHT, + FACE_INPUT_WIDTH, + build_priors, + decode_box, + decode_outputs, + output_values, + preprocess_geometry, + softmax, +) + + +class RetinaFaceTests(unittest.TestCase): + def test_softmax_handles_empty_equal_and_large_values(self) -> None: + self.assertEqual(softmax([]), []) + + equal = softmax([4.0, 4.0, 4.0, 4.0]) + self.assertTrue(all(abs(value - 0.25) < 1e-6 for value in equal)) + + large = softmax([1000.0, 1001.0]) + self.assertEqual(len(large), 2) + self.assertTrue(all(math.isfinite(value) for value in large)) + self.assertAlmostEqual(sum(large), 1.0, places=6) + self.assertGreater(large[1], large[0]) + + def test_prior_count_matches_model_input_shape(self) -> None: + priors = build_priors(float(FACE_INPUT_HEIGHT), float(FACE_INPUT_WIDTH)) + + self.assertEqual(len(priors), 15_960) + self.assertAlmostEqual(priors[0][0], 4.0 / FACE_INPUT_WIDTH, places=6) + self.assertAlmostEqual(priors[0][1], 4.0 / FACE_INPUT_HEIGHT, places=6) + self.assertAlmostEqual(priors[0][2], 16.0 / FACE_INPUT_WIDTH, places=6) + self.assertAlmostEqual(priors[0][3], 16.0 / FACE_INPUT_HEIGHT, places=6) + + def test_zero_offsets_decode_to_prior_box(self) -> None: + decoded = decode_box([0.0, 0.0, 0.0, 0.0], [0.5, 0.5, 0.25, 0.5]) + + self.assertAlmostEqual(decoded[0], 0.375, places=6) + self.assertAlmostEqual(decoded[1], 0.25, places=6) + self.assertAlmostEqual(decoded[2], 0.625, places=6) + self.assertAlmostEqual(decoded[3], 0.75, places=6) + + def test_output_values_rejects_trailing_shape_data(self) -> None: + with self.assertRaisesRegex(ValueError, "unexpected shapes"): + output_values([0.0, 1.0, 2.0], "loc", 4) + + def test_decode_outputs_rejects_invalid_resize_ratio(self) -> None: + loc = [0.0] * (15_960 * 4) + conf = [0.0] * (15_960 * 2) + landm = [0.0] * (15_960 * 10) + + with self.assertRaisesRegex(ValueError, "resize_ratio"): + decode_outputs(loc, conf, landm, 0.0, 640.0, 480.0) + + def test_preprocess_geometry_preserves_source_aspect_ratio(self) -> None: + wide = preprocess_geometry(1280.0, 720.0) + self.assertAlmostEqual(wide["resize_ratio"], FACE_INPUT_WIDTH / 1280.0) + self.assertEqual(wide["resized_width"], 640.0) + self.assertEqual(wide["resized_height"], 360.0) + + tall = preprocess_geometry(480.0, 960.0) + self.assertAlmostEqual(tall["resize_ratio"], FACE_INPUT_HEIGHT / 960.0) + self.assertEqual(tall["resized_width"], 304.0) + self.assertEqual(tall["resized_height"], 608.0) + + +if __name__ == "__main__": + unittest.main() diff --git a/services/ws-server/Dockerfile b/services/ws-server/Dockerfile index 28a752f..1316d9a 100644 --- a/services/ws-server/Dockerfile +++ b/services/ws-server/Dockerfile @@ -14,7 +14,7 @@ WORKDIR /workspace COPY . . -RUN npm install --omit=dev --prefix /workspace/runtime-deps onnxruntime-web +RUN npm install --omit=dev --prefix /workspace/runtime-deps onnxruntime-web pyodide RUN cargo build -p et-ws-server --release --locked FROM debian:bookworm-slim AS runtime @@ -32,6 +32,7 @@ COPY --from=builder /workspace/services/ws-wasm-agent/pkg ./services/ws-wasm-age COPY --from=builder /workspace/services/ws-modules ./services/ws-modules COPY --from=builder /workspace/data/model-modules ./data/model-modules COPY --from=builder /workspace/runtime-deps/node_modules/onnxruntime-web ./node_modules/onnxruntime-web +COPY --from=builder /workspace/runtime-deps/node_modules/pyodide ./node_modules/pyodide RUN mkdir -p /app/storage \ && chown -R app:app /app diff --git a/services/ws-server/static/app.js b/services/ws-server/static/app.js index da5f7e2..efa9f7d 100644 --- a/services/ws-server/static/app.js +++ b/services/ws-server/static/app.js @@ -47,7 +47,7 @@ const populateModuleDropdown = async () => { append(`Skipping ${name}: already loaded as the main WASM agent module`); continue; } - if (name === "onnxruntime-web") { + if (name === "onnxruntime-web" || name === "pyodide") { append(`Skipping ${name}: already loaded as a dependency`); continue; } diff --git a/utilities/cli/src/lib.rs b/utilities/cli/src/lib.rs index 705465b..39543b8 100644 --- a/utilities/cli/src/lib.rs +++ b/utilities/cli/src/lib.rs @@ -231,10 +231,14 @@ fn generate_mise_deployment(cluster: &ClusterInput, output_dir: &Path) -> Result .map(|p| format!(" {p}")) .collect::>() .join(",\\\n"); - let ws_server_run = format!( - "export MODULES_PATHS=\"\\\n{},\\\n $(mise where npm:onnxruntime-web)/lib/node_modules\"\ncargo run\n", - module_paths_lines - ); + let mise_dependency_paths = [ + "$(mise where npm:onnxruntime-web)/lib/node_modules", + "$(mise where npm:pyodide)/lib/node_modules", + ] + .map(|path| format!(" {path}")) + .join(",\\\n"); + let ws_server_run = + format!("export MODULES_PATHS=\"\\\n{module_paths_lines},\\\n{mise_dependency_paths}\"\ncargo run\n"); let ws_server_rel = relative_path_from(&output_abs, &ws_server_dir).display().to_string(); let mut root = Table::new(); @@ -725,11 +729,12 @@ fn scenario_module_paths(ws_server_dir: &Path, module_names: &[String]) -> Vec Vec { - let mut paths = Vec::with_capacity(module_names.len() + 4); + let mut paths = Vec::with_capacity(module_names.len() + 5); paths.push("/app/services/ws-server/static".to_string()); paths.push("/app/services/ws-wasm-agent".to_string()); paths.push("/app/data/model-modules".to_string()); paths.push("/app/node_modules/onnxruntime-web".to_string()); + paths.push("/app/node_modules/pyodide".to_string()); for module_name in module_names { paths.push(format!("/app/services/ws-modules/{module_name}")); } diff --git a/utilities/cli/src/tests.rs b/utilities/cli/src/tests.rs index 417ec56..84c0b44 100644 --- a/utilities/cli/src/tests.rs +++ b/utilities/cli/src/tests.rs @@ -33,6 +33,7 @@ fn docker_image_module_paths_include_static_root_module() { assert!(paths.contains(&"/app/services/ws-wasm-agent".to_string())); assert!(paths.contains(&"/app/data/model-modules".to_string())); assert!(paths.contains(&"/app/node_modules/onnxruntime-web".to_string())); + assert!(paths.contains(&"/app/node_modules/pyodide".to_string())); assert!(paths.contains(&"/app/services/ws-modules/face-detection".to_string())); } diff --git a/verification/local/output/facility-security-scenario/compose.yaml b/verification/local/output/facility-security-scenario/compose.yaml index e3e7ef2..0bc8605 100644 --- a/verification/local/output/facility-security-scenario/compose.yaml +++ b/verification/local/output/facility-security-scenario/compose.yaml @@ -29,6 +29,7 @@ services: /app/services/ws-wasm-agent,\ /app/data/model-modules,\ /app/node_modules/onnxruntime-web,\ + /app/node_modules/pyodide,\ /app/services/ws-modules/face-detection,\ /app/services/ws-modules/har1" OTLP_AUTH_PASSWORD: "1234" diff --git a/verification/local/output/facility-security-scenario/mise.toml b/verification/local/output/facility-security-scenario/mise.toml index b8f960b..07d8a9f 100644 --- a/verification/local/output/facility-security-scenario/mise.toml +++ b/verification/local/output/facility-security-scenario/mise.toml @@ -25,7 +25,8 @@ export MODULES_PATHS="\ ../../data/model-modules,\ ../ws-modules/face-detection,\ ../ws-modules/har1,\ - $(mise where npm:onnxruntime-web)/lib/node_modules" + $(mise where npm:onnxruntime-web)/lib/node_modules,\ + $(mise where npm:pyodide)/lib/node_modules" cargo run '''