diff --git a/.github/workflows/traceability.yml b/.github/workflows/traceability.yml
new file mode 100644
index 00000000..44c85d9f
--- /dev/null
+++ b/.github/workflows/traceability.yml
@@ -0,0 +1,134 @@
+name: Refresh SEP traceability manifest
+
+# Regenerates src/seps/traceability.json by running the conformance suite against
+# the reference SDK and recording which check IDs were emitted, then opens a PR
+# with the diff. NOT a PR gate — runs on demand / on a schedule and proposes an
+# update for review. plan.modelcontextprotocol.io reads the committed file from
+# main.
+#
+# Depends on the `conformance sdk` subcommand (#277), which clones+builds the SDK
+# and runs the client+server suites. The `run` job executes third-party SDK code,
+# so it has NO repo write token (read-only perms, persist-credentials: false) and
+# only uploads results as an artifact; the separate `propose` job holds the
+# write/PR permissions and never executes SDK code.
+
+on:
+  workflow_dispatch:
+    inputs:
+      sdk:
+        description: 'SDK ref to run against (e.g. typescript-sdk@<ref>)'
+        default: 'typescript-sdk@main'
+  schedule:
+    - cron: '0 6 * * 1' # Weekly, Monday 06:00 UTC.
+
+concurrency:
+  group: traceability-refresh
+  cancel-in-progress: true
+
+jobs:
+  run:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    env:
+      SDK_REF: ${{ inputs.sdk || 'typescript-sdk@main' }}
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          persist-credentials: false # no git token while SDK code runs
+
+      - uses: actions/setup-node@v6
+        with:
+          node-version: 24
+          cache: npm
+
+      - run: npm ci
+      - run: npm run build
+
+      - name: Run conformance suites against the reference SDK
+        # `sdk` requires --mode client|server; run both into the same results dir
+        # (the second reuses the cached checkout + build via --skip-build).
+        run: |
+          node dist/index.js sdk "$SDK_REF" --mode client --suite all -o results
+          node dist/index.js sdk "$SDK_REF" --mode server --suite all --skip-build -o results
+
+      - name: Fail if no results were produced
+        run: |
+          if [ -z "$(find results -name checks.json -print -quit 2>/dev/null)" ]; then
+            echo "No checks.json produced — the suite run failed; not proposing a manifest."
+            exit 1
+          fi
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: conformance-results
+          path: results
+          retention-days: 7
+
+  propose:
+    needs: run
+    runs-on: ubuntu-latest
+    # Requires the repo/org setting "Allow GitHub Actions to create and approve
+    # pull requests" to be enabled, otherwise `gh pr create` fails.
+    permissions:
+      contents: write
+      pull-requests: write
+    env:
+      SDK_REF: ${{ inputs.sdk || 'typescript-sdk@main' }}
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-node@v6
+        with:
+          node-version: 24
+          cache: npm
+      - run: npm ci
+      - run: npm run build
+
+      - uses: actions/download-artifact@v4
+        with:
+          name: conformance-results
+          path: results
+
+      - name: Regenerate manifest
+        run: |
+          set -euo pipefail
+          # Record the resolved sha (stable per SDK commit) so the manifest's
+          # `source` only changes when the SDK actually advances — no per-run noise.
+          # ls-remote prints one line per matching ref (a branch and a tag may share
+          # a name) and nothing when $ref is already a commit sha, so keep only the
+          # first match and fall back to the ref itself rather than recording an
+          # empty or multi-line source.
+          ref="${SDK_REF#*@}"
+          sha="$(git ls-remote https://github.com/modelcontextprotocol/typescript-sdk.git "$ref" | awk 'NR==1 { print $1 }')"
+          sha="${sha:-$ref}"
+          node dist/index.js traceability --results results \
+            --source "typescript-sdk@${sha:0:12}"
+
+      - name: Open/update the rolling refresh PR
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          set -euo pipefail
+          if git diff --quiet -- src/seps/traceability.json; then
+            echo "traceability.json unchanged"
+            exit 0
+          fi
+          # One rolling branch/PR, force-updated each run, so the schedule does
+          # not accrue a new PR every week.
+          branch="traceability-refresh"
+          git config user.name 'github-actions[bot]'
+          git config user.email 'github-actions[bot]@users.noreply.github.com'
+          git checkout -B "$branch"
+          git add src/seps/traceability.json
+          git commit -m "chore: refresh SEP traceability manifest ($SDK_REF)"
+          git push --force origin "$branch"
+          # `gh pr view "$branch"` also matches closed/merged PRs, which would stop
+          # a new PR from ever being opened once the first one merges; look for an
+          # OPEN PR on the rolling branch instead.
+          open_prs="$(gh pr list --head "$branch" --state open --json number --jq 'length')"
+          if [ "$open_prs" -eq 0 ]; then
+            gh pr create \
+              --head "$branch" \
+              --title 'chore: refresh SEP traceability manifest' \
+              --body 'Automated refresh from a conformance run against the reference SDK. Review the coverage diff before merging.'
+          fi
diff --git a/.prettierignore b/.prettierignore
new file mode 100644
index 00000000..43399fcc
--- /dev/null
+++ b/.prettierignore
@@ -0,0 +1,5 @@
+# Generated by `conformance traceability` — formatting is owned by the
+# generator (deterministic JSON.stringify), not Prettier. Without this, the
+# repo's `prettier --check .` would reformat the file and fight the generator's
+# output (and the refresh workflow's `git diff` check).
+src/seps/traceability.json
diff --git a/AGENTS.md b/AGENTS.md
index c91484eb..5e915fff 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -82,6 +82,27 @@ npx @modelcontextprotocol/conformance new-sep
 
 The command looks up PR #`` in `modelcontextprotocol/modelcontextprotocol` (SEP numbers are PR numbers), derives `spec_url` from the `docs/specification/draft/*.mdx` file it changes, and writes `src/seps/sep-.yaml` with TODO `requirements[]` rows. Use `--spec-path` or `--spec-url` to skip the lookup. The `new-sep` Claude Code skill drives the same flow end-to-end, parses the spec diff, and fills in the requirement rows.
 
+### Traceability manifest
+
+`src/seps/traceability.json` is a generated map of, per SEP, which declared `check:` IDs are actually emitted when the conformance suite runs against the reference SDK. It is consumed by plan.modelcontextprotocol.io to track SEP-2484 progress.
+
+The emitted check IDs come from a real suite run (not a source scan), so dynamic (template-literal) IDs resolve to their concrete values. Generate the manifest from a results directory:
+
+```sh
+# 1. Run the suite against the reference SDK, collecting checks.json files:
+node dist/index.js client --command '<client-command>' --suite all -o results
+node dist/index.js server --url '<server-url>' --suite all -o results
+# 2. Build the manifest from those results:
+npm run traceability -- --results results
+npm run traceability -- --results results --strict # exit 1 on any untested (advisory)
+```
+
+Manifest shape: `{ schemaVersion, docs, source, seps }`, where `seps` is keyed by SEP number. Each requirement is `tested` (its check ID was emitted) or `untested` (declared but never emitted — a real gap, or a check that only fires against a deliberately-broken impl, i.e. it needs a negative test). `"tested" means a scenario emitted the check ID, NOT that any SDK passes it` — per-SDK results live in `tier-check`. Matching is exact, so a scenario's emitted check IDs must match the requirement slugs in the yaml (one check ID per MUST/SHOULD, emitted once per case). `source` records what was run against (e.g. `typescript-sdk@<sha>`); the `docs` field points back here.
+
+Contract for consumers (plan.modelcontextprotocol.io): a SEP appears only if it has a traceability yaml or emits `sep-NNNN-*` check IDs. **A SEP absent from the manifest has no conformance artifacts — treat it as not-started** (diff against your own SEP list to find them). `untracked` lists emitted IDs with no yaml row (usually scenario gates).
+
+The manifest is refreshed by `.github/workflows/traceability.yml` (manual/scheduled), which runs the suite against typescript-sdk and opens a PR with the diff — it is **not** a PR gate. Untested checks are advisory for now; the intended future policy is that an untested check must be backed by a negative test.
+ ## Examples: prove it passes and fails A new scenario should come with: @@ -101,3 +122,4 @@ Use the existing CLI runner (`npx @modelcontextprotocol/conformance client|serve - `npm test` passes - For non-trivial scenario changes, run against at least one real SDK (typescript-sdk or python-sdk) to see actual output. For changes to shared infrastructure (runner, tier-check), test against go-sdk or csharp-sdk too. - Scenario is registered in the right suite in `src/scenarios/index.ts` +- If you changed a `sep-*.yaml` or scenario check IDs, `src/seps/traceability.json` will drift; the traceability workflow refreshes it via PR (or regenerate locally with `--results` from a suite run) diff --git a/package.json b/package.json index 79535b0a..621436f7 100644 --- a/package.json +++ b/package.json @@ -19,6 +19,7 @@ "lint:fix": "eslint src/ examples/ --fix && prettier --write .", "lint:fix_check": "npm run lint:fix && git diff --exit-code --quiet", "tier-check": "node dist/index.js tier-check", + "traceability": "tsx src/index.ts traceability", "check": "npm run typecheck && npm run lint", "typecheck": "tsgo --noEmit", "prepack": "npm run build", diff --git a/src/index.ts b/src/index.ts index a3431cd3..a3019f0a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -47,6 +47,7 @@ import { import { createTierCheckCommand } from './tier-check'; import { createNewSepCommand } from './new-sep'; import { createSdkCommand } from './sdk-runner'; +import { createTraceabilityCommand } from './traceability'; import packageJson from '../package.json'; // Note on naming: `command` refers to which CLI command is calling this. 
@@ -548,6 +549,9 @@ program.addCommand(createNewSepCommand()); // SDK command - run local conformance against an SDK at a specific ref program.addCommand(createSdkCommand()); +// SEP traceability manifest command +program.addCommand(createTraceabilityCommand()); + // List scenarios command program .command('list') diff --git a/src/seps/traceability.json b/src/seps/traceability.json new file mode 100644 index 00000000..5618dd41 --- /dev/null +++ b/src/seps/traceability.json @@ -0,0 +1,409 @@ +{ + "schemaVersion": 1, + "docs": "https://github.com/modelcontextprotocol/conformance/blob/main/AGENTS.md#traceability-manifest", + "source": "typescript-sdk@6f0bf49d", + "seps": { + "2164": { + "yaml": "src/seps/sep-2164.yaml", + "specUrl": "https://modelcontextprotocol.io/specification/draft/server/resources#error-handling", + "requirements": [ + { + "check": "sep-2164-no-empty-contents", + "status": "tested", + "text": "Servers MUST NOT return an empty contents array for a non-existent resource" + }, + { + "check": "sep-2164-error-code", + "status": "tested", + "text": "Servers SHOULD return standard JSON-RPC errors for common failure cases: Resource not found: -32602 (Invalid Params)" + } + ], + "excluded": [ + { + "text": "clients SHOULD also accept -32002 as a resource not found error", + "reason": "Client-side error handling is implementation-defined; not protocol-observable" + } + ], + "unkeyed": [], + "untracked": [ + "sep-2164-data-uri" + ], + "summary": { + "tested": 2, + "untested": 0, + "excluded": 1, + "untracked": 1, + "unkeyed": 0 + } + }, + "2207": { + "yaml": null, + "specUrl": null, + "requirements": [], + "excluded": [], + "unkeyed": [], + "untracked": [ + "sep-2207-client-metadata-grant-types", + "sep-2207-offline-access-not-requested", + "sep-2207-offline-access-requested" + ], + "summary": { + "tested": 0, + "untested": 0, + "excluded": 0, + "untracked": 3, + "unkeyed": 0 + } + }, + "2243": { + "yaml": "src/seps/sep-2243.yaml", + "specUrl": 
"https://modelcontextprotocol.io/specification/draft/basic/transports#standard-mcp-request-headers", + "requirements": [ + { + "check": "sep-2243-client-includes-standard-headers", + "status": "tested", + "text": "The client MUST include the standard MCP request headers on each POST request. These headers are REQUIRED for compliance." + }, + { + "check": "sep-2243-header-name-case-insensitive", + "status": "tested", + "text": "Clients and servers MUST use case-insensitive comparisons for header names." + }, + { + "check": "sep-2243-server-reject-invalid-headers", + "status": "tested", + "text": "Servers that process the request body MUST reject requests with mismatched or missing standard-header values, returning HTTP 400 Bad Request." + }, + { + "check": "sep-2243-server-reject-error-code", + "status": "tested", + "text": "When rejecting a request due to header validation failure, servers SHOULD include a JSON-RPC error response using error code -32001." + }, + { + "check": "sep-2243-client-supports-custom-headers", + "status": "untested", + "text": "MCP clients MUST support this feature [custom headers via x-mcp-header]." + }, + { + "check": "sep-2243-client-mirrors-designated-params", + "status": "untested", + "text": "When a client invokes a tool whose definition includes such designations, conforming clients MUST mirror the designated parameter values into HTTP headers as described below." 
+ }, + { + "check": "sep-2243-x-mcp-header-not-empty", + "status": "untested", + "text": "The x-mcp-header value MUST NOT be empty.", + "url": "https://modelcontextprotocol.io/specification/draft/server/tools#custom-headers" + }, + { + "check": "sep-2243-x-mcp-header-charset", + "status": "untested", + "text": "The x-mcp-header value MUST contain only ASCII characters (excluding space and `:`).", + "url": "https://modelcontextprotocol.io/specification/draft/server/tools#custom-headers" + }, + { + "check": "sep-2243-x-mcp-header-unique", + "status": "untested", + "text": "The x-mcp-header value MUST be case-insensitively unique within a single tool definition.", + "url": "https://modelcontextprotocol.io/specification/draft/server/tools#custom-headers" + }, + { + "check": "sep-2243-x-mcp-header-primitive-only", + "status": "untested", + "text": "x-mcp-header MUST only be applied to parameters with primitive types (number, string, or boolean).", + "url": "https://modelcontextprotocol.io/specification/draft/server/tools#custom-headers" + }, + { + "check": "sep-2243-client-reject-invalid-tool", + "status": "tested", + "text": "Clients MUST reject tool definitions where any x-mcp-header value violates these constraints. Rejection means the client MUST exclude the invalid tool from the set of tools returned by tools/list.", + "url": "https://modelcontextprotocol.io/specification/draft/server/tools#custom-headers" + }, + { + "check": "sep-2243-client-encode-values", + "status": "untested", + "text": "Clients MUST encode parameter values before including them in HTTP headers: number values MUST be converted to their decimal string representation; boolean values MUST be converted to the lowercase strings \"true\" or \"false\"." 
+ }, + { + "check": "sep-2243-client-base64-unsafe", + "status": "untested", + "text": "When a value cannot be safely represented as plain ASCII (e.g., contains non-ASCII characters, control characters, or leading/trailing whitespace), clients MUST use Base64 encoding of the UTF-8 representation, wrapped as =?base64?{encoded}?=." + }, + { + "check": "sep-2243-server-decode-base64", + "status": "untested", + "text": "Servers and intermediaries that need to inspect these values MUST decode them accordingly." + }, + { + "check": "sep-2243-client-omit-null", + "status": "tested", + "text": "Parameter value is null or omitted: Client MUST omit the header." + }, + { + "check": "sep-2243-server-not-expect-null", + "status": "untested", + "text": "Parameter value is null or omitted: Server MUST NOT expect the header." + }, + { + "check": "sep-2243-server-reject-missing-required", + "status": "untested", + "text": "Required parameter is omitted: Server MUST reject with JSON-RPC error." + }, + { + "check": "sep-2243-server-reject-invalid-param-chars", + "status": "untested", + "text": "Servers MUST reject requests with a recognized Mcp-Param-{Name} header that contain invalid characters." + }, + { + "check": "sep-2243-server-validate-param-match", + "status": "untested", + "text": "Any server that processes the message body MUST validate that encoded header values, after decoding if Base64-encoded, match the corresponding parameter values in the body." + }, + { + "check": "sep-2243-server-reject-param-mismatch", + "status": "untested", + "text": "Servers MUST reject requests with a 400 Bad Request HTTP status and JSON-RPC error code -32001 if any validation fails." + } + ], + "excluded": [ + { + "text": "Clients SHOULD log a warning when rejecting a tool definition due to invalid x-mcp-header, including the tool name and the reason.", + "reason": "Log output is not wire-observable." 
+ }, + { + "text": "Server developers SHOULD NOT mark sensitive parameters (such as passwords, API keys, tokens, or PII) with x-mcp-header.", + "reason": "Design guidance to humans; not protocol-observable." + }, + { + "text": "Intermediaries MUST return an appropriate HTTP error status for validation failures.", + "reason": "Intermediary requirement; conformance harness tests clients and servers, not intermediaries." + }, + { + "text": "Intermediate servers that do not recognize an Mcp-Param-{Name} header MUST forward it and otherwise ignore it.", + "reason": "Intermediary requirement; conformance harness tests clients and servers, not intermediaries." + } + ], + "unkeyed": [], + "untracked": [ + "sep-2243-invalid-tool-tools-list-gate", + "sep-2243-param-header-tool-call-gate", + "sep-2243-server-accepts-whitespace-header-value", + "sep-2243-server-no-xmcp-tool" + ], + "summary": { + "tested": 6, + "untested": 14, + "excluded": 4, + "untracked": 4, + "unkeyed": 0 + } + }, + "2575": { + "yaml": "src/seps/sep-2575.yaml", + "specUrl": "https://modelcontextprotocol.io/specification/draft/basic/lifecycle", + "requirements": [ + { + "check": "sep-2575-client-populates-meta", + "status": "untested", + "text": "Every client request MUST include the following io.modelcontextprotocol/* fields in _meta: protocolVersion, clientInfo, clientCapabilities.", + "url": "https://modelcontextprotocol.io/specification/draft/basic/index#meta" + }, + { + "check": "sep-2575-server-rejects-undeclared-capability", + "status": "untested", + "text": "A server MUST NOT rely on capabilities the client has not declared. 
If processing a request requires a capability the client did not include in io.modelcontextprotocol/clientCapabilities, the server MUST return a MissingRequiredClientCapabilityError (-32003).", + "url": "https://modelcontextprotocol.io/specification/draft/basic/index#meta" + }, + { + "check": "sep-2575-missing-capability-http-400", + "status": "untested", + "text": "On HTTP, the response status MUST be 400 Bad Request [for MissingRequiredClientCapabilityError].", + "url": "https://modelcontextprotocol.io/specification/draft/basic/index#meta" + }, + { + "check": "sep-2575-server-tags-subscription-id", + "status": "untested", + "text": "On notifications delivered via a subscriptions/listen stream, the server MUST include io.modelcontextprotocol/subscriptionId in _meta so the client can correlate the notification with the originating subscription request.", + "url": "https://modelcontextprotocol.io/specification/draft/basic/index#meta" + }, + { + "check": "sep-2575-server-stateless-no-prior-context", + "status": "untested", + "text": "A server MUST NOT treat connection or process identity as a proxy for conversation or session continuity. / Servers MUST NOT rely on prior requests over the same connection to establish context (e.g., capabilities, protocol version, client identity)." + }, + { + "check": "sep-2575-server-stateless-no-connection-reuse-required", + "status": "untested", + "text": "Servers MUST NOT require that a client reuse the same connection to perform related operations." 
+ }, + { + "check": "sep-2575-server-unsupported-version-error", + "status": "untested", + "text": "If the server does not implement the requested version (whether the version is unknown to the server, or is a known version the server has chosen not to support), it MUST respond with an UnsupportedProtocolVersionError listing the versions it does support.", + "url": "https://modelcontextprotocol.io/specification/draft/basic/lifecycle#protocol-version-negotiation" + }, + { + "check": "sep-2575-client-retry-supported-version", + "status": "untested", + "text": "The client SHOULD select a mutually supported version from the supported list and retry the request, or surface an error to the user if no compatible version exists.", + "url": "https://modelcontextprotocol.io/specification/draft/basic/lifecycle#protocol-version-negotiation" + }, + { + "check": "sep-2575-server-implements-discover", + "status": "untested", + "text": "Servers MUST implement server/discover.", + "url": "https://modelcontextprotocol.io/specification/draft/server/discover" + }, + { + "check": "sep-2575-http-server-no-independent-requests-on-stream", + "status": "untested", + "text": "The server MUST NOT send independent JSON-RPC requests on this stream. 
Server-to-client interactions are embedded as input requests inside an IncompleteResult.", + "url": "https://modelcontextprotocol.io/specification/draft/basic/transports#receiving-messages-1" + }, + { + "check": "sep-2575-http-server-disconnect-is-cancel", + "status": "untested", + "text": "Closing the SSE response stream MUST be treated by the server as cancellation of that request.", + "url": "https://modelcontextprotocol.io/specification/draft/basic/transports#cancellation-1" + }, + { + "check": "sep-2575-http-server-stops-on-cancel", + "status": "untested", + "text": "The server SHOULD stop work on the cancelled request as soon as practical and MUST NOT send any further messages for it [HTTP].", + "url": "https://modelcontextprotocol.io/specification/draft/basic/transports#cancellation-1" + }, + { + "check": "sep-2575-http-client-sends-version-header", + "status": "untested", + "text": "Every POST request to the MCP endpoint MUST include an MCP-Protocol-Version header.", + "url": "https://modelcontextprotocol.io/specification/draft/basic/transports#protocol-version-header" + }, + { + "check": "sep-2575-http-version-header-matches-meta", + "status": "untested", + "text": "The header value MUST match the io.modelcontextprotocol/protocolVersion field carried in the request body _meta.", + "url": "https://modelcontextprotocol.io/specification/draft/basic/transports#protocol-version-header" + }, + { + "check": "sep-2575-http-server-header-mismatch-400", + "status": "untested", + "text": "If the values do not match, the server MUST reject the request with 400 Bad Request and a HeaderMismatch JSON-RPC error.", + "url": "https://modelcontextprotocol.io/specification/draft/basic/transports#protocol-version-header" + }, + { + "check": "sep-2575-http-server-unsupported-version-400", + "status": "untested", + "text": "If the server does not implement the requested protocol version, it MUST respond with 400 Bad Request and an UnsupportedProtocolVersionError listing its 
supported versions.", + "url": "https://modelcontextprotocol.io/specification/draft/basic/transports#protocol-version-header" + }, + { + "check": "sep-2575-http-server-method-not-found-404", + "status": "untested", + "text": "If the server does not implement the requested RPC method, it MUST respond with 404 Not Found and a JSON-RPC error with code -32601 (Method not found).", + "url": "https://modelcontextprotocol.io/specification/draft/basic/transports#protocol-version-header" + }, + { + "check": "sep-2575-server-honors-notification-filter", + "status": "untested", + "text": "The server MUST NOT send notification types the client has not explicitly requested.", + "url": "https://modelcontextprotocol.io/specification/draft/basic/utilities/subscriptions#opening-a-stream" + }, + { + "check": "sep-2575-server-sends-subscription-ack", + "status": "untested", + "text": "The server MUST send notifications/subscriptions/acknowledged as the first message on the stream.", + "url": "https://modelcontextprotocol.io/specification/draft/basic/utilities/subscriptions#acknowledgment" + }, + { + "check": "sep-2575-client-declares-elicitation-capability", + "status": "untested", + "text": "Clients that support elicitation MUST declare the elicitation capability in _meta.io.modelcontextprotocol/clientCapabilities on each request.", + "url": "https://modelcontextprotocol.io/specification/draft/client/elicitation#capabilities" + }, + { + "check": "sep-2575-client-declares-roots-capability", + "status": "untested", + "text": "Clients that support roots MUST declare the roots capability in _meta.io.modelcontextprotocol/clientCapabilities on each request.", + "url": "https://modelcontextprotocol.io/specification/draft/client/roots#capabilities" + }, + { + "check": "sep-2575-client-declares-sampling-capability", + "status": "untested", + "text": "Clients that support sampling MUST declare the sampling capability in _meta.io.modelcontextprotocol/clientCapabilities on each request.", + 
"url": "https://modelcontextprotocol.io/specification/draft/client/sampling#capabilities" + }, + { + "check": "sep-2575-server-declares-prompts-in-discover", + "status": "untested", + "text": "Servers that support prompts MUST declare the prompts capability in their DiscoverResult.", + "url": "https://modelcontextprotocol.io/specification/draft/server/prompts#capabilities" + }, + { + "check": "sep-2575-server-sends-prompts-list-changed-on-subscription", + "status": "untested", + "text": "[A server with the listChanged] capability SHOULD send a notification to clients that have opened a subscriptions/listen stream with promptsListChanged: true.", + "url": "https://modelcontextprotocol.io/specification/draft/server/prompts#list-changed-notification" + }, + { + "check": "sep-2575-server-sends-tools-list-changed-on-subscription", + "status": "untested", + "text": "[A server with the listChanged] capability SHOULD send a notification to clients that have opened a subscriptions/listen stream with toolsListChanged: true.", + "url": "https://modelcontextprotocol.io/specification/draft/server/tools#list-changed-notification" + }, + { + "check": "sep-2575-server-no-log-without-loglevel", + "status": "untested", + "text": "The server MUST NOT emit notifications/message for a request that does not include [io.modelcontextprotocol/logLevel in _meta].", + "url": "https://modelcontextprotocol.io/specification/draft/server/utilities/logging#per-request-log-level" + } + ], + "excluded": [ + { + "text": "State that needs to span multiple requests (e.g., long-running tasks, application-level handles) MUST be referenced by an explicit identifier the client passes on each request.", + "reason": "architectural guidance, observable only via subscriptionId/task-id rows already listed" + }, + { + "text": "To distinguish notifications belonging to different concurrent subscriptions, clients MUST correlate notifications using the io.modelcontextprotocol/subscriptionId field carried in 
_meta.", + "reason": "client-internal demux; not observable on the wire from the harness" + }, + { + "text": "The client SHOULD check the acknowledged filter against what it requested and handle any unsupported types gracefully.", + "reason": "internal comparison; \"gracefully\" has no wire-observable definition" + }, + { + "text": "Because there is no per-request status code to drive fallback, a client that supports both eras SHOULD probe with server/discover first [stdio backward compatibility].", + "reason": "stdio client harness not implemented — see https://github.com/modelcontextprotocol/conformance/issues/258" + }, + { + "text": "To cancel an in-flight request [on stdio], the client MUST send a notifications/cancelled notification referencing the request ID.", + "reason": "stdio client harness not implemented — see https://github.com/modelcontextprotocol/conformance/issues/258" + }, + { + "text": "Servers SHOULD stop work on a cancelled request as soon as practical and MUST NOT send any further messages for it [stdio].", + "reason": "stdio client harness not implemented — see https://github.com/modelcontextprotocol/conformance/issues/258" + }, + { + "text": "If the server process exits unexpectedly, the client SHOULD restart it.", + "reason": "stdio client harness not implemented — see https://github.com/modelcontextprotocol/conformance/issues/258" + }, + { + "text": "If the server returns UnsupportedProtocolVersionError, [the stdio client] SHOULD retry using one of the advertised supportedVersions rather than falling back to initialize.", + "reason": "stdio client harness not implemented — see https://github.com/modelcontextprotocol/conformance/issues/258" + }, + { + "text": "On stdio, if the connection is terminated and then re-established, the client MUST re-send subscriptions/listen to re-establish its subscriptions.", + "reason": "stdio client harness not implemented — see https://github.com/modelcontextprotocol/conformance/issues/258" + } + ], + 
"unkeyed": [], + "untracked": [], + "summary": { + "tested": 0, + "untested": 26, + "excluded": 9, + "untracked": 0, + "unkeyed": 0 + } + } + } +} diff --git a/src/traceability/index.test.ts b/src/traceability/index.test.ts new file mode 100644 index 00000000..721365e6 --- /dev/null +++ b/src/traceability/index.test.ts @@ -0,0 +1,124 @@ +import { describe, it, expect } from 'vitest'; +import { computeTraceability, DeclaredSep } from './index'; + +const decl = ( + sep: number, + requirements: DeclaredSep['requirements'], + yaml = `src/seps/sep-${sep}.yaml`, + specUrl: string | null = `https://modelcontextprotocol.io/sep-${sep}` +): DeclaredSep => ({ sep, yaml, specUrl, requirements }); + +describe('computeTraceability', () => { + it('marks a declared check tested when its ID was emitted', () => { + const m = computeTraceability({ + declared: [decl(2164, [{ check: 'sep-2164-error-code', text: 'x' }])], + emitted: new Set(['sep-2164-error-code']) + }); + expect(m.seps['2164'].requirements[0]).toEqual({ + check: 'sep-2164-error-code', + text: 'x', + status: 'tested' + }); + expect(m.seps['2164'].summary.tested).toBe(1); + }); + + it('marks a declared check untested when its ID was not emitted', () => { + const m = computeTraceability({ + declared: [decl(2164, [{ check: 'sep-2164-missing' }])], + emitted: new Set() + }); + expect(m.seps['2164'].requirements[0].status).toBe('untested'); + expect(m.seps['2164'].summary.untested).toBe(1); + }); + + it('propagates text, url, and issue onto requirement rows', () => { + const m = computeTraceability({ + declared: [ + decl(2243, [ + { + check: 'sep-2243-x', + text: 'The client MUST do X', + url: 'https://spec/x#y', + issue: 'https://gh/1' + } + ]) + ], + emitted: new Set(['sep-2243-x']) + }); + expect(m.seps['2243'].requirements[0]).toEqual({ + check: 'sep-2243-x', + text: 'The client MUST do X', + url: 'https://spec/x#y', + issue: 'https://gh/1', + status: 'tested' + }); + }); + + it('collects excluded rows with reasons and 
issue links', () => { + const m = computeTraceability({ + declared: [ + decl(2243, [ + { + text: 'intermediary rule', + excluded: 'not tested', + issue: 'https://x/1' + } + ]) + ], + emitted: new Set() + }); + expect(m.seps['2243'].excluded).toEqual([ + { text: 'intermediary rule', reason: 'not tested', issue: 'https://x/1' } + ]); + expect(m.seps['2243'].requirements).toEqual([]); + }); + + it('lists rows with neither check nor excluded as unkeyed', () => { + const m = computeTraceability({ + declared: [decl(2243, [{ text: 'orphan row' }])], + emitted: new Set() + }); + expect(m.seps['2243'].unkeyed).toEqual([{ text: 'orphan row' }]); + expect(m.seps['2243'].summary.unkeyed).toBe(1); + }); + + it('reports emitted IDs with no yaml row as untracked', () => { + const m = computeTraceability({ + declared: [decl(2164, [{ check: 'sep-2164-error-code' }])], + emitted: new Set(['sep-2164-error-code', 'sep-2164-extra-check']) + }); + expect(m.seps['2164'].untracked).toEqual(['sep-2164-extra-check']); + }); + + it('includes SEPs with emitted IDs but no yaml (tests without traceability)', () => { + const m = computeTraceability({ + declared: [], + emitted: new Set(['sep-2207-offline-access-requested']) + }); + expect(m.seps['2207'].yaml).toBeNull(); + expect(m.seps['2207'].requirements).toEqual([]); + expect(m.seps['2207'].untracked).toEqual([ + 'sep-2207-offline-access-requested' + ]); + }); + + it('sorts SEP keys numerically and stamps schema/meaning/source', () => { + const m = computeTraceability({ + declared: [ + decl(2243, [{ check: 'sep-2243-a' }]), + decl(414, [{ check: 'sep-414-a' }]) + ], + emitted: new Set(), + source: 'typescript-sdk@abc123' + }); + expect(Object.keys(m.seps)).toEqual(['414', '2243']); + expect(m.schemaVersion).toBe(1); + expect(m.docs).toMatch(/^https?:\/\//); + expect(m.source).toBe('typescript-sdk@abc123'); + }); + + it('defaults source to null', () => { + const m = computeTraceability({ declared: [], emitted: new Set() }); + 
/** Directory holding the per-SEP traceability yaml files. */
const SEPS_DIR = 'src/seps';

/** Location of the generated manifest, alongside the yaml files. */
const OUT_FILE = path.join(SEPS_DIR, 'traceability.json');

/** URL stamped into the manifest's `docs` field. */
const DOCS =
  'https://github.com/modelcontextprotocol/conformance/blob/main/AGENTS.md#traceability-manifest';

/** One requirement row as written in a SEP yaml (mirrors new-sep's RequirementRow). */
interface RawRequirement {
  text?: string;
  check?: string;
  excluded?: string;
  issue?: string;
  url?: string;
}

/** Top-level shape read from a `sep-NNNN.yaml` document; every key is optional. */
interface RawSepYaml {
  sep?: number;
  spec_url?: string;
  requirements?: RawRequirement[];
}

/** A SEP's declared requirements, ready to be joined against emitted IDs. */
export interface DeclaredSep {
  sep: number;
  yaml: string;
  specUrl: string | null;
  requirements: RawRequirement[];
}

/** Matches check IDs of the form `sep-<digits>-…`. */
const CHECK_ID_RE = /^sep-\d+-/;

/**
 * Extract the SEP number embedded in a check ID.
 *
 * @param id - A check ID such as `sep-2164-error-code`.
 * @returns The numeric SEP, or `null` when `id` does not start with
 *   `sep-<digits>-`.
 */
function sepOf(id: string): number | null {
  const match = /^sep-(\d+)-/.exec(id);
  if (match === null) return null;
  return Number(match[1]);
}
/**
 * Pure: join declared requirements against the emitted check-ID set into the
 * manifest. No filesystem access, so it is fully unit-testable.
 *
 * A requirement is "tested" iff its check ID is a member of `emitted`,
 * regardless of which SEP number the ID itself carries. An emitted ID is
 * "untracked" only when no declared row (in any SEP) names it.
 *
 * @param args.declared - Declared SEPs; when two entries share a `sep`, the
 *   later one wins.
 * @param args.emitted - Check IDs emitted by the suite run.
 * @param args.source - Provenance string recorded verbatim; defaults to null.
 * @returns The manifest, with SEP keys inserted in ascending numeric order.
 */
export function computeTraceability(args: {
  declared: DeclaredSep[];
  emitted: Set<string>;
  source?: string | null;
}): TraceabilityManifest {
  const { declared, emitted } = args;

  // Bucket emitted IDs under the SEP number embedded in the ID; IDs without a
  // `sep-<n>-` prefix belong to no SEP and can never be reported as untracked.
  const emittedBySep = new Map<number, Set<string>>();
  for (const id of emitted) {
    const sep = sepOf(id);
    if (sep === null) continue;
    let bucket = emittedBySep.get(sep);
    if (!bucket) emittedBySep.set(sep, (bucket = new Set<string>()));
    bucket.add(id);
  }

  const declaredBySep = new Map<number, DeclaredSep>();
  for (const d of declared) declaredBySep.set(d.sep, d);

  // Every check ID named by any surviving yaml row. Used for the untracked
  // filter so an ID declared under a different SEP's yaml is not misreported.
  const declaredCheckIds = new Set<string>();
  for (const d of declaredBySep.values()) {
    for (const r of d.requirements ?? []) {
      if (r.check) declaredCheckIds.add(r.check);
    }
  }

  const allSeps = [
    ...new Set<number>([...declaredBySep.keys(), ...emittedBySep.keys()])
  ].sort((a, b) => a - b);

  const seps: Record<string, SepTraceability> = {};

  for (const sep of allSeps) {
    const d = declaredBySep.get(sep);
    const emittedIds = emittedBySep.get(sep) ?? new Set<string>();

    const requirements: RequirementTraceability[] = [];
    const excluded: ExcludedRequirement[] = [];
    const unkeyed: UnkeyedRequirement[] = [];

    for (const r of d?.requirements ?? []) {
      const check = r.check;
      if (check) {
        requirements.push({
          check,
          // Look the ID up in the full emitted set, not this SEP's bucket.
          status: emitted.has(check) ? 'tested' : 'untested',
          ...(r.text ? { text: r.text } : {}),
          ...(r.url ? { url: r.url } : {}),
          ...(r.issue ? { issue: r.issue } : {})
        });
      } else if (r.excluded) {
        excluded.push({
          text: r.text ?? '',
          reason: r.excluded,
          ...(r.issue ? { issue: r.issue } : {})
        });
      } else {
        unkeyed.push({ text: r.text ?? '' });
      }
    }

    // Untracked: emitted IDs not declared in any yaml row.
    const untracked = [...emittedIds]
      .filter((id) => !declaredCheckIds.has(id))
      .sort();

    // A SEP that has no yaml and whose emitted IDs are all declared under
    // another SEP's yaml has nothing to report; do not emit an empty entry.
    if (!d && untracked.length === 0) continue;

    seps[String(sep)] = {
      yaml: d?.yaml ?? null,
      specUrl: d?.specUrl ?? null,
      requirements,
      excluded,
      unkeyed,
      untracked,
      summary: {
        tested: requirements.filter((r) => r.status === 'tested').length,
        untested: requirements.filter((r) => r.status === 'untested').length,
        excluded: excluded.length,
        untracked: untracked.length,
        unkeyed: unkeyed.length
      }
    };
  }

  return {
    schemaVersion: TRACEABILITY_SCHEMA_VERSION,
    docs: DOCS,
    source: args.source ?? null,
    seps
  };
}

/** Serialize as 2-space-indented JSON followed by a trailing newline. */
export function serializeManifest(manifest: TraceabilityManifest): string {
  return JSON.stringify(manifest, null, 2) + '\n';
}

// --- filesystem gathering (not unit-tested; thin IO wrappers) -------------

/**
 * Recursively collect emitted `sep-NNNN-*` check IDs from `checks.json` files.
 *
 * Best-effort by design: returns an empty set when `resultsDir` cannot be
 * listed, and skips any `checks.json` that cannot be read or parsed, or whose
 * top-level value is not an array.
 *
 * @param resultsDir - Root directory to search.
 * @returns String `id` values matching `CHECK_ID_RE` found in those arrays.
 */
export function collectEmittedIds(resultsDir: string): Set<string> {
  const ids = new Set<string>();
  let entries: string[];
  try {
    entries = readdirSync(resultsDir, { recursive: true, encoding: 'utf-8' });
  } catch {
    return ids;
  }
  for (const rel of entries) {
    if (path.basename(rel) !== 'checks.json') continue;
    try {
      const parsed: unknown = JSON.parse(
        readFileSync(path.join(resultsDir, rel), 'utf8')
      );
      if (!Array.isArray(parsed)) continue;
      for (const entry of parsed as unknown[]) {
        // Entries may be anything JSON allows; only string ids count.
        const id = (entry as { id?: unknown } | null | undefined)?.id;
        if (typeof id === 'string' && CHECK_ID_RE.test(id)) ids.add(id);
      }
    } catch {
      // skip unreadable/partial result files
    }
  }
  return ids;
}
/**
 * Print per-SEP gaps to stderr.
 *
 * A SEP gets a headline only when it has at least one untested requirement or
 * a non-zero unkeyed/untracked count in its summary. The headline is followed
 * by one line per untested check ID and one per untracked ID.
 */
function reportGaps(manifest: TraceabilityManifest): void {
  Object.entries(manifest.seps).forEach(([sepKey, entry]) => {
    const untestedRows = entry.requirements.filter(
      (row) => row.status === 'untested'
    );

    // Count/label pairs in display order; zero counts are dropped.
    const counts: Array<[number, string]> = [
      [untestedRows.length, 'untested'],
      [entry.summary.unkeyed, 'unkeyed'],
      [entry.summary.untracked, 'untracked']
    ];
    const headline = counts
      .filter(([count]) => count)
      .map(([count, label]) => `${count} ${label}`);
    if (headline.length === 0) return;

    const yamlNote = entry.yaml ? '' : ' (no yaml)';
    console.error(`sep-${sepKey}: ${headline.join(', ')}${yamlNote}`);
    untestedRows.forEach((row) => console.error(` untested: ${row.check}`));
    entry.untracked.forEach((id) => console.error(` untracked: ${id}`));
  });
}
/**
 * Build the `traceability` CLI subcommand.
 *
 * The action gathers declared requirements from the SEP yaml files and the
 * emitted check IDs from `--results`, computes the manifest, and then either
 * writes it to `OUT_FILE` (default) or, with `--check`, compares it with the
 * file on disk. It calls `process.exit(1)` when:
 *   - the results dir does not exist;
 *   - no check IDs were collected and `--allow-empty` was not given;
 *   - `--check` finds the on-disk file missing or different;
 *   - `--strict` is set and any requirement is untested.
 *
 * `--results` and `--source` are declared with `<value>` placeholders so
 * commander parses them as options that take an argument, not boolean flags.
 */
export function createTraceabilityCommand(): Command {
  return new Command('traceability')
    .description(
      'Generate src/seps/traceability.json: a manifest mapping declared SEP ' +
        'requirements to conformance scenarios that emit their check IDs'
    )
    .addHelpText('after', HELP_EPILOG)
    .requiredOption(
      '--results <dir>',
      'Results dir from a suite run against the reference SDK ' +
        '(reads <dir>/**/checks.json)'
    )
    .option(
      '--source <ref>',
      'What the run was against, recorded in the manifest (e.g. typescript-sdk@<sha>)'
    )
    .option(
      '--allow-empty',
      'Write even when zero check IDs were collected (default: refuse)'
    )
    .option(
      '--check',
      'Do not write; exit 1 if the on-disk traceability.json is stale'
    )
    .option('--strict', 'Exit 1 if any declared requirement is untested')
    .action(
      (options: {
        results: string;
        source?: string;
        allowEmpty?: boolean;
        check?: boolean;
        strict?: boolean;
      }) => {
        if (!existsSync(options.results)) {
          console.error(`results dir not found: ${options.results}`);
          process.exit(1);
        }
        const declared = gatherDeclared();
        const emitted = collectEmittedIds(options.results);

        // Guard the footgun: an empty/wrong results dir would mark everything
        // untested and silently clobber the manifest.
        if (emitted.size === 0 && !options.allowEmpty) {
          console.error(
            `no sep-NNNN-* check IDs found under ${options.results} — did the ` +
              `suite run write checks.json there? Pass --allow-empty to override.`
          );
          process.exit(1);
        }

        const manifest = computeTraceability({
          declared,
          emitted,
          source: options.source ?? null
        });
        const serialized = serializeManifest(manifest);
        const untestedTotal = Object.values(manifest.seps).reduce(
          (n, c) => n + c.summary.untested,
          0
        );

        if (options.check) {
          let current = '';
          try {
            current = readFileSync(OUT_FILE, 'utf8');
          } catch {
            // missing file -> stale
          }
          if (current !== serialized) {
            // `source` is part of the serialized manifest, so the regenerate
            // hint must repeat --source or its output would still differ.
            const sourceArg = options.source
              ? ` --source ${options.source}`
              : '';
            console.error(
              `${OUT_FILE} is out of date. Regenerate with ` +
                `\`npm run traceability -- --results ${options.results}${sourceArg}\`, ` +
                `review with \`git diff ${OUT_FILE}\`, and commit.`
            );
            process.exit(1);
          }
          console.log(`${OUT_FILE} is up to date.`);
        } else {
          writeFileSync(OUT_FILE, serialized);
          console.log(
            `wrote ${OUT_FILE}: ${Object.keys(manifest.seps).length} SEP(s)`
          );
          reportGaps(manifest);
        }

        // Applied in both modes, after the write/compare has finished.
        if (options.strict && untestedTotal > 0) process.exit(1);
      }
    );
}
/** One declared requirement (a yaml `check:` row) and whether it was exercised. */
export interface RequirementTraceability {
  /** The check ID declared in the yaml row. */
  check: string;
  status: CheckStatus;
  /** The normative sentence from the yaml (for tracker display). */
  text?: string;
  /** Per-requirement spec URL from the yaml, if finer than the SEP's specUrl. */
  url?: string;
  /** Tracking issue from the yaml, if any. */
  issue?: string;
}

/** A yaml row marked `excluded:`; it carries a reason and no check ID. */
export interface ExcludedRequirement {
  text: string;
  /** The value of the row's `excluded:` key. */
  reason: string;
  issue?: string;
}

/** A yaml row with neither `check:` nor `excluded:` (an authoring gap). */
export interface UnkeyedRequirement {
  text: string;
}

/** Everything the manifest records for a single SEP. */
export interface SepTraceability {
  /** Path to the traceability yaml, or null if scenarios exist but no yaml. */
  yaml: string | null;
  /** Spec URL from the yaml's `spec_url`, or null. */
  specUrl: string | null;
  requirements: RequirementTraceability[];
  excluded: ExcludedRequirement[];
  unkeyed: UnkeyedRequirement[];
  /**
   * Check IDs emitted by the suite run but not declared in any yaml row.
   * Usually scenario scaffolding (gates) or extra checks beyond the SEP.
   */
  untracked: string[];
  /** Row counts for the arrays above; tested/untested split `requirements`. */
  summary: {
    tested: number;
    untested: number;
    excluded: number;
    untracked: number;
    unkeyed: number;
  };
}

/** Root object serialized to src/seps/traceability.json. */
export interface TraceabilityManifest {
  schemaVersion: number;
  /** Pointer to where this file's semantics are documented (not prose-in-data). */
  docs: string;
  /**
   * What the emitted set was collected against, e.g. "typescript-sdk@<sha>".
   * Provenance for consumers; no wall-clock timestamp so an unchanged run
   * produces an empty diff. null when not supplied.
   */
  source: string | null;
  /** Keyed by SEP number (as a string). */
  seps: Record<string, SepTraceability>;
}