modelcontextprotocol · panyam · May 5, 2026 · May 5, 2026 · May 5, 2026 · May 5, 2026
diff --git a/src/scenarios/index.ts b/src/scenarios/index.ts
@@ -64,6 +64,17 @@ import {
 
 import { DNSRebindingProtectionScenario } from './server/dns-rebinding';
 
+import { TasksLifecycleScenario } from './server/tasks/lifecycle';
+import { TasksCapabilityNegotiationScenario } from './server/tasks/capability';
+import { TasksWireFieldsScenario } from './server/tasks/wire-fields';
+import { TasksRequestStateRemovalScenario } from './server/tasks/request-state';
+import { TasksMRTRInputScenario } from './server/tasks/mrtr-input';
+import { TasksRequestHeadersScenario } from './server/tasks/headers';
+import { TasksDispatchScenario } from './server/tasks/dispatch';
+import { TasksStatusNotificationsScenario } from './server/tasks/notifications';
+import { TasksRequiredTaskErrorScenario } from './server/tasks/required-task-error';
+import { MrtrEphemeralFlowScenario } from './server/mrtr/ephemeral-flow';
+
 import {
   authScenariosList,
   backcompatScenariosList,
@@ -82,7 +93,29 @@ const pendingClientScenariosList: ClientScenario[] = [
 
   // On hold until server-side SSE improvements are made
   // https://github.com/modelcontextprotocol/typescript-sdk/pull/1129
-  new ServerSSEPollingScenario()
+  new ServerSSEPollingScenario(),
+
+  // SEP-2663 Tasks extension lifecycle.
+  // The everything-server does not implement the
+  // io.modelcontextprotocol/tasks extension, so all-scenarios.test.ts
+  // cannot exercise these against the default fixture. Active runs target
+  // a SEP-2663-conformant server via the dedicated
+  // tasks/all-scenarios.test.ts harness.
+  new TasksLifecycleScenario(),
+  new TasksCapabilityNegotiationScenario(),
+  new TasksWireFieldsScenario(),
+  new TasksRequestStateRemovalScenario(),
+  new TasksMRTRInputScenario(),
+  new TasksRequestHeadersScenario(),
+  new TasksDispatchScenario(),
+  new TasksStatusNotificationsScenario(),
+  new TasksRequiredTaskErrorScenario(),
+
+  // SEP-2322 MRTR (ephemeral InputRequiredResult flow).
+  // Targets a different fixture than tasks scenarios; the dedicated
+  // mrtr/all-scenarios.test.ts runner points at an MRTR-conformant
+  // server via MRTR_SERVER_URL / MRTR_SERVER_CMD.
+  new MrtrEphemeralFlowScenario()
 ];
 
 // All client scenarios
@@ -140,7 +173,27 @@ const allClientScenariosList: ClientScenario[] = [
   new PromptsGetWithImageScenario(),
 
   // Security scenarios
-  new DNSRebindingProtectionScenario()
+  new DNSRebindingProtectionScenario(),
+
+  // SEP-2663 Tasks extension.
+  // Listed here so the CLI can find each scenario by name and so the
+  // active/pending filter sees it; pendingClientScenariosList above
+  // excludes them from automatic runs against the everything-server
+  // (which doesn't implement io.modelcontextprotocol/tasks yet).
+  new TasksLifecycleScenario(),
+  new TasksCapabilityNegotiationScenario(),
+  new TasksWireFieldsScenario(),
+  new TasksRequestStateRemovalScenario(),
+  new TasksMRTRInputScenario(),
+  new TasksRequestHeadersScenario(),
+  new TasksDispatchScenario(),
+  new TasksStatusNotificationsScenario(),
+  new TasksRequiredTaskErrorScenario(),
+
+  // SEP-2322 MRTR (ephemeral InputRequiredResult flow). Targets a
+  // dedicated MRTR fixture — out of scope for the default
+  // everything-server until SEP-2322 lands there.
+  new MrtrEphemeralFlowScenario()
 ];
 
 // Active client scenarios (excludes pending)

diff --git a/src/scenarios/server/_shared/test-runner.ts b/src/scenarios/server/_shared/test-runner.ts
@@ -0,0 +1,56 @@
+/**
+ * Test-runner utilities for server-conformance scenarios.
+ *
+ * Used by `*.test.ts` runner files that auto-spawn a fixture binary
+ * before running scenarios. These helpers are language-agnostic and
+ * harness-only — they don't touch MCP protocol, so they don't belong
+ * in the SDK.
+ *
+ * Single responsibility today: TCP readiness polling. Spawn / cleanup
+ * scaffolding stays inline in each runner so the file reads top-to-bottom
+ * without indirection (per AGENTS.md "repetitive check blocks are fine").
+ */
+
+import { connect } from 'net';
+
+/**
+ * Language-agnostic readiness check: poll the URL's host/port until a
+ * TCP connection succeeds. Attempts time out after 1 s and are retried
+ * after a 200 ms pause until `timeoutMs` elapses; then throws with the
+ * last failure. Port defaults to 443 for https URLs, otherwise 80.
+ */
+export async function waitForServerReady(
+  url: string,
+  timeoutMs: number
+): Promise<void> {
+  const u = new URL(url);
+  const port = parseInt(u.port || (u.protocol === 'https:' ? '443' : '80'), 10);
+  // URL keeps IPv6 literals bracketed ("[::1]"); net.connect wants them bare.
+  const host = u.hostname.replace(/^\[|\]$/g, '');
+  const deadline = Date.now() + timeoutMs;
+  let lastErr: Error | null = null;
+
+  while (Date.now() < deadline) {
+    try {
+      await new Promise<void>((resolve, reject) => {
+        const socket = connect({ host, port }, () => {
+          socket.end();
+          resolve();
+        });
+        const fail = (err: Error): void => {
+          socket.destroy();
+          reject(err);
+        };
+        socket.once('error', fail);
+        socket.setTimeout(1_000, () => fail(new Error('connect timeout')));
+      });
+      return;
+    } catch (err) {
+      lastErr = err instanceof Error ? err : new Error(String(err));
+      await new Promise((r) => setTimeout(r, 200));
+    }
+  }
+  throw new Error(
+    `${u.hostname}:${port} did not accept TCP connections (last: ${lastErr?.message ?? 'unknown'})`
+  );
+}
diff --git a/src/scenarios/server/_shared/wire-format.ts b/src/scenarios/server/_shared/wire-format.ts
@@ -0,0 +1,33 @@
+/**
+ * Wire-format validation helpers shared across server-conformance
+ * scenarios. Pure predicates / regex — no I/O, no async.
+ *
+ * Pragmatic choices are documented per helper. When validation needs
+ * to tighten (e.g., the spec mandates a stricter timestamp format), edit
+ * here once and every scenario picks it up.
+ */
+
+/**
+ * ISO-8601 timestamp prefix (YYYY-MM-DDThh:mm:ss). Tolerant about
+ * the timezone tail (`Z`, `+00:00`, `+0000`) and sub-second precision —
+ * matches what real servers emit (Go `time.RFC3339Nano`,
+ * Python `datetime.isoformat()`, JavaScript `toISOString()`).
+ *
+ * Why a regex over `Date.parse` / `new Date(s).toISOString() === s` /
+ * `Temporal.Instant.from`:
+ *   - `Date.parse` accepts RFC-2822, "May 4 2026", and other
+ *     non-ISO strings — too permissive.
+ *   - `new Date(s).toISOString() === s` is too strict — rejects
+ *     valid `+00:00`-style offsets that don't survive the canonical
+ *     `Z` round-trip.
+ *   - `Temporal.Instant.from` is Node 24+ experimental.
+ *
+ * Swap this constant for a stdlib validator if/when one becomes
+ * broadly available.
+ */
+export const ISO_8601_PATTERN = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/;
+
+/** True only for string inputs that match the ISO-8601 prefix pattern. */
+export function isIso8601(s: unknown): boolean {
+  return typeof s !== 'string' ? false : ISO_8601_PATTERN.test(s);
+}
diff --git a/src/scenarios/server/mrtr/README.md b/src/scenarios/server/mrtr/README.md
@@ -0,0 +1,116 @@
+# SEP-2322 MRTR — Server Conformance
+
+Tests any MCP server that implements the SEP-2322 ephemeral
+Multi Round-Trip Request flow on `tools/call` — the
+`InputRequiredResult` → retry-with-`inputResponses` → `ToolResult`
+contract that lets a tool gather elicitation / sampling / roots input
+without creating a task envelope. The variant was renamed from
+`IncompleteResult` / `"incomplete"` in SEP-2322 commit `de6d76fb`
+(merged 2026-05-06).
+
+## Specs covered
+
+| SEP      | What it adds                                                                                                     | Where it shows up                              |
+| -------- | ---------------------------------------------------------------------------------------------------------------- | ---------------------------------------------- |
+| SEP-2322 | Ephemeral MRTR — `resultType` discriminator, `inputRequests` / `inputResponses` keyed maps, `requestState` token | every check                                    |
+| SEP-2663 | MRTR → Tasks composition (final round returns `CreateTaskResult`)                                                | `mrtr-tasks-composition` (SKIPPED — see below) |
+
+## ClientScenario classes
+
+### `mrtr-ephemeral-flow` (`ephemeral-flow.ts`)
+
+A single scenario covering the full ephemeral MRTR contract — per the
+AGENTS.md "fewer scenarios, more checks" rule. A server that
+implemented elicitation round-trips but not sampling round-trips would
+be incoherent, so they bundle.
+
+| Check                                    | What it tests                                                                                                                      |
+| ---------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------- |
+| `mrtr-basic-elicitation-round-trip`      | Round 1 returns `InputRequiredResult` with `elicitation/create`; round 2 completes with the answer reflected                       |
+| `mrtr-sampling-round-trip`               | Same flow with `sampling/createMessage`                                                                                            |
+| `mrtr-roots-list-round-trip`             | Same flow with `roots/list`                                                                                                        |
+| `mrtr-request-state-round-trip`          | When server emits `requestState`, it's a non-empty string and the server validates the echo                                        |
+| `mrtr-multiple-input-requests-one-round` | A single `InputRequiredResult` MAY carry inputRequests for `elicitation/create` + `sampling/createMessage` + `roots/list` together |
+| `mrtr-multi-round-flow`                  | A handler MAY take 2+ rounds; each round mints a fresh `requestState`; final result reflects answers from every round              |
+| `mrtr-wrong-input-key-rerequests`        | When client sends a wrong `inputResponses` key, server SHOULD re-request via `InputRequiredResult` rather than erroring            |
+| `mrtr-tasks-composition`                 | **SKIPPED** — see "Open issues" below                                                                                              |
+
+## Required server fixtures
+
+The fixture server MUST register these tools:
+
+| Tool                                     | Behavior                                                                                    |
+| ---------------------------------------- | ------------------------------------------------------------------------------------------- |
+| `test_tool_with_elicitation`             | One `elicitation/create` round, completes with answer reflected                             |
+| `test_incomplete_result_sampling`        | One `sampling/createMessage` round                                                          |
+| `test_incomplete_result_list_roots`      | One `roots/list` round                                                                      |
+| `test_incomplete_result_request_state`   | Exercises `requestState` validation; final result includes `state-ok` to confirm validation |
+| `test_incomplete_result_multiple_inputs` | Emits 3+ inputRequests of different methods in one round                                    |
+| `test_incomplete_result_multi_round`     | Drives 2+ MRTR rounds, final result references every answer                                 |
+| `test_incomplete_result_elicitation`     | Emits inputRequest for `user_name`; server re-requests on wrong-key responses               |
+
+The fixture can be implemented in any language; one example reference
+implementation lives at
+[`panyam/mcpkit/examples/mrtr`](https://github.com/panyam/mcpkit/tree/main/examples/mrtr).
+
+## Running
+
+```bash
+# Against an already-running server
+MRTR_SERVER_URL=http://localhost:8080/mcp \
+  npx vitest run src/scenarios/server/mrtr/all-scenarios.test.ts
+
+# Auto-spawn a fixture in beforeAll
+MRTR_SERVER_URL=http://localhost:18093/mcp \
+MRTR_SERVER_CMD="/path/to/mrtr-server --port 18093" \
+  npx vitest run src/scenarios/server/mrtr/all-scenarios.test.ts
+```
+
+## Open issues
+
+### `mrtr-tasks-composition` deferred
+
+SEP-2663 commit `451f5e1` (Apr 30) made the MRTR → Tasks composition
+flow normative: a `tools/call` MAY exchange `InputRequiredResult` rounds
+to gather input, then return `CreateTaskResult` to go async on a
+subsequent round. Two blockers prevent enabling the check today:
+
+1. **Spec watch — discriminator value.** SEP-2322 merged on 2026-05-06
+   with `"input_required"` (commit `de6d76fb` renamed the variant from
+   IncompleteResult / `"incomplete"` per dsp-ant request). SEP-2663's
+   PR head (82fb2c4d as of 2026-05-07 PM) still reads `"incomplete"`
+   on line 121 of the mdx — Caitie's 5/15 RC commitment (issue
+   comment 4384052694) tracks the alignment. The constant lives in
+   `MRTR_INPUT_REQUIRED_RESULT_TYPE` (helpers.ts) so it's a one-line
+   flip if SEP-2663's eventual alignment surprises us.
+
+2. **Reference-impl gap.** The natural server-side implementation
+   pattern for tasks (mint task up-front, run handler in a goroutine /
+   async task) means the handler's `InputRequiredResult` signal isn't
+   visible to the middleware in time — by the time the handler returns
+   `IsInputRequired`, the `CreateTaskResult` is already on the wire. SDKs
+   in any language need an inverted middleware pattern that runs the
+   first round synchronously and only spins up the task once the
+   handler signals async-promotion.
+   ([panyam/mcpkit issue 347](https://github.com/panyam/mcpkit/issues/347)
+   tracks this for one example impl; SDKs in any language hit the
+   same architectural choice.)
+
+The check is registered with `status: 'SKIPPED'` so it's discoverable
+but doesn't fail conformance runs. When both blockers resolve, remove
+the SKIPPED short-circuit in `ephemeral-flow.ts` Check 8.
+
+## Design notes
+
+### Why the MRTR scenarios share helpers with `tasks/`
+
+`MRTR_INPUT_REQUIRED_RESULT_TYPE`, the result-type predicates
+(`isInputRequiredResult`, `isCompleteResult`), and the elicitation/sampling/
+roots mocks live in `mrtr/helpers.ts`. The shared `AnyResult` Zod
+passthrough schema and `waitForTerminal`/`waitForStatus` polling helpers
+are imported from the sibling `../tasks/helpers` because both scenario
+sets share the same wire-shape problem (SDK Zod schemas strip extension
+fields). Pair `client.request(req, AnyResult)` with the SDK's
+`StreamableHTTPClientTransport` and you preserve every SEP-2322 / SEP-2663
+field. When the upstream SDK gains schemas for those shapes, the
+passthrough disappears in favor of the typed schemas directly.