diff --git a/CHANGELOG.md b/CHANGELOG.md index 8541a34b129..346603d950d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,15 @@ +# Unreleased + +### Notable enhancements + +- **Self-update subsystem — Tier 2 (manual click).** + - Admins on a git install can click "Apply update" at `/admin/update`. Etherpad runs a 60s session drain (with T-60 / T-30 / T-10 broadcasts to every pad), `git fetch / checkout / pnpm install --frozen-lockfile / pnpm run build:ui`, and exits with code 75 so a process supervisor restarts it on the new version. The next boot runs a 60s health check; if `/health` doesn't come up, the previous SHA + lockfile are restored automatically. + - Crash-loop guard: if the new version reboots more than twice without the health check completing, RollbackHandler forces a rollback regardless of the timer. + - Terminal `rollback-failed` state surfaces a strong banner; the admin clicks Acknowledge once they've manually recovered to clear the lock and re-allow Tier 2 attempts. + - New settings under `updates.*`: `preApplyGraceMinutes`, `drainSeconds`, `rollbackHealthCheckSeconds`, `diskSpaceMinMB`, `requireSignature`, `trustedKeysPath`. Tag signature verification is opt-in (default `false`) — see `doc/admin/updates.md` for the keyring setup. + - **A process supervisor (systemd / pm2 / docker `--restart=unless-stopped`) is required to apply updates.** Without one, exit 75 leaves the instance down. + - Tiers 3 (auto with grace window) and 4 (autonomous in maintenance window) remain designed but unimplemented and will land in subsequent releases. 
+ # 2.7.3 ### Breaking changes diff --git a/admin/src/components/UpdateBanner.tsx b/admin/src/components/UpdateBanner.tsx index 36f1faddc29..e69e89c2625 100644 --- a/admin/src/components/UpdateBanner.tsx +++ b/admin/src/components/UpdateBanner.tsx @@ -17,7 +17,21 @@ export const UpdateBanner = () => { return () => { cancelled = true; }; }, [setUpdateStatus]); - if (!updateStatus || !updateStatus.latest) return null; + if (!updateStatus) return null; + + // Terminal rollback-failed wins over the regular "update available" banner — + // an admin who left the system in this state needs to fix it before any + // other admin work matters. + if (updateStatus.execution?.status === 'rollback-failed') { + return ( +
+ {' '} + {t('update.banner.cta')} +
+ ); + } + + if (!updateStatus.latest) return null; if (updateStatus.currentVersion === updateStatus.latest.version) return null; return ( diff --git a/admin/src/pages/UpdatePage.tsx b/admin/src/pages/UpdatePage.tsx index 0d669a446f3..8e9c3354884 100644 --- a/admin/src/pages/UpdatePage.tsx +++ b/admin/src/pages/UpdatePage.tsx @@ -9,37 +9,75 @@ type FetchState = | {kind: 'error', status: number} | {kind: 'ok'}; +const IN_FLIGHT_STATUSES = ['preflight', 'draining', 'executing', 'rolling-back']; + export const UpdatePage = () => { const {t} = useTranslation(); const us = useStore((s) => s.updateStatus); const setUpdateStatus = useStore((s) => s.setUpdateStatus); + const log = useStore((s) => s.updateLog); + const setLog = useStore((s) => s.setUpdateLog); // Self-fetch so the page renders an explicit state even if UpdateBanner's // best-effort fetch never landed (route returns 404 when tier=off, 401/403 // if requireAdminForStatus is set, or a transient network error). const [fetchState, setFetchState] = useState(us ? 
{kind: 'ok'} : {kind: 'loading'}); + const [actionInFlight, setActionInFlight] = useState(false); + + const refreshStatus = async () => { + try { + const r = await fetch('/admin/update/status', {credentials: 'same-origin'}); + if (r.ok) { + const data = await r.json(); + setUpdateStatus(data); + setFetchState({kind: 'ok'}); + } else if (r.status === 404) { + setFetchState({kind: 'disabled'}); + } else if (r.status === 401 || r.status === 403) { + setFetchState({kind: 'unauthorized'}); + } else { + setFetchState({kind: 'error', status: r.status}); + } + } catch { + setFetchState({kind: 'error', status: 0}); + } + }; useEffect(() => { let cancelled = false; - fetch('/admin/update/status', {credentials: 'same-origin'}) - .then(async (r) => { - if (cancelled) return; - if (r.ok) { - const data = await r.json(); - setUpdateStatus(data); - setFetchState({kind: 'ok'}); - } else if (r.status === 404) { - setFetchState({kind: 'disabled'}); - } else if (r.status === 401 || r.status === 403) { - setFetchState({kind: 'unauthorized'}); - } else { - setFetchState({kind: 'error', status: r.status}); - } - }) - .catch(() => { - if (!cancelled) setFetchState({kind: 'error', status: 0}); - }); + void refreshStatus().then(() => { if (cancelled) return; }); return () => { cancelled = true; }; - }, [setUpdateStatus]); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + // Poll log + status while the executor is in flight, then stop. + const status = us?.execution?.status ?? 
'idle'; + const inFlight = IN_FLIGHT_STATUSES.includes(status); + useEffect(() => { + if (!inFlight) return; + let cancelled = false; + const tick = async () => { + if (cancelled) return; + try { + const lr = await fetch('/admin/update/log', {credentials: 'same-origin'}); + if (lr.ok) setLog(await lr.text()); + } catch {/* noop */} + await refreshStatus(); + if (!cancelled) setTimeout(tick, 1000); + }; + void tick(); + return () => { cancelled = true; }; + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [inFlight]); + + const post = async (path: string) => { + setActionInFlight(true); + try { + await fetch(path, {method: 'POST', credentials: 'same-origin'}); + await refreshStatus(); + } finally { + setActionInFlight(false); + } + }; if (fetchState.kind === 'loading') { return
{t('admin.loading', {defaultValue: 'Loading...'})}
; @@ -61,16 +99,22 @@ export const UpdatePage = () => { ); } if (fetchState.kind === 'error' || !us) { - const status = fetchState.kind === 'error' ? fetchState.status : 0; + const stat = fetchState.kind === 'error' ? fetchState.status : 0; return (

-

{t('update.page.error', {defaultValue: 'Could not load update status (status {{status}}).', status})}

+

{t('update.page.error', {defaultValue: 'Could not load update status (status {{status}}).', status: stat})}

); } const upToDate = !us.latest || us.currentVersion === us.latest.version; + const showApply = !!us.policy?.canManual + && (status === 'idle' || status === 'verified') + && !us.lockHeld + && !upToDate; + const showCancel = status === 'preflight' || status === 'draining'; + const showAcknowledge = status === 'preflight-failed' || status === 'rolled-back' || status === 'rollback-failed'; return (
@@ -86,7 +130,53 @@ export const UpdatePage = () => {
{us.installMethod}
{us.tier}
+
+
{t(`update.execution.${status}`, {defaultValue: status})}
+ + {us.lastResult && ( +

+ +

+ )} + + {us.policy && !us.policy.canManual && !upToDate && ( +

+ +

+ )} + +
+ {showApply && ( + + )} + {showCancel && ( + + )} + {showAcknowledge && ( + + )} +
+ + {inFlight && ( +
+

+
{log}
+
+ )} + {upToDate ? (

) : us.latest ? ( diff --git a/admin/src/store/store.ts b/admin/src/store/store.ts index f3748f47cd4..71c85b5036f 100644 --- a/admin/src/store/store.ts +++ b/admin/src/store/store.ts @@ -3,6 +3,26 @@ import {Socket} from "socket.io-client"; import {PadSearchResult} from "../utils/PadSearch.ts"; import {InstalledPlugin} from "../pages/Plugin.ts"; +export type Execution = + | {status: 'idle'} + | {status: 'preflight'; targetTag: string; startedAt: string} + | {status: 'preflight-failed'; targetTag: string; reason: string; at: string} + | {status: 'draining'; targetTag: string; drainEndsAt: string; startedAt: string} + | {status: 'executing'; targetTag: string; fromSha: string; startedAt: string} + | {status: 'pending-verification'; targetTag: string; fromSha: string; deadlineAt: string} + | {status: 'verified'; targetTag: string; verifiedAt: string} + | {status: 'rolling-back'; reason: string; targetTag: string; fromSha: string; at: string} + | {status: 'rolled-back'; reason: string; targetTag: string; restoredSha: string; at: string} + | {status: 'rollback-failed'; reason: string; targetTag: string; fromSha: string; at: string}; + +export type LastResult = null | { + targetTag: string; + fromSha: string; + outcome: 'verified' | 'rolled-back' | 'rollback-failed' | 'preflight-failed' | 'cancelled'; + reason: string | null; + at: string; +}; + export interface UpdateStatusPayload { currentVersion: string; latest: null | { @@ -18,6 +38,10 @@ export interface UpdateStatusPayload { tier: string; policy: null | {canNotify: boolean; canManual: boolean; canAuto: boolean; canAutonomous: boolean; reason: string}; vulnerableBelow: Array<{announcedBy: string; threshold: string}>; + // Tier 2 additions: + execution: Execution; + lastResult: LastResult; + lockHeld: boolean; } type ToastState = { @@ -45,6 +69,8 @@ type StoreState = { setInstalledPlugins: (plugins: InstalledPlugin[])=>void, updateStatus: UpdateStatusPayload | null, setUpdateStatus: (s: UpdateStatusPayload) => void, 
+ updateLog: string, + setUpdateLog: (log: string) => void, } @@ -70,4 +96,6 @@ export const useStore = create()((set) => ({ setInstalledPlugins: (plugins)=>set({installedPlugins: plugins}), updateStatus: null, setUpdateStatus: (s) => set({updateStatus: s}), + updateLog: '', + setUpdateLog: (log) => set({updateLog: log}), })); diff --git a/doc/admin/updates.md b/doc/admin/updates.md index 852912de3d9..ddafa889d26 100644 --- a/doc/admin/updates.md +++ b/doc/admin/updates.md @@ -1,8 +1,11 @@ # Etherpad updates -Etherpad ships with a built-in update subsystem. **Tier 1 (notify)** is enabled by default: a banner appears in the admin UI when a new release is available, and pad users see a discreet badge if the running version is severely outdated or flagged as vulnerable. No automatic execution happens at this tier — admins are simply informed. +Etherpad ships with a built-in update subsystem. -Tiers 2 (manual click), 3 (auto with grace window), and 4 (autonomous in maintenance window) are designed but not yet implemented. They will land in subsequent releases. +- **Tier 1 (notify)** — default. A banner appears in the admin UI when a new release is available, and pad users see a discreet badge if the running version is severely outdated or flagged as vulnerable. No execution. +- **Tier 2 (manual click)** — admins on a git install can click "Apply update" at `/admin/update`. Etherpad drains active sessions, runs `git fetch / checkout / pnpm install / pnpm run build:ui`, and exits with code 75 so a process supervisor restarts it on the new version. Auto-rolls back on failure. +- **Tier 3 (auto with grace window)** — designed, not yet implemented. +- **Tier 4 (autonomous in maintenance window)** — designed, not yet implemented. 
## Settings @@ -17,7 +20,14 @@ In `settings.json`: "installMethod": "auto", "checkIntervalHours": 6, "githubRepo": "ether/etherpad", - "requireAdminForStatus": false + "requireAdminForStatus": false, + // Tier 2+ knobs (only meaningful at tier "manual" or higher): + "preApplyGraceMinutes": 0, + "drainSeconds": 60, + "rollbackHealthCheckSeconds": 60, + "diskSpaceMinMB": 500, + "requireSignature": false, + "trustedKeysPath": null }, "adminEmail": null } @@ -32,6 +42,12 @@ In `settings.json`: | `updates.checkIntervalHours` | `6` | How often to poll GitHub Releases. | | `updates.githubRepo` | `"ether/etherpad"` | Override for forks. | | `updates.requireAdminForStatus` | `false` | Lock the `/admin/update/status` endpoint to authenticated admin sessions. Default `false` matches existing Etherpad behavior — `/health` already exposes `releaseId` publicly, and changelog data comes from a public GitHub release. Set `true` to hide the full update payload from non-admins without disabling the updater (`tier: "off"` is the heavier opt-out that removes the endpoints entirely). | +| `updates.preApplyGraceMinutes` | `0` | **Tier 3 only.** Wait this many minutes between detecting a new release and starting the drain so the admin can cancel. Has no effect at tier `"manual"`. | +| `updates.drainSeconds` | `60` | How long to broadcast "restart imminent" announcements to active pads before exiting. T-60 / T-30 / T-10 broadcasts fire automatically at the matching offsets within this window. | +| `updates.rollbackHealthCheckSeconds` | `60` | After a fresh boot post-update, give `/health` this long to come up. If it doesn't, RollbackHandler restores the previous SHA. | +| `updates.diskSpaceMinMB` | `500` | Pre-flight refuses to start an update unless the install volume has at least this many MB free. | +| `updates.requireSignature` | `false` | When `true`, refuse updates whose tag is not signed by a trusted key. Verification is done via `git verify-tag <tag>` against the user's GPG keyring. 
Default `false` because Etherpad's release process does not yet sign tags consistently — turning the check on by default would block every Tier 2 update. Set `true` if you run your own builds or have imported a fork's keys. | +| `updates.trustedKeysPath` | `null` | Override the keyring location passed to `git verify-tag` via the `$GNUPGHOME` env var. Useful when the trusted keys live in a dedicated keyring outside the Etherpad user's home. Only meaningful when `requireSignature: true`. | | `adminEmail` | `null` | Top-level. Contact for admin notifications. Setting it enables the email nudges below. | ## What "outdated" means @@ -81,3 +97,68 @@ The version check sends no telemetry. Etherpad fetches the public GitHub Release Set the value explicitly if the heuristics get it wrong (e.g., a docker container that bind-mounts a writable git checkout). In PR 1 (notify only) the install method does not change behavior — every install method gets the banner. From PR 2 onward the install method gates whether the manual-click and automatic tiers can run; only `"git"` is initially supported for write tiers. + +## Tier 2 — manual click + +Tier 2 is opt-in. To enable: set `updates.tier: "manual"` and ensure your install was deployed via git (not docker / npm / managed package). + +### Process supervisor is required + +Etherpad applies an update by **exiting with code 75** so a process supervisor restarts it. Without a supervisor the instance simply exits and stays down. Common supervisor setups: + +- **systemd:** add `Restart=on-failure` + `RestartSec=5` to your unit file. +- **pm2:** the default behaviour restarts on exit. +- **docker:** add `--restart=unless-stopped` (Tier 2 itself is not supported on docker installs anyway, but if you wrap your own image around a git checkout this applies). + +### What clicking "Apply update" does + +1. **Lock acquire** — `var/update.lock` (PID-based, stale locks reaped automatically). +2. 
**Pre-flight checks** — install method writable, working tree clean, free disk ≥ `diskSpaceMinMB`, `pnpm` on `PATH`, target tag exists at the configured remote, signature verifies (if `requireSignature: true`). On failure, state goes to `preflight-failed` with a typed reason; the admin sees a banner and clicks **Acknowledge** to clear it. No filesystem mutation has happened — nothing to roll back. +3. **Drain** — `drainSeconds` window during which T-60 / T-30 / T-10 announcements broadcast to every connected pad and new socket connections are refused. Click **Cancel** during this window to abort cleanly. +4. **Execute** — `git fetch --tags origin`, `git checkout <tag>`, `pnpm install --frozen-lockfile`, `pnpm run build:ui`. Output streams to `var/log/update.log` (rotated 10 MB × 5). +5. **Exit 75** — the supervisor restarts on the new version. +6. **Health check** — RollbackHandler arms a `rollbackHealthCheckSeconds` timer at boot. When `/health` responds 200 (i.e., Etherpad reaches the `RUNNING` state) the timer cancels and the state lands on `verified`. + +### Failure modes + +| What went wrong | Resulting state | Admin action | +| --- | --- | --- | +| Pre-flight check fails | `preflight-failed` | Click **Acknowledge** after fixing the underlying issue (free up disk, clean working tree, etc.). | +| `git fetch` / `git checkout` fails mid-flow | `rolled-back` | Informational. The working tree is back where it started; click **Acknowledge** to clear. | +| `pnpm install` or `pnpm run build:ui` fails | `rolled-back` | Same as above. The lockfile and SHA are restored. | +| `/health` doesn't come up within `rollbackHealthCheckSeconds` | `rolled-back` | Same — RollbackHandler restores the previous SHA + lockfile and exits 75 again. | +| The new version crashes at boot more than twice (`bootCount > 2`) | `rolled-back` | Crash-loop guard kicks in regardless of the health-check timer. 
| +| Rollback itself fails (e.g., `pnpm install` errors restoring old lockfile) | `rollback-failed` | **Manual intervention required.** The admin banner switches to a strong red alert. Restore the install by hand, then click **Acknowledge** to clear the lock and re-allow Tier 2 attempts. | + +### Endpoints + +All Tier 2 endpoints require an authenticated admin session (`is_admin: true`) regardless of `requireAdminForStatus`. + +- `POST /admin/update/apply` — start an apply. Returns `202 {accepted, drainEndsAt}` once the drain begins. Body unused. +- `POST /admin/update/cancel` — cancel during pre-flight or drain. Returns `409` once the executor has begun mutating the filesystem (state machine guarantees we either complete or roll back from there). +- `POST /admin/update/acknowledge` — clear a terminal `preflight-failed` / `rolled-back` / `rollback-failed` state back to `idle`. +- `GET /admin/update/log` — tail the last 200 lines of `var/log/update.log`. Plain text. Used by the in-progress UI. + +### Signature verification + +Default off. Etherpad releases are not yet consistently signed; turning verification on by default would block every Tier 2 update. To enable: + +```jsonc +"updates": { + "requireSignature": true, + "trustedKeysPath": "/srv/etherpad/keys" // optional — defaults to the OS user keyring +} +``` + +The check shells out to `git verify-tag <tag>`. The keyring at `trustedKeysPath` is passed to git via `GNUPGHOME`. If `trustedKeysPath` is `null` (default), the OS user's default keyring is used. + +### Docker-friendly update flows (future work) + +Tier 2 deliberately refuses to apply on `installMethod: "docker"` because in-container `git fetch / pnpm install / build:ui` doesn't survive a container restart — the orchestrator brings the container back up on the same image tag and the work is lost. Docker installs stay on Tier 1 (banner + version status) for now. 
+ +The right way to give docker admins an in-product Apply button is to delegate to the orchestrator rather than mutate the container. Two patterns to consider in a follow-up PR: + +- **Instructions-only.** When the page detects `installMethod: docker` *and* a newer release exists, swap the policy-denial copy for actionable instructions (`docker pull etherpad/etherpad:<tag>` for plain docker; `docker compose pull && docker compose up -d` for compose). Cheap, no new attack surface. +- **Deploy webhook.** New setting `updates.dockerWebhook`. When set, the Apply button on a docker install POSTs to the configured URL and trusts the orchestrator (Render / Railway / Fly / Portainer / Coolify / GitHub Actions — they all expose redeploy webhooks) to do the actual pull-and-recreate. + +Direct Docker-socket access (mount `/var/run/docker.sock` into the container) is **out of scope** — anyone who escapes the Etherpad process via that socket gets root on the host. Admins who want fully autonomous docker updates should run [Watchtower](https://containrrr.dev/watchtower/) alongside Etherpad rather than bake equivalent privilege into Etherpad itself. diff --git a/docs/superpowers/plans/2026-05-08-auto-update-pr2-manual-click.md b/docs/superpowers/plans/2026-05-08-auto-update-pr2-manual-click.md new file mode 100644 index 00000000000..2840f0ff214 --- /dev/null +++ b/docs/superpowers/plans/2026-05-08-auto-update-pr2-manual-click.md @@ -0,0 +1,3222 @@ +# Auto-Update PR 2 — Tier 2 (Manual Click) Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Ship Tier 2 of the four-tier auto-update subsystem: an admin can click "Apply now" on the existing `/admin/update` page, Etherpad drains active sessions for 60s, runs `git fetch / checkout / pnpm install --frozen-lockfile / pnpm run build:ui`, exits 75 for a process supervisor to restart, and on the next boot a health-check timer either marks the update verified or rolls back. + +**Architecture:** Build atomic primitives (lock, executor, rollback, drainer) under `src/node/updater/`, expose four admin-only state-changing endpoints (`apply`, `cancel`, `acknowledge`, `log`) plus log-tail streaming, wire RollbackHandler into the boot sequence, and extend the existing `/admin/update` page with an Apply button + log view + terminal-state acknowledgement UI. Every executable step goes through dependency-injected `spawn`/`fetch`/`fs` so we can run the full pipeline in tests against a tmp git repo without mutating the real install. + +**Tech Stack:** TypeScript (Node 20+), `child_process.spawn`, `node:fs/promises`, log4js (rolling-file appender), express + supertest (mocha integration), vitest (unit), React + zustand + react-i18next (admin UI), Playwright (admin E2E). + +**Spec:** `docs/superpowers/specs/2026-04-25-auto-update-design.md` (sections "Architecture / Components", "API surface / Tier 2 — manual click", "Error handling", "Phased rollout / PR 2"). + +**Out of scope (deferred):** Tier 3 Scheduler + grace window, Tier 4 MaintenanceWindow, real GPG signature verification (we ship a feature-flagged stub gated by `updates.requireSignature: false`; documented as follow-up). + +--- + +## File Structure + +### New files +- `src/node/updater/lock.ts` — PID-based file lock (`var/update.lock`), stale-pid reaper. +- `src/node/updater/trustedKeys.ts` — release-tag signature verification (stubbed unless `requireSignature: true`). 
+- `src/node/updater/preflight.ts` — pure-ish pre-flight checks (working tree clean, disk space, lock free, install method writable, target tag exists, sig verifies). +- `src/node/updater/UpdateExecutor.ts` — child-process orchestration (snapshot → fetch → checkout → install → build → exit 75). All shell-outs go through an injected `spawnFn`. +- `src/node/updater/RollbackHandler.ts` — boot-time pending-verification check, 60s health timer, crash-loop guard, restore SHA + lockfile + retry install on failure. +- `src/node/updater/SessionDrainer.ts` — broadcasts shoutMessage at T-60/-30/-10, refuses new socket connections via a module flag. +- `src/node/updater/updateLog.ts` — log4js rolling-file appender pointed at `var/log/update.log` (10MB × 5) + `tailLines(n)` helper. +- `src/node/hooks/express/updateActions.ts` — registers `POST /admin/update/{apply,cancel,acknowledge}` and `GET /admin/update/log`. Strict admin auth on all four. +- `src/tests/backend-new/specs/updater/lock.test.ts` +- `src/tests/backend-new/specs/updater/preflight.test.ts` +- `src/tests/backend-new/specs/updater/UpdateExecutor.test.ts` +- `src/tests/backend-new/specs/updater/RollbackHandler.test.ts` +- `src/tests/backend-new/specs/updater/SessionDrainer.test.ts` +- `src/tests/backend-new/specs/updater/updateLog.test.ts` +- `src/tests/backend/specs/updateActions.ts` — mocha integration tests for apply/cancel/acknowledge/log. +- `src/tests/backend/specs/updater-integration.ts` — end-to-end against a tmp git repo (happy path, install-fail rollback, build-fail rollback, health-check timeout, crash-loop forced rollback, terminal `rollback-failed` blocks auto/autonomous but allows manual). +- `src/tests/frontend-new/admin-spec/update-page-actions.spec.ts` — Playwright: Apply button, log stream visibility, terminal-state Acknowledge, refusal when policy denies. +- `doc/admin/updates.md` — extend with Tier 2 docs (Apply flow, settings, supervisor requirement). 
+ +### Modified files +- `src/node/updater/types.ts` — extend `UpdateState` with `execution: ExecutionState`, `bootCount: number`, `lastResult`. Add discriminated `ExecutionStatus` union covering all states from the spec's state machine. +- `src/node/updater/state.ts` — extend the `isValid` validator to cover the new fields; backfill defaults during load so state files written by PR 1 still load. +- `src/node/updater/UpdatePolicy.ts` — extend `evaluatePolicy` so `canManual` returns false in `rollback-failed`-equivalent terminal states only when `purpose === 'auto'`; manual remains permitted (admin clicking Apply *is* the intervention). Add `purpose: 'manual' | 'auto'` to the input. +- `src/node/updater/index.ts` — call RollbackHandler.checkPendingVerification at boot before VersionChecker starts; expose getters needed by routes. +- `src/node/utils/Settings.ts` — add `updates.preApplyGraceMinutes` (default 0 in PR 2; tier 3 makes it meaningful), `updates.drainSeconds` (default 60), `updates.rollbackHealthCheckSeconds` (default 60), `updates.diskSpaceMinMB` (default 500), `updates.requireSignature` (default false), `updates.trustedKeysPath` (default null). +- `settings.json.template`, `settings.json.docker` — add the new `updates.*` keys with shipped defaults and a comment block. +- `src/static/js/pad_utils.js` (or the COLLABROOM message handler) — recognise a new `shoutMessage` subtype `update-drain` so the drain notice has its own translatable string and CSS hook (the spec calls this a "system message at T-60/T-30/T-10"; we route it through the existing shout pipeline). +- `src/locales/en.json` — add `update.page.apply`, `update.page.cancel`, `update.page.acknowledge`, `update.page.log`, `update.page.execution`, `update.page.policy.*`, `update.page.last_result.*`, `update.execution.*`, `update.banner.terminal.rollback-failed`, `update.drain.t60`, `update.drain.t30`, `update.drain.t10`. 
+- `admin/src/store/store.ts` — extend `UpdateStatusPayload` with `execution`, `bootCount`, `lastResult` to match server shape; add `setUpdateLog` slice. +- `admin/src/pages/UpdatePage.tsx` — Apply / Cancel / Acknowledge buttons (gated on `policy.canManual`), polling log view while `execution.status === 'executing' | 'draining'`, terminal-state copy + Acknowledge button. +- `admin/src/components/UpdateBanner.tsx` — surface terminal states (`rollback-failed`, `preflight-failed`, `rolled-back-*`) with stronger copy. +- `CHANGELOG.md` — Unreleased section entry. + +--- + +## Conventions + +- **Test runners:** unit specs go under `src/tests/backend-new/specs/updater/*.test.ts` and run with vitest (`pnpm vitest run path/to/file`). Integration/API specs go under `src/tests/backend/specs/*.ts` and run with mocha via `pnpm run test --runInBand` or `pnpm run test -- --grep `. +- **TDD loop:** write the failing test, run it, see the expected failure mode, write the minimum code to pass, run again, commit. +- **Commits:** one per task. Conventional Commits style. The footer used elsewhere on this branch is `Co-Authored-By: Claude Opus 4.7 (1M context) `. +- **No new "etherpad-lite" references** — the project is now "etherpad" in user-facing strings, docs, and configs (memory: `feedback_no_etherpad_lite_name`). +- **Always i18n** — never hardcode user-facing English (memory: `feedback_always_i18n`). Use existing keys when possible. +- **Working tree:** before starting, switch to a fresh branch off `develop`. Never push to `develop` or `main` directly (memory: `feedback_no_direct_push`). + +--- + +## Task 0: Branch off develop + +**Files:** none (git only). 
+ +- [ ] **Step 1: Stash anything dirty, switch to develop, pull, branch off** + +```bash +git stash push -u -m "wip-7696-popup-scroll" || true +git fetch origin +git checkout develop +git pull --ff-only origin develop +git checkout -b feat/7607-auto-update-tier2-manual-click +``` + +Expected: branch `feat/7607-auto-update-tier2-manual-click` based on latest `origin/develop`. + +- [ ] **Step 2: Confirm Tier 1 surface still passes** + +Run: `pnpm run ts-check && pnpm vitest run src/tests/backend-new/specs/updater` +Expected: PASS (we are baselining before adding code). + +--- + +## Task 1: Extend types + state validator + settings for Tier 2 + +**Files:** +- Modify: `src/node/updater/types.ts` +- Modify: `src/node/updater/state.ts` +- Modify: `src/node/utils/Settings.ts` +- Modify: `settings.json.template` +- Modify: `settings.json.docker` +- Test: `src/tests/backend-new/specs/updater/state.test.ts` (existing — extend) + +- [ ] **Step 1: Add a failing test for the extended state shape** + +Append to `src/tests/backend-new/specs/updater/state.test.ts` inside its existing `describe`: + +```typescript +import {EMPTY_STATE} from '../../../../node/updater/types'; + +describe('Tier 2 state extensions', () => { + it('EMPTY_STATE carries an idle execution block, bootCount 0, no lastResult', () => { + expect(EMPTY_STATE.execution).toEqual({status: 'idle'}); + expect(EMPTY_STATE.bootCount).toBe(0); + expect(EMPTY_STATE.lastResult).toBeNull(); + }); + + it('loadState backfills missing Tier 2 fields on a Tier 1 file', async () => { + const tmp = path.join(os.tmpdir(), `state-${Date.now()}.json`); + await fs.writeFile(tmp, JSON.stringify({ + schemaVersion: 1, lastCheckAt: null, lastEtag: null, latest: null, + vulnerableBelow: [], email: {severeAt: null, vulnerableAt: null, vulnerableNewReleaseTag: null}, + })); + const state = await loadState(tmp); + expect(state.execution).toEqual({status: 'idle'}); + expect(state.bootCount).toBe(0); + expect(state.lastResult).toBeNull(); + 
await fs.unlink(tmp); + }); + + it('rejects a malformed execution block by resetting to EMPTY_STATE', async () => { + const tmp = path.join(os.tmpdir(), `state-${Date.now()}.json`); + await fs.writeFile(tmp, JSON.stringify({ + schemaVersion: 1, lastCheckAt: null, lastEtag: null, latest: null, + vulnerableBelow: [], email: {severeAt: null, vulnerableAt: null, vulnerableNewReleaseTag: null}, + execution: 'not-an-object', + })); + const state = await loadState(tmp); + expect(state).toEqual(EMPTY_STATE); + await fs.unlink(tmp); + }); +}); +``` + +(Add `import os from 'node:os'` and `import fs from 'node:fs/promises'` at the top of the file if not present.) + +- [ ] **Step 2: Run the test to confirm it fails** + +Run: `pnpm vitest run src/tests/backend-new/specs/updater/state.test.ts` +Expected: FAIL on `EMPTY_STATE.execution` being undefined. + +- [ ] **Step 3: Extend `types.ts`** + +Replace the bottom of `src/node/updater/types.ts` (`UpdateState` interface and `EMPTY_STATE`) with: + +```typescript +/** + * Discriminated union mirroring the state machine in + * docs/superpowers/specs/2026-04-25-auto-update-design.md (section "State machine"). + * + * Terminal states (`rollback-failed`) require an admin POST to /admin/update/acknowledge + * before further auto/autonomous attempts are allowed. Manual updates remain permitted + * because an admin clicking Apply *is* the intervention. 
+ */ +export type ExecutionStatus = + | {status: 'idle'} + | {status: 'preflight'; targetTag: string; startedAt: string} + | {status: 'preflight-failed'; targetTag: string; reason: string; at: string} + | {status: 'draining'; targetTag: string; drainEndsAt: string; startedAt: string} + | {status: 'executing'; targetTag: string; fromSha: string; startedAt: string} + | {status: 'pending-verification'; targetTag: string; fromSha: string; deadlineAt: string} + | {status: 'verified'; targetTag: string; verifiedAt: string} + | {status: 'rolling-back'; reason: string; targetTag: string; fromSha: string; at: string} + | {status: 'rolled-back'; reason: string; targetTag: string; restoredSha: string; at: string} + | {status: 'rollback-failed'; reason: string; targetTag: string; fromSha: string; at: string}; + +export type LastUpdateResult = { + /** Tag we were updating to. */ + targetTag: string; + /** SHA we were updating from. */ + fromSha: string; + /** Outcome to surface in admin UI. */ + outcome: 'verified' | 'rolled-back' | 'rollback-failed' | 'preflight-failed' | 'cancelled'; + /** Human-readable reason on non-success. */ + reason: string | null; + /** ISO timestamp when this result was finalised. */ + at: string; +} | null; + +export interface UpdateState { + schemaVersion: 1; + lastCheckAt: string | null; + lastEtag: string | null; + latest: ReleaseInfo | null; + vulnerableBelow: VulnerableBelowDirective[]; + email: EmailSendLog; + /** Current in-flight execution state. Persisted so a restart mid-update reaches RollbackHandler. */ + execution: ExecutionStatus; + /** + * Boot counter that the RollbackHandler increments while a `pending-verification` + * status is live. > 2 means the new version crash-looped; force rollback regardless of timer. + */ + bootCount: number; + /** Most recent terminal outcome, surfaced in admin UI even after `execution` returns to idle. 
*/ + lastResult: LastUpdateResult; +} + +export const EMPTY_STATE: UpdateState = { + schemaVersion: 1, + lastCheckAt: null, + lastEtag: null, + latest: null, + vulnerableBelow: [], + email: { + severeAt: null, + vulnerableAt: null, + vulnerableNewReleaseTag: null, + }, + execution: {status: 'idle'}, + bootCount: 0, + lastResult: null, +}; +``` + +- [ ] **Step 4: Extend `state.ts` validators** + +In `src/node/updater/state.ts`, add these helpers above `isValid` and call them from `isValid`: + +```typescript +const VALID_STATUSES = new Set([ + 'idle', 'preflight', 'preflight-failed', 'draining', 'executing', + 'pending-verification', 'verified', 'rolling-back', 'rolled-back', 'rollback-failed', +]); + +const isValidExecution = (v: unknown): boolean => { + if (!isPlainObject(v)) return false; + return typeof v.status === 'string' && VALID_STATUSES.has(v.status as string); +}; + +const isValidLastResult = (v: unknown): boolean => { + if (v === null) return true; + if (!isPlainObject(v)) return false; + return typeof v.targetTag === 'string' + && typeof v.fromSha === 'string' + && typeof v.outcome === 'string' + && (v.reason === null || typeof v.reason === 'string') + && typeof v.at === 'string'; +}; +``` + +Update `isValid` to *backfill* the new fields if missing instead of rejecting (to keep PR 1 state files loadable), and reject only when present-and-malformed: + +```typescript +const isValid = (raw: unknown): raw is UpdateState => { + if (!isPlainObject(raw)) return false; + if (raw.schemaVersion !== 1) return false; + if (!isStringOrNull(raw.lastCheckAt)) return false; + if (!isStringOrNull(raw.lastEtag)) return false; + if (!isValidLatest(raw.latest)) return false; + if (!isValidVulnerableBelow(raw.vulnerableBelow)) return false; + if (!isValidEmail(raw.email)) return false; + // PR 2 fields: missing → backfill at load time; present-but-wrong → reject. 
+ if (raw.execution !== undefined && !isValidExecution(raw.execution)) return false; + if (raw.bootCount !== undefined && typeof raw.bootCount !== 'number') return false; + if (raw.lastResult !== undefined && !isValidLastResult(raw.lastResult)) return false; + return true; +}; +``` + +Update `loadState` to splat defaults for the new fields: + +```typescript +export const loadState = async (filePath: string): Promise => { + let raw: string; + try { + raw = await fs.readFile(filePath, 'utf8'); + } catch (err: any) { + if (err.code === 'ENOENT') return structuredClone(EMPTY_STATE); + throw err; + } + let parsed: unknown; + try { parsed = JSON.parse(raw); } catch { return structuredClone(EMPTY_STATE); } + if (!isValid(parsed)) return structuredClone(EMPTY_STATE); + // Backfill PR 2 fields on a Tier 1 state file. + return { + ...structuredClone(EMPTY_STATE), + ...(parsed as object), + execution: (parsed as any).execution ?? structuredClone(EMPTY_STATE.execution), + bootCount: (parsed as any).bootCount ?? 0, + lastResult: (parsed as any).lastResult ?? null, + }; +}; +``` + +- [ ] **Step 5: Extend `Settings.ts` typing and defaults** + +In the `SettingsType.updates` block (around line 326) add: + +```typescript + preApplyGraceMinutes: number, + drainSeconds: number, + rollbackHealthCheckSeconds: number, + diskSpaceMinMB: number, + requireSignature: boolean, + trustedKeysPath: string | null, +``` + +In the `settings: SettingsType = { ... updates: { ... } ... }` defaults (around line 506) add: + +```typescript + preApplyGraceMinutes: 0, + drainSeconds: 60, + rollbackHealthCheckSeconds: 60, + diskSpaceMinMB: 500, + requireSignature: false, + trustedKeysPath: null, +``` + +Add the same keys to `settings.json.template` and `settings.json.docker` inside their `updates` blocks. Comment in template: + +```jsonc + "updates": { + "tier": "notify", + /* ... existing keys ... 
*/ + /* Tier 2+ knobs (only meaningful at tier "manual" or higher) */ + "preApplyGraceMinutes": 0, + "drainSeconds": 60, + "rollbackHealthCheckSeconds": 60, + "diskSpaceMinMB": 500, + /* When true, refuse updates whose tag is not signed by a trusted key. */ + "requireSignature": false, + "trustedKeysPath": null + }, +``` + +- [ ] **Step 6: Run the tests** + +```bash +pnpm vitest run src/tests/backend-new/specs/updater/state.test.ts +pnpm run ts-check +``` + +Expected: state tests PASS, ts-check clean. + +- [ ] **Step 7: Commit** + +```bash +git add src/node/updater/types.ts src/node/updater/state.ts \ + src/node/utils/Settings.ts settings.json.template settings.json.docker \ + src/tests/backend-new/specs/updater/state.test.ts +git commit -m "$(cat <<'EOF' +feat(updater): extend state + settings for Tier 2 manual-click + +Adds ExecutionStatus discriminated union, bootCount, and lastResult to +UpdateState, plus the preApplyGraceMinutes/drainSeconds/diskSpaceMinMB/ +requireSignature/trustedKeysPath knobs that Tier 2's executor needs. +loadState backfills the new fields on Tier 1 state files so existing +installs keep working. + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 2: PID-based update lock + +**Files:** +- Create: `src/node/updater/lock.ts` +- Test: `src/tests/backend-new/specs/updater/lock.test.ts` + +The lock at `var/update.lock` carries the holder's PID. A second acquire reads the file, sends signal 0 to the recorded PID; if the PID is gone (ESRCH) the lock is stale and we reap it. 
+ +- [ ] **Step 1: Write failing test** + +Create `src/tests/backend-new/specs/updater/lock.test.ts`: + +```typescript +import {describe, it, expect, beforeEach, afterEach} from 'vitest'; +import fs from 'node:fs/promises'; +import path from 'node:path'; +import os from 'node:os'; +import {acquireLock, releaseLock, isHeld} from '../../../../node/updater/lock'; + +describe('update lock', () => { + let dir: string; + let lockPath: string; + beforeEach(async () => { + dir = await fs.mkdtemp(path.join(os.tmpdir(), 'updater-lock-')); + lockPath = path.join(dir, 'update.lock'); + }); + afterEach(async () => { + await fs.rm(dir, {recursive: true, force: true}); + }); + + it('acquires and releases', async () => { + expect(await acquireLock(lockPath)).toBe(true); + expect(await isHeld(lockPath)).toBe(true); + await releaseLock(lockPath); + expect(await isHeld(lockPath)).toBe(false); + }); + + it('rejects a second acquire while live', async () => { + expect(await acquireLock(lockPath)).toBe(true); + expect(await acquireLock(lockPath)).toBe(false); + await releaseLock(lockPath); + }); + + it('reaps a stale lock whose PID is gone', async () => { + // Write a lock claiming a PID that almost certainly does not exist. + await fs.writeFile(lockPath, JSON.stringify({pid: 2147483646, at: new Date().toISOString()})); + expect(await acquireLock(lockPath)).toBe(true); + await releaseLock(lockPath); + }); + + it('treats an unparseable lock file as stale', async () => { + await fs.writeFile(lockPath, 'garbage'); + expect(await acquireLock(lockPath)).toBe(true); + await releaseLock(lockPath); + }); +}); +``` + +- [ ] **Step 2: Run — expect fail (module missing)** + +Run: `pnpm vitest run src/tests/backend-new/specs/updater/lock.test.ts` +Expected: FAIL with import error. 
+ +- [ ] **Step 3: Implement lock** + +Create `src/node/updater/lock.ts`: + +```typescript +import fs from 'node:fs/promises'; +import path from 'node:path'; + +interface LockFile {pid: number; at: string} + +const isPidLive = (pid: number): boolean => { + try { + process.kill(pid, 0); + return true; + } catch (err: any) { + // ESRCH = no such process (stale). EPERM = exists but we can't signal — treat as live. + return err.code !== 'ESRCH'; + } +}; + +const readIfPresent = async (lockPath: string): Promise<LockFile | null> => { + let raw: string; + try { raw = await fs.readFile(lockPath, 'utf8'); } + catch (err: any) { if (err.code !== 'ENOENT') throw err; return null; } + try { + const parsed = JSON.parse(raw); + if (typeof parsed?.pid !== 'number' || typeof parsed?.at !== 'string') return null; + return parsed; + } catch { return null; } +}; + +/** + * Atomic acquire via O_CREAT|O_EXCL. If the file already exists, the holder's PID + * is checked; if dead, we reap and retry once. Returns false on a live conflict. + */ +export const acquireLock = async (lockPath: string): Promise<boolean> => { + await fs.mkdir(path.dirname(lockPath), {recursive: true}); + const payload = JSON.stringify({pid: process.pid, at: new Date().toISOString()}); + try { + const fh = await fs.open(lockPath, 'wx'); + try { await fh.writeFile(payload); } finally { await fh.close(); } + return true; + } catch (err: any) { + if (err.code !== 'EEXIST') throw err; + } + const existing = await readIfPresent(lockPath); + if (existing && isPidLive(existing.pid)) return false; + // Stale — unlink and retry once. A concurrent reaper may beat us, so EEXIST is also "no". 
+ try { await fs.unlink(lockPath); } catch (err: any) { if (err.code !== 'ENOENT') throw err; } + try { + const fh = await fs.open(lockPath, 'wx'); + try { await fh.writeFile(payload); } finally { await fh.close(); } + return true; + } catch (err: any) { + if (err.code === 'EEXIST') return false; + throw err; + } +}; + +export const releaseLock = async (lockPath: string): Promise<void> => { + try { await fs.unlink(lockPath); } + catch (err: any) { if (err.code !== 'ENOENT') throw err; } +}; + +export const isHeld = async (lockPath: string): Promise<boolean> => { + const f = await readIfPresent(lockPath); + return !!f && isPidLive(f.pid); +}; +``` + +- [ ] **Step 4: Run — expect pass** + +Run: `pnpm vitest run src/tests/backend-new/specs/updater/lock.test.ts` +Expected: PASS (4 tests). + +- [ ] **Step 5: Commit** + +```bash +git add src/node/updater/lock.ts src/tests/backend-new/specs/updater/lock.test.ts +git commit -m "$(cat <<'EOF' +feat(updater): PID-based update.lock with stale-pid reaping + +Single-flight guard for Tier 2's UpdateExecutor. Atomic O_CREAT|O_EXCL +acquire; on EEXIST, sends signal 0 to the recorded PID and reaps if dead. +Unparseable lock files are treated as stale rather than fatal so a +half-written lock from a SIGKILL'd parent doesn't lock the install out. + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 3: Trusted-keys / signature verification stub + +**Files:** +- Create: `src/node/updater/trustedKeys.ts` +- Test: `src/tests/backend-new/specs/updater/trustedKeys.test.ts` + +We ship a feature-flagged signature verifier. With `updates.requireSignature: false` (default) we log a one-line warning and return `ok`. With `requireSignature: true` we shell out to `git verify-tag <tag>` and require exit 0; the trusted set is whatever keys are imported into the Etherpad user's GnuPG keyring (or a custom keyring at `updates.trustedKeysPath` — passed to git via `GNUPGHOME`). 
Real key-rotation policy is documented as follow-up; this gives admins who care a working knob today. + +- [ ] **Step 1: Failing test** + +Create `src/tests/backend-new/specs/updater/trustedKeys.test.ts`: + +```typescript +import {describe, it, expect, vi} from 'vitest'; +import {verifyReleaseTag} from '../../../../node/updater/trustedKeys'; + +describe('verifyReleaseTag', () => { + it('returns ok when requireSignature is false (no spawn)', async () => { + const spawnFn = vi.fn(); + const r = await verifyReleaseTag({ + tag: 'v2.7.3', repoDir: '/tmp/x', requireSignature: false, + trustedKeysPath: null, spawnFn: spawnFn as any, + }); + expect(r).toEqual({ok: true, reason: 'signature-not-required'}); + expect(spawnFn).not.toHaveBeenCalled(); + }); + + it('returns ok on git verify-tag exit 0', async () => { + const spawnFn = vi.fn(() => ({on: (e: string, cb: any) => e === 'close' && setTimeout(() => cb(0), 0)})); + const r = await verifyReleaseTag({ + tag: 'v2.7.3', repoDir: '/tmp/x', requireSignature: true, + trustedKeysPath: null, spawnFn: spawnFn as any, + }); + expect(r.ok).toBe(true); + expect(spawnFn).toHaveBeenCalledWith( + 'git', + ['verify-tag', 'v2.7.3'], + expect.objectContaining({cwd: '/tmp/x'}), + ); + }); + + it('returns failure on non-zero exit', async () => { + const spawnFn = vi.fn(() => ({on: (e: string, cb: any) => e === 'close' && setTimeout(() => cb(1), 0)})); + const r = await verifyReleaseTag({ + tag: 'v2.7.3', repoDir: '/tmp/x', requireSignature: true, + trustedKeysPath: null, spawnFn: spawnFn as any, + }); + expect(r).toEqual({ok: false, reason: 'signature-verification-failed'}); + }); + + it('passes GNUPGHOME when trustedKeysPath is set', async () => { + const calls: any[] = []; + const spawnFn = vi.fn((cmd: string, args: string[], opts: any) => { + calls.push({cmd, args, env: opts.env}); + return {on: (e: string, cb: any) => e === 'close' && setTimeout(() => cb(0), 0)} as any; + }); + await verifyReleaseTag({ + tag: 'v2.7.3', repoDir: 
'/tmp/x', requireSignature: true, + trustedKeysPath: '/srv/etherpad/keys', spawnFn: spawnFn as any, + }); + expect(calls[0].env.GNUPGHOME).toBe('/srv/etherpad/keys'); + }); +}); +``` + +- [ ] **Step 2: Run — fail** + +Run: `pnpm vitest run src/tests/backend-new/specs/updater/trustedKeys.test.ts` +Expected: FAIL (module missing). + +- [ ] **Step 3: Implement** + +Create `src/node/updater/trustedKeys.ts`: + +```typescript +import {spawn as realSpawn, SpawnOptions} from 'node:child_process'; +import log4js from 'log4js'; + +const logger = log4js.getLogger('updater'); + +export type SpawnFn = (cmd: string, args: string[], opts: SpawnOptions) => { + on: (event: 'close', cb: (code: number | null) => void) => void; +}; + +export interface VerifyArgs { + tag: string; + repoDir: string; + requireSignature: boolean; + trustedKeysPath: string | null; + spawnFn?: SpawnFn; +} + +export type VerifyResult = + | {ok: true; reason: 'signature-verified' | 'signature-not-required'} + | {ok: false; reason: 'signature-verification-failed'}; + +/** + * Verify a release tag's GPG signature. With requireSignature=false (default) + * this is a documented no-op — Etherpad's release process does not yet sign + * tags consistently and forcing verification on by default would break Tier 2 + * for everyone. Admins who manage their own builds set requireSignature=true + * and import their trusted keys into the Etherpad user's keyring (or a + * dedicated one via trustedKeysPath -> $GNUPGHOME). + */ +export const verifyReleaseTag = async (args: VerifyArgs): Promise<VerifyResult> => { + if (!args.requireSignature) { + logger.warn(`verifyReleaseTag: signature check skipped (updates.requireSignature=false) for ${args.tag}`); + return {ok: true, reason: 'signature-not-required'}; + } + const spawnFn = args.spawnFn ?? 
(realSpawn as unknown as SpawnFn); + const env: NodeJS.ProcessEnv = {...process.env}; + if (args.trustedKeysPath) env.GNUPGHOME = args.trustedKeysPath; + const child = spawnFn('git', ['verify-tag', args.tag], {cwd: args.repoDir, env, stdio: 'ignore'}); + const code: number | null = await new Promise((resolve) => child.on('close', resolve)); + if (code === 0) return {ok: true, reason: 'signature-verified'}; + return {ok: false, reason: 'signature-verification-failed'}; +}; +``` + +- [ ] **Step 4: Run — pass** + +Run: `pnpm vitest run src/tests/backend-new/specs/updater/trustedKeys.test.ts` +Expected: PASS (4 tests). + +- [ ] **Step 5: Commit** + +```bash +git add src/node/updater/trustedKeys.ts src/tests/backend-new/specs/updater/trustedKeys.test.ts +git commit -m "$(cat <<'EOF' +feat(updater): verifyReleaseTag — gpg-via-git stub for Tier 2 preflight + +Default updates.requireSignature=false: log a warning and return ok. +Set true to make preflight refuse a tag whose signature does not verify +under the system keyring (or trustedKeysPath via GNUPGHOME). Etherpad's +release process does not yet sign tags consistently; turning the check +on by default would break Tier 2 for every admin and forcing a release- +signing change is out of scope for this PR. + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 4: Pre-flight checks + +**Files:** +- Create: `src/node/updater/preflight.ts` +- Test: `src/tests/backend-new/specs/updater/preflight.test.ts` + +The `runPreflight` function takes everything it needs as injected dependencies — no direct fs/spawn — so unit tests can stub each individual check. 
+ +- [ ] **Step 1: Failing test** + +Create `src/tests/backend-new/specs/updater/preflight.test.ts`: + +```typescript +import {describe, it, expect, vi} from 'vitest'; +import {runPreflight} from '../../../../node/updater/preflight'; + +const baseDeps = { + installMethod: 'git' as const, + workingTreeClean: vi.fn(async () => true), + freeDiskMB: vi.fn(async () => 5000), + pnpmOnPath: vi.fn(async () => true), + lockHeld: vi.fn(async () => false), + remoteHasTag: vi.fn(async () => true), + verifyTag: vi.fn(async () => ({ok: true as const, reason: 'signature-not-required' as const})), +}; + +const baseInput = { + targetTag: 'v2.7.3', + diskSpaceMinMB: 500, + requireSignature: false, + trustedKeysPath: null, +}; + +describe('runPreflight', () => { + it('passes when all checks pass', async () => { + const r = await runPreflight(baseInput, {...baseDeps}); + expect(r).toEqual({ok: true}); + }); + + it('rejects non-writable install methods', async () => { + const r = await runPreflight(baseInput, {...baseDeps, installMethod: 'docker'}); + expect(r).toEqual({ok: false, reason: 'install-method-not-writable'}); + }); + + it('rejects a dirty working tree', async () => { + const r = await runPreflight(baseInput, {...baseDeps, workingTreeClean: vi.fn(async () => false)}); + expect(r).toEqual({ok: false, reason: 'dirty-working-tree'}); + }); + + it('rejects insufficient disk space', async () => { + const r = await runPreflight(baseInput, {...baseDeps, freeDiskMB: vi.fn(async () => 100)}); + expect(r).toEqual({ok: false, reason: 'low-disk-space'}); + }); + + it('rejects when pnpm is missing', async () => { + const r = await runPreflight(baseInput, {...baseDeps, pnpmOnPath: vi.fn(async () => false)}); + expect(r).toEqual({ok: false, reason: 'pnpm-not-found'}); + }); + + it('rejects when the lock is held', async () => { + const r = await runPreflight(baseInput, {...baseDeps, lockHeld: vi.fn(async () => true)}); + expect(r).toEqual({ok: false, reason: 'lock-held'}); + }); + + 
it('rejects when the remote tag is missing', async () => { + const r = await runPreflight(baseInput, {...baseDeps, remoteHasTag: vi.fn(async () => false)}); + expect(r).toEqual({ok: false, reason: 'remote-tag-missing'}); + }); + + it('rejects when signature verification fails', async () => { + const r = await runPreflight(baseInput, { + ...baseDeps, + verifyTag: vi.fn(async () => ({ok: false as const, reason: 'signature-verification-failed' as const})), + }); + expect(r).toEqual({ok: false, reason: 'signature-verification-failed'}); + }); +}); +``` + +- [ ] **Step 2: Run — fail** + +Run: `pnpm vitest run src/tests/backend-new/specs/updater/preflight.test.ts` +Expected: FAIL. + +- [ ] **Step 3: Implement** + +Create `src/node/updater/preflight.ts`: + +```typescript +import {InstallMethod} from './types'; +import type {VerifyResult} from './trustedKeys'; + +export type PreflightReason = + | 'install-method-not-writable' + | 'dirty-working-tree' + | 'low-disk-space' + | 'pnpm-not-found' + | 'lock-held' + | 'remote-tag-missing' + | 'signature-verification-failed'; + +export interface PreflightInput { + targetTag: string; + diskSpaceMinMB: number; + requireSignature: boolean; + trustedKeysPath: string | null; +} + +export interface PreflightDeps { + installMethod: Exclude<InstallMethod, null>; + workingTreeClean: () => Promise<boolean>; + freeDiskMB: () => Promise<number>; + pnpmOnPath: () => Promise<boolean>; + lockHeld: () => Promise<boolean>; + remoteHasTag: (tag: string) => Promise<boolean>; + verifyTag: () => Promise<VerifyResult>; +} + +export type PreflightResult = {ok: true} | {ok: false; reason: PreflightReason}; + +const WRITABLE: ReadonlySet<Exclude<InstallMethod, null>> = new Set(['git']); + +/** + * Sequenced preflight: each check is fast and reads the world. Order matters — + * cheap, definitive failures (install method) run before slow ones (network tag + * lookup, gpg). The first failure short-circuits. 
+ */ +export const runPreflight = async ( + input: PreflightInput, + deps: PreflightDeps, +): Promise<PreflightResult> => { + if (!WRITABLE.has(deps.installMethod)) return {ok: false, reason: 'install-method-not-writable'}; + if (!await deps.workingTreeClean()) return {ok: false, reason: 'dirty-working-tree'}; + if ((await deps.freeDiskMB()) < input.diskSpaceMinMB) return {ok: false, reason: 'low-disk-space'}; + if (!await deps.pnpmOnPath()) return {ok: false, reason: 'pnpm-not-found'}; + if (await deps.lockHeld()) return {ok: false, reason: 'lock-held'}; + if (!await deps.remoteHasTag(input.targetTag)) return {ok: false, reason: 'remote-tag-missing'}; + const sig = await deps.verifyTag(); + if (!sig.ok) return {ok: false, reason: 'signature-verification-failed'}; + return {ok: true}; +}; +``` + +- [ ] **Step 4: Run — pass** + +Run: `pnpm vitest run src/tests/backend-new/specs/updater/preflight.test.ts` +Expected: PASS (8 tests). + +- [ ] **Step 5: Commit** + +```bash +git add src/node/updater/preflight.ts src/tests/backend-new/specs/updater/preflight.test.ts +git commit -m "$(cat <<'EOF' +feat(updater): preflight check pipeline for Tier 2 + +Pure orchestrator over injected probes for install-method, working tree, +disk space, pnpm presence, lock state, remote tag existence and signature +verification. Cheap-and-definitive checks run first; first failure short- +circuits with a typed reason that the route layer will surface in the +preflight-failed admin banner. + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 5: Update log appender + tail + +**Files:** +- Create: `src/node/updater/updateLog.ts` +- Test: `src/tests/backend-new/specs/updater/updateLog.test.ts` + +A dedicated log4js logger writes to `var/log/update.log` with a 10 MB × 5 rolling-file appender. `tailLines(n)` reads the most recent `n` lines from the active log file for the `/admin/update/log` endpoint. 
+ +- [ ] **Step 1: Failing test** + +Create `src/tests/backend-new/specs/updater/updateLog.test.ts`: + +```typescript +import {describe, it, expect, beforeEach, afterEach} from 'vitest'; +import fs from 'node:fs/promises'; +import path from 'node:path'; +import os from 'node:os'; +import {tailLines} from '../../../../node/updater/updateLog'; + +describe('tailLines', () => { + let dir: string; + let logPath: string; + beforeEach(async () => { + dir = await fs.mkdtemp(path.join(os.tmpdir(), 'updater-log-')); + logPath = path.join(dir, 'update.log'); + }); + afterEach(async () => { await fs.rm(dir, {recursive: true, force: true}); }); + + it('returns [] when file is missing', async () => { + expect(await tailLines(logPath, 10)).toEqual([]); + }); + + it('returns up to N lines when file is shorter', async () => { + await fs.writeFile(logPath, 'a\nb\nc\n'); + expect(await tailLines(logPath, 10)).toEqual(['a', 'b', 'c']); + }); + + it('returns the last N when file is longer', async () => { + const lines = Array.from({length: 500}, (_, i) => `line-${i}`); + await fs.writeFile(logPath, lines.join('\n') + '\n'); + expect(await tailLines(logPath, 5)).toEqual(['line-495', 'line-496', 'line-497', 'line-498', 'line-499']); + }); + + it('handles a final-line-without-newline', async () => { + await fs.writeFile(logPath, 'a\nb\nc'); + expect(await tailLines(logPath, 10)).toEqual(['a', 'b', 'c']); + }); +}); +``` + +- [ ] **Step 2: Run — fail** + +Run: `pnpm vitest run src/tests/backend-new/specs/updater/updateLog.test.ts` +Expected: FAIL. + +- [ ] **Step 3: Implement** + +Create `src/node/updater/updateLog.ts`: + +```typescript +import fs from 'node:fs/promises'; +import path from 'node:path'; +import log4js from 'log4js'; + +let configured = false; + +/** Idempotently register a rolling-file appender for the updater log. 
 */ +export const ensureUpdateLogAppender = (logPath: string): void => { + if (configured) return; + const dir = path.dirname(logPath); + // mkdir is sync-best-effort: log4js will surface any deeper failure on first write. + try { require('node:fs').mkdirSync(dir, {recursive: true}); } catch {/* noop */} + const cfg: any = log4js.getConfig?.() ?? null; + // We don't try to mutate an arbitrary external log4js config — we just add our category. + log4js.addLayout?.('json', () => (e: any) => JSON.stringify({t: e.startTime, lvl: e.level.levelStr, m: e.data.join(' ')})); + log4js.configure({ + appenders: { + ...(cfg?.appenders || {}), + updateLog: {type: 'file', filename: logPath, maxLogSize: 10 * 1024 * 1024, backups: 5, compress: false}, + }, + categories: { + ...(cfg?.categories || {default: {appenders: ['out'], level: 'info'}}), + updater: {appenders: ['updateLog'], level: 'info'}, + }, + }); + configured = true; +}; + +/** Read the last `n` newline-separated lines from the active log file. Empty array if missing. */ +export const tailLines = async (logPath: string, n: number): Promise<string[]> => { + let raw: string; + try { raw = await fs.readFile(logPath, 'utf8'); } + catch (err: any) { if (err.code === 'ENOENT') return []; throw err; } + const stripped = raw.endsWith('\n') ? raw.slice(0, -1) : raw; + if (stripped.length === 0) return []; + const all = stripped.split('\n'); + return all.slice(Math.max(0, all.length - n)); +}; +``` + +> **Note on `log4js.configure`:** Etherpad's main entrypoint already calls `log4js.configure` once. Calling it again replaces the config. The `cfg = log4js.getConfig?.()` spread above preserves the existing appenders and categories so we only *add* `updateLog` and the `updater` category. If `getConfig` isn't exposed in the runtime version of log4js, the fallback writes both `default` and `updater` so existing log lines still go somewhere — verify behaviour with the smoke test below. 
+ +- [ ] **Step 4: Run — pass** + +Run: `pnpm vitest run src/tests/backend-new/specs/updater/updateLog.test.ts` +Expected: PASS (4 tests). + +- [ ] **Step 5: Smoke-test the appender against the real boot path** + +Run: `pnpm run dev -- --port 9003 &` (start in background) then `tail -n 20 var/log/etherpad.log`. Confirm normal logs still appear, then `curl -fsSL http://localhost:9003/health` and verify the existing `default` appender output is unchanged. Stop with `kill %1`. + +If existing logs disappear, the spread of `cfg.appenders/categories` did not preserve them — adjust `ensureUpdateLogAppender` to use the appender registration API rather than `configure`. (Concretely: many log4js builds support `log4js.recording()` or one can keep a reference to the original config from `Settings.ts`'s `log4js.configure(...)` call and re-apply it merged. If the `getConfig?` path returns `null`, fall back to copying the layout from `settings.logconfig` which is what `Settings.ts` builds.) + +- [ ] **Step 6: Commit** + +```bash +git add src/node/updater/updateLog.ts src/tests/backend-new/specs/updater/updateLog.test.ts +git commit -m "$(cat <<'EOF' +feat(updater): rolling update.log appender + tailLines helper + +ensureUpdateLogAppender adds a 10MB x 5 rolling-file appender for the +'updater' log4js category at var/log/update.log; tailLines reads the +last N lines for the /admin/update/log streaming endpoint without +loading the whole file into memory if a partial read suffices. + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 6: SessionDrainer + +**Files:** +- Create: `src/node/updater/SessionDrainer.ts` +- Test: `src/tests/backend-new/specs/updater/SessionDrainer.test.ts` + +The drainer schedules three broadcasts (T-60, T-30, T-10), flips a module-level "no new connections" flag, and resolves a promise at T=0. The flag is read by a lightweight check we'll add to PadMessageHandler in this same task. 
Tests use fake timers and a stubbed broadcaster. + +- [ ] **Step 1: Failing test** + +Create `src/tests/backend-new/specs/updater/SessionDrainer.test.ts`: + +```typescript +import {describe, it, expect, vi, beforeEach, afterEach} from 'vitest'; +import {createDrainer, isAcceptingConnections, _resetForTests} from '../../../../node/updater/SessionDrainer'; + +describe('SessionDrainer', () => { + beforeEach(() => { vi.useFakeTimers(); _resetForTests(); }); + afterEach(() => { vi.useRealTimers(); _resetForTests(); }); + + it('emits T-60, T-30, T-10 and resolves at T=0', async () => { + const broadcasts: Array<{at: number; key: string}> = []; + const drainer = createDrainer({ + drainSeconds: 60, + broadcast: (key, _values) => { broadcasts.push({at: Date.now(), key}); }, + }); + const start = Date.now(); + const done = drainer.start(); + // T-60 broadcast fires immediately on start. + expect(broadcasts.map((b) => b.key)).toEqual(['update.drain.t60']); + await vi.advanceTimersByTimeAsync(30_000); + expect(broadcasts.map((b) => b.key)).toEqual(['update.drain.t60', 'update.drain.t30']); + await vi.advanceTimersByTimeAsync(20_000); + expect(broadcasts.map((b) => b.key)).toEqual([ + 'update.drain.t60', 'update.drain.t30', 'update.drain.t10', + ]); + await vi.advanceTimersByTimeAsync(10_000); + await done; + expect(Date.now() - start).toBe(60_000); + }); + + it('flips isAcceptingConnections to false during drain and back on cancel', () => { + const drainer = createDrainer({drainSeconds: 60, broadcast: () => {}}); + expect(isAcceptingConnections()).toBe(true); + drainer.start(); + expect(isAcceptingConnections()).toBe(false); + drainer.cancel(); + expect(isAcceptingConnections()).toBe(true); + }); + + it('cancel before T=0 resolves the start() promise as cancelled', async () => { + const drainer = createDrainer({drainSeconds: 60, broadcast: () => {}}); + const done = drainer.start(); + await vi.advanceTimersByTimeAsync(20_000); + drainer.cancel(); + const r = await done; + 
expect(r).toEqual({outcome: 'cancelled'}); + }); +}); +``` + +- [ ] **Step 2: Run — fail** + +Run: `pnpm vitest run src/tests/backend-new/specs/updater/SessionDrainer.test.ts` +Expected: FAIL. + +- [ ] **Step 3: Implement** + +Create `src/node/updater/SessionDrainer.ts`: + +```typescript +let acceptingConnections = true; + +export const isAcceptingConnections = (): boolean => acceptingConnections; +export const _resetForTests = (): void => { acceptingConnections = true; }; + +export interface DrainerOpts { + drainSeconds: number; + /** Called for every broadcast; the i18n key is fixed but `values` may carry timing data. */ + broadcast: (i18nKey: 'update.drain.t60' | 'update.drain.t30' | 'update.drain.t10', values: Record<string, unknown>) => void; +} + +export interface Drainer { + start: () => Promise<{outcome: 'completed' | 'cancelled'}>; + cancel: () => void; +} + +export const createDrainer = ({drainSeconds, broadcast}: DrainerOpts): Drainer => { + const timers: NodeJS.Timeout[] = []; + let resolveDone: ((r: {outcome: 'completed' | 'cancelled'}) => void) | null = null; + let cancelled = false; + + const fire = (k: 'update.drain.t60' | 'update.drain.t30' | 'update.drain.t10', secondsRemaining: number) => { + if (cancelled) return; + broadcast(k, {seconds: secondsRemaining}); + }; + + const start = (): Promise<{outcome: 'completed' | 'cancelled'}> => { + if (resolveDone) return Promise.reject(new Error('drainer already started')); + acceptingConnections = false; + return new Promise((resolve) => { + resolveDone = resolve; + const ms = drainSeconds * 1000; + // T-60 broadcast fires at start; T-30 and T-10 at offsets. 
+ fire('update.drain.t60', drainSeconds); + timers.push(setTimeout(() => fire('update.drain.t30', 30), Math.max(0, ms - 30_000))); + timers.push(setTimeout(() => fire('update.drain.t10', 10), Math.max(0, ms - 10_000))); + timers.push(setTimeout(() => { + if (cancelled) return; + acceptingConnections = true; // executor takes over from here; flag goes back on after exit/restart anyway + resolveDone?.({outcome: 'completed'}); + resolveDone = null; + }, ms)); + }); + }; + + const cancel = (): void => { + if (cancelled) return; + cancelled = true; + for (const t of timers) clearTimeout(t); + timers.length = 0; + acceptingConnections = true; + resolveDone?.({outcome: 'cancelled'}); + resolveDone = null; + }; + + return {start, cancel}; +}; +``` + +- [ ] **Step 4: Run — pass** + +Run: `pnpm vitest run src/tests/backend-new/specs/updater/SessionDrainer.test.ts` +Expected: PASS (3 tests). + +- [ ] **Step 5: Wire `isAcceptingConnections` into the socket handshake** + +In `src/node/handler/PadMessageHandler.ts`, near the top of `handleMessage` (or wherever new socket connections enter the pad-message pipeline — pick the function that runs on every incoming socket and short-circuits before the Pad lookup), add: + +```typescript +import {isAcceptingConnections} from '../updater/SessionDrainer'; + +// ...inside the connection-accept path, before any expensive work: +if (!isAcceptingConnections()) { + socket.json.send({disconnect: 'updateInProgress'}); + socket.disconnect(true); + return; +} +``` + +Locate the existing connection-accept path with: `grep -nE "handleMessage|handleClientReady" src/node/handler/PadMessageHandler.ts | head`. Place the guard inside `handleClientReady` before the Pad is fetched. 
+ +- [ ] **Step 6: Add a regression test for the guard** + +Create `src/tests/backend-new/specs/updater/drainer-handshake.test.ts`: + +```typescript +import {describe, it, expect, beforeEach, afterEach, vi} from 'vitest'; + +describe('PadMessageHandler refuses connections during drain', () => { + beforeEach(() => { vi.resetModules(); }); + afterEach(() => { vi.resetModules(); }); + + it('handleClientReady disconnects when isAcceptingConnections is false', async () => { + vi.doMock('../../../../node/updater/SessionDrainer', () => ({ + isAcceptingConnections: () => false, + })); + const PadMessageHandler = await import('../../../../node/handler/PadMessageHandler'); + const sent: any[] = []; + let disconnected = false; + const fakeSocket: any = { + id: 'sock-1', + json: {send: (m: unknown) => sent.push(m)}, + disconnect: () => { disconnected = true; }, + conn: {request: {}}, + }; + // handleClientReady takes (socket, message); message can be a stub. + if (typeof (PadMessageHandler as any).handleClientReady === 'function') { + await (PadMessageHandler as any).handleClientReady(fakeSocket, {padId: 'doesntmatter'}); + } else { + // Fallback to handleMessage if handleClientReady is private. + await (PadMessageHandler as any).handleMessage(fakeSocket, {type: 'CLIENT_READY', padId: 'doesntmatter'}); + } + expect(disconnected).toBe(true); + expect(sent[0]).toEqual({disconnect: 'updateInProgress'}); + }); +}); +``` + +- [ ] **Step 7: Run — pass** + +Run: `pnpm vitest run src/tests/backend-new/specs/updater/` +Expected: all updater unit tests PASS. 
+ +- [ ] **Step 8: Commit** + +```bash +git add src/node/updater/SessionDrainer.ts src/node/handler/PadMessageHandler.ts \ + src/tests/backend-new/specs/updater/SessionDrainer.test.ts \ + src/tests/backend-new/specs/updater/drainer-handshake.test.ts +git commit -m "$(cat <<'EOF' +feat(updater): SessionDrainer + handshake guard + +Drainer schedules T-60/-30/-10 shoutMessage broadcasts and resolves at T=0; +PadMessageHandler short-circuits new CLIENT_READY messages while the +drainer's flag is off, so admins applying an update don't get a stampede +of fresh sockets between the broadcast and exit 75. + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 7: UpdateExecutor + +**Files:** +- Create: `src/node/updater/UpdateExecutor.ts` +- Test: `src/tests/backend-new/specs/updater/UpdateExecutor.test.ts` + +The executor accepts injected `spawnFn`, `fs`, `now`, `exit`, and `saveState` so unit tests run without spawning real children or mutating the real install. It writes `state.execution` at every transition and copies `pnpm-lock.yaml` + the current SHA to `var/update-backup/` before any mutation. 
+ +- [ ] **Step 1: Failing test** + +Create `src/tests/backend-new/specs/updater/UpdateExecutor.test.ts`: + +```typescript +import {describe, it, expect, vi, beforeEach} from 'vitest'; +import {executeUpdate} from '../../../../node/updater/UpdateExecutor'; +import {EMPTY_STATE} from '../../../../node/updater/types'; + +const okSpawn = (script: Array<{cmd: string; exit: number; stderr?: string}>) => { + let i = 0; + return vi.fn((cmd: string, args: string[]) => { + const step = script[i++]; + if (!step) throw new Error(`Unexpected spawn call: ${cmd} ${args.join(' ')}`); + if (step.cmd !== `${cmd} ${args.join(' ')}`) { + throw new Error(`Spawn order mismatch: expected "${step.cmd}", got "${cmd} ${args.join(' ')}"`); + } + return { + stdout: {on: () => {}}, stderr: {on: (e: string, cb: any) => step.stderr && e === 'data' && cb(Buffer.from(step.stderr))}, + on: (e: string, cb: any) => e === 'close' && setTimeout(() => cb(step.exit), 0), + } as any; + }); +}; + +describe('executeUpdate happy path', () => { + let savedStates: any[] = []; + let written: Record<string, string> = {}; + let exited: number | null = null; + + beforeEach(() => { savedStates = []; written = {}; exited = null; }); + + const baseDeps = () => ({ + repoDir: '/srv/etherpad', + backupDir: '/srv/etherpad/var/update-backup', + spawnFn: okSpawn([ + {cmd: 'git fetch --tags origin', exit: 0}, + {cmd: 'git checkout v2.7.3', exit: 0}, + {cmd: 'pnpm install --frozen-lockfile', exit: 0}, + {cmd: 'pnpm run build:ui', exit: 0}, + ]), + readSha: vi.fn(async () => 'abc123'), + copyFile: vi.fn(async (_a: string, _b: string) => { written[_b] = 'lock'; }), + saveState: vi.fn(async (s: any) => { savedStates.push(structuredClone(s)); }), + initialState: structuredClone(EMPTY_STATE), + targetTag: 'v2.7.3', + now: () => new Date('2026-05-08T10:00:00Z'), + exit: (code: number) => { exited = code; }, + }); + + it('snapshots, runs steps, persists pending-verification, exits 75', async () => { + const 
deps = baseDeps(); + const result = await executeUpdate(deps); + expect(result).toEqual({outcome: 'pending-verification'}); + expect(deps.copyFile).toHaveBeenCalledWith( + '/srv/etherpad/pnpm-lock.yaml', + '/srv/etherpad/var/update-backup/pnpm-lock.yaml', + ); + expect(savedStates.at(-1).execution.status).toBe('pending-verification'); + expect(savedStates.at(-1).execution.fromSha).toBe('abc123'); + expect(savedStates.at(-1).bootCount).toBe(0); + expect(exited).toBe(75); + }); + + it('install failure flips state to rolling-back', async () => { + const deps = baseDeps(); + deps.spawnFn = okSpawn([ + {cmd: 'git fetch --tags origin', exit: 0}, + {cmd: 'git checkout v2.7.3', exit: 0}, + {cmd: 'pnpm install --frozen-lockfile', exit: 1, stderr: 'resolver bork'}, + ]); + const result = await executeUpdate(deps); + expect(result.outcome).toBe('failed-install'); + expect(savedStates.at(-1).execution.status).toBe('rolling-back'); + expect(exited).toBe(null); // executor does not exit; rollback path drives the next exit + }); + + it('build failure flips state to rolling-back', async () => { + const deps = baseDeps(); + deps.spawnFn = okSpawn([ + {cmd: 'git fetch --tags origin', exit: 0}, + {cmd: 'git checkout v2.7.3', exit: 0}, + {cmd: 'pnpm install --frozen-lockfile', exit: 0}, + {cmd: 'pnpm run build:ui', exit: 2}, + ]); + const result = await executeUpdate(deps); + expect(result.outcome).toBe('failed-build'); + expect(savedStates.at(-1).execution.status).toBe('rolling-back'); + }); +}); +``` + +- [ ] **Step 2: Run — fail** + +Run: `pnpm vitest run src/tests/backend-new/specs/updater/UpdateExecutor.test.ts` +Expected: FAIL (module missing). 
+ +- [ ] **Step 3: Implement** + +Create `src/node/updater/UpdateExecutor.ts`: + +```typescript +import path from 'node:path'; +import log4js from 'log4js'; +import {SpawnOptions} from 'node:child_process'; +import {UpdateState} from './types'; + +const logger = log4js.getLogger('updater'); + +export type SpawnFn = (cmd: string, args: string[], opts: SpawnOptions) => { + stdout: {on: (event: 'data', cb: (chunk: Buffer) => void) => void}; + stderr: {on: (event: 'data', cb: (chunk: Buffer) => void) => void}; + on: (event: 'close', cb: (code: number | null) => void) => void; +}; + +export interface ExecutorDeps { + repoDir: string; + backupDir: string; + spawnFn: SpawnFn; + readSha: () => Promise<string>; + copyFile: (src: string, dst: string) => Promise<void>; + saveState: (s: UpdateState) => Promise<void>; + initialState: UpdateState; + targetTag: string; + now: () => Date; + exit: (code: number) => void; +} + +export type ExecutorResult = + | {outcome: 'pending-verification'} + | {outcome: 'failed-install'; reason: string} + | {outcome: 'failed-build'; reason: string} + | {outcome: 'failed-checkout'; reason: string}; + +const runStep = (spawnFn: SpawnFn, repoDir: string, cmd: string, args: string[]): + Promise<{code: number | null; stderr: string}> => new Promise((resolve) => { + let stderr = ''; + const child = spawnFn(cmd, args, {cwd: repoDir, stdio: ['ignore', 'pipe', 'pipe']}); + child.stdout.on('data', (chunk: Buffer) => logger.info(`[${cmd}] ${chunk.toString().trimEnd()}`)); + child.stderr.on('data', (chunk: Buffer) => { stderr += chunk.toString(); logger.warn(`[${cmd}] ${chunk.toString().trimEnd()}`); }); + child.on('close', (code) => resolve({code, stderr})); +}); + +/** + * Run the update pipeline. Each step writes state before/after so a hard kill + * mid-step lands the next boot in a known state for RollbackHandler to resolve. + * + * On install/build failure the executor transitions to `rolling-back`, persists, + * and returns.
The route layer hands control to RollbackHandler which restores + * the lockfile and SHA. The executor does NOT exit on failure paths — the + * rollback path owns that exit. + */ +export const executeUpdate = async (deps: ExecutorDeps): Promise<ExecutorResult> => { + const fromSha = await deps.readSha(); + let s: UpdateState = { + ...deps.initialState, + execution: {status: 'executing', targetTag: deps.targetTag, fromSha, startedAt: deps.now().toISOString()}, + bootCount: 0, + }; + await deps.saveState(s); + + // Snapshot lockfile (SHA captured above). + await deps.copyFile(path.join(deps.repoDir, 'pnpm-lock.yaml'), path.join(deps.backupDir, 'pnpm-lock.yaml')); + + const fail = async ( + outcome: 'failed-install' | 'failed-build' | 'failed-checkout', + reason: string, + ): Promise<ExecutorResult> => { + s = { + ...s, + execution: {status: 'rolling-back', reason, targetTag: deps.targetTag, fromSha, at: deps.now().toISOString()}, + }; + await deps.saveState(s); + logger.error(`update step failed (${outcome}): ${reason}`); + return {outcome, reason}; + }; + + let r = await runStep(deps.spawnFn, deps.repoDir, 'git', ['fetch', '--tags', 'origin']); + if (r.code !== 0) return fail('failed-checkout', `git fetch exit ${r.code}: ${r.stderr.trim()}`); + + r = await runStep(deps.spawnFn, deps.repoDir, 'git', ['checkout', deps.targetTag]); + if (r.code !== 0) return fail('failed-checkout', `git checkout exit ${r.code}: ${r.stderr.trim()}`); + + r = await runStep(deps.spawnFn, deps.repoDir, 'pnpm', ['install', '--frozen-lockfile']); + if (r.code !== 0) return fail('failed-install', `pnpm install exit ${r.code}: ${r.stderr.trim()}`); + + r = await runStep(deps.spawnFn, deps.repoDir, 'pnpm', ['run', 'build:ui']); + if (r.code !== 0) return fail('failed-build', `pnpm run build:ui exit ${r.code}: ${r.stderr.trim()}`); + + // Pending-verification: the next boot's RollbackHandler arms the health-check timer.
+ s = { + ...s, + execution: { + status: 'pending-verification', + targetTag: deps.targetTag, + fromSha, + // RollbackHandler computes the actual deadline at boot using rollbackHealthCheckSeconds. + // We persist a placeholder so the field is present. + deadlineAt: deps.now().toISOString(), + }, + bootCount: 0, + }; + await deps.saveState(s); + logger.info(`update executed: ${fromSha} -> ${deps.targetTag}; exiting 75 for supervisor restart`); + deps.exit(75); + return {outcome: 'pending-verification'}; +}; +``` + +> The test stubs `readSha`/`copyFile`/`saveState` because the production caller (in Task 11) provides real implementations. The executor's body never imports `node:fs` or real spawn — keeping the unit test fast and isolated. + +- [ ] **Step 4: Run — pass** + +Run: `pnpm vitest run src/tests/backend-new/specs/updater/UpdateExecutor.test.ts` +Expected: PASS (3 tests). + +- [ ] **Step 5: Commit** + +```bash +git add src/node/updater/UpdateExecutor.ts src/tests/backend-new/specs/updater/UpdateExecutor.test.ts +git commit -m "$(cat <<'EOF' +feat(updater): UpdateExecutor — snapshot, fetch/checkout/install/build, exit 75 + +Pure-DI orchestrator: every shell-out goes through an injected spawnFn, +every fs touch through an injected fs facade, every state write through +the saveState dependency. Unit tests cover the happy path + the install +and build failure transitions to rolling-back. The rollback path itself +lives in Task 8 (RollbackHandler); on failure the executor persists +state and returns without exiting so the route layer can run rollback. + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 8: RollbackHandler + +**Files:** +- Create: `src/node/updater/RollbackHandler.ts` +- Test: `src/tests/backend-new/specs/updater/RollbackHandler.test.ts` + +Two paths: + +1. **`checkPendingVerification(state)`** runs at boot. 
If `state.execution.status === 'pending-verification'`, increment `bootCount`, persist, and either (a) if `bootCount > 2` force an immediate rollback, or (b) arm a 60s timer that on expiry rolls back, on success marks `verified`. Health success is signalled externally — for PR 2 we treat completion of boot through `expressCreateServer` as the success signal (RollbackHandler exposes a `markVerified()` callable). +2. **`performRollback(reason)`** runs from inside the executor's failure paths *and* from the boot-time crash-loop / health-timeout paths. It copies the backup lockfile back, runs `git checkout `, `pnpm install --frozen-lockfile`, persists `rolled-back` (or `rollback-failed` on any sub-step error), and exits 75. + +- [ ] **Step 1: Failing test** + +Create `src/tests/backend-new/specs/updater/RollbackHandler.test.ts`: + +```typescript +import {describe, it, expect, vi, beforeEach} from 'vitest'; +import {checkPendingVerification, performRollback} from '../../../../node/updater/RollbackHandler'; +import {EMPTY_STATE} from '../../../../node/updater/types'; + +const baseDeps = () => ({ + repoDir: '/srv/etherpad', + backupDir: '/srv/etherpad/var/update-backup', + spawnFn: vi.fn((_c: string, _a: string[]) => ({ + stdout: {on: () => {}}, stderr: {on: () => {}}, + on: (e: string, cb: any) => e === 'close' && setTimeout(() => cb(0), 0), + })) as any, + copyFile: vi.fn(async (_a: string, _b: string) => {}), + saveState: vi.fn(async (_s: any) => {}), + exit: vi.fn((_code: number) => {}), + now: () => new Date('2026-05-08T10:00:00Z'), +}); + +describe('checkPendingVerification', () => { + beforeEach(() => { vi.useFakeTimers(); }); + + it('idle state is a no-op', async () => { + const r = checkPendingVerification(structuredClone(EMPTY_STATE), { + ...baseDeps(), rollbackHealthCheckSeconds: 60, + }); + expect(r.armed).toBe(false); + }); + + it('pending-verification with bootCount<=2 arms a timer and increments bootCount', async () => { + const deps = baseDeps(); + const 
state = { + ...structuredClone(EMPTY_STATE), + execution: {status: 'pending-verification', targetTag: 'v2.7.3', fromSha: 'abc', deadlineAt: '2026-05-08T10:00:00Z'} as const, + bootCount: 0, + }; + const r = checkPendingVerification(state, {...deps, rollbackHealthCheckSeconds: 60}); + expect(r.armed).toBe(true); + // bootCount has been bumped and state persisted. + expect(deps.saveState).toHaveBeenCalledWith(expect.objectContaining({bootCount: 1})); + // markVerified clears the timer and lands on `verified`. + r.markVerified(); + await vi.advanceTimersByTimeAsync(60_000); + expect(deps.exit).not.toHaveBeenCalled(); + }); + + it('pending-verification with bootCount>2 forces immediate rollback', async () => { + const deps = baseDeps(); + const state = { + ...structuredClone(EMPTY_STATE), + execution: {status: 'pending-verification', targetTag: 'v2.7.3', fromSha: 'abc', deadlineAt: '2026-05-08T10:00:00Z'} as const, + bootCount: 3, + }; + const r = checkPendingVerification(state, {...deps, rollbackHealthCheckSeconds: 60}); + expect(r.armed).toBe(false); + // Rollback ran; exit 75 was called once we hit the end of performRollback. 
+ await vi.runAllTimersAsync(); + expect(deps.exit).toHaveBeenCalledWith(75); + }); + + it('timer expiry triggers rollback when markVerified is never called', async () => { + const deps = baseDeps(); + const state = { + ...structuredClone(EMPTY_STATE), + execution: {status: 'pending-verification', targetTag: 'v2.7.3', fromSha: 'abc', deadlineAt: '2026-05-08T10:00:00Z'} as const, + bootCount: 0, + }; + const r = checkPendingVerification(state, {...deps, rollbackHealthCheckSeconds: 60}); + expect(r.armed).toBe(true); + await vi.advanceTimersByTimeAsync(60_000); + expect(deps.exit).toHaveBeenCalledWith(75); + }); +}); + +describe('performRollback', () => { + it('happy path: restores lockfile, checkout from-sha, pnpm install, exit 75, status=rolled-back', async () => { + const deps = baseDeps(); + const state = { + ...structuredClone(EMPTY_STATE), + execution: {status: 'rolling-back', reason: 'install-failed', targetTag: 'v2.7.3', fromSha: 'abc', at: '2026-05-08T10:00:00Z'} as const, + bootCount: 0, + }; + await performRollback(state, {...deps, rollbackHealthCheckSeconds: 60}); + expect(deps.copyFile).toHaveBeenCalledWith( + '/srv/etherpad/var/update-backup/pnpm-lock.yaml', + '/srv/etherpad/pnpm-lock.yaml', + ); + expect(deps.saveState).toHaveBeenLastCalledWith(expect.objectContaining({ + execution: expect.objectContaining({status: 'rolled-back'}), + lastResult: expect.objectContaining({outcome: 'rolled-back'}), + })); + expect(deps.exit).toHaveBeenCalledWith(75); + }); + + it('rollback failure lands on rollback-failed (terminal)', async () => { + const deps = baseDeps(); + let i = 0; + deps.spawnFn = vi.fn(() => ({ + stdout: {on: () => {}}, stderr: {on: () => {}}, + on: (e: string, cb: any) => e === 'close' && setTimeout(() => cb(i++ === 0 ? 
0 : 1), 0), + })) as any; + const state = { + ...structuredClone(EMPTY_STATE), + execution: {status: 'rolling-back', reason: 'install-failed', targetTag: 'v2.7.3', fromSha: 'abc', at: '2026-05-08T10:00:00Z'} as const, + bootCount: 0, + }; + await performRollback(state, {...deps, rollbackHealthCheckSeconds: 60}); + expect(deps.saveState).toHaveBeenLastCalledWith(expect.objectContaining({ + execution: expect.objectContaining({status: 'rollback-failed'}), + lastResult: expect.objectContaining({outcome: 'rollback-failed'}), + })); + expect(deps.exit).toHaveBeenCalledWith(75); + }); +}); +``` + +- [ ] **Step 2: Run — fail** + +Run: `pnpm vitest run src/tests/backend-new/specs/updater/RollbackHandler.test.ts` +Expected: FAIL (module missing). + +- [ ] **Step 3: Implement** + +Create `src/node/updater/RollbackHandler.ts`: + +```typescript +import path from 'node:path'; +import log4js from 'log4js'; +import {SpawnOptions} from 'node:child_process'; +import {UpdateState} from './types'; +import type {SpawnFn} from './UpdateExecutor'; + +const logger = log4js.getLogger('updater'); + +export interface RollbackDeps { + repoDir: string; + backupDir: string; + spawnFn: SpawnFn; + copyFile: (src: string, dst: string) => Promise<void>; + saveState: (s: UpdateState) => Promise<void>; + exit: (code: number) => void; + now: () => Date; + rollbackHealthCheckSeconds: number; +} + +const runStep = (spawnFn: SpawnFn, cwd: string, cmd: string, args: string[]): + Promise<number | null> => new Promise((resolve) => { + const child = spawnFn(cmd, args, {cwd, stdio: ['ignore', 'pipe', 'pipe']}); + child.stdout.on('data', (b: Buffer) => logger.info(`[${cmd}] ${b.toString().trimEnd()}`)); + child.stderr.on('data', (b: Buffer) => logger.warn(`[${cmd}] ${b.toString().trimEnd()}`)); + child.on('close', (c) => resolve(c)); +}); + +/** Restore the previous SHA + lockfile. Lands on `rolled-back` on success, `rollback-failed` on any sub-step error. Always exits 75 so the supervisor restarts on a known state.
 */ +export const performRollback = async (state: UpdateState, deps: RollbackDeps): Promise<void> => { + const exec = state.execution; + if (exec.status !== 'rolling-back' && exec.status !== 'pending-verification') { + throw new Error(`performRollback called from unexpected status: ${exec.status}`); + } + const fromSha = (exec as {fromSha: string}).fromSha; + const targetTag = (exec as {targetTag: string}).targetTag; + const reason = exec.status === 'rolling-back' ? exec.reason : 'health-check-failed-or-crash-loop'; + const failTerminal = async (subReason: string): Promise<void> => { + const at = deps.now().toISOString(); + await deps.saveState({ + ...state, + execution: {status: 'rollback-failed', reason: `${reason}; rollback also failed: ${subReason}`, targetTag, fromSha, at}, + lastResult: {targetTag, fromSha, outcome: 'rollback-failed', reason: `${reason}; rollback failed: ${subReason}`, at}, + bootCount: 0, + }); + logger.error(`rollback FAILED: ${subReason}; manual intervention required (POST /admin/update/acknowledge after fixing)`); + deps.exit(75); + }; + + try { + await deps.copyFile(path.join(deps.backupDir, 'pnpm-lock.yaml'), path.join(deps.repoDir, 'pnpm-lock.yaml')); + } catch (err) { + return failTerminal(`copy lockfile: ${(err as Error).message}`); + } + + const checkoutCode = await runStep(deps.spawnFn, deps.repoDir, 'git', ['checkout', fromSha]); + if (checkoutCode !== 0) return failTerminal(`git checkout ${fromSha} exit ${checkoutCode}`); + + const installCode = await runStep(deps.spawnFn, deps.repoDir, 'pnpm', ['install', '--frozen-lockfile']); + if (installCode !== 0) return failTerminal(`pnpm install exit ${installCode}`); + + const at = deps.now().toISOString(); + await deps.saveState({ + ...state, + execution: {status: 'rolled-back', reason, targetTag, restoredSha: fromSha, at}, + lastResult: {targetTag, fromSha, outcome: 'rolled-back', reason, at}, + bootCount: 0, + }); + logger.warn(`rolled back to ${fromSha} (reason: ${reason})`); + deps.exit(75); +};
+ +export interface CheckResult { + /** True if a health-check timer was armed and is awaiting markVerified or expiry. */ + armed: boolean; + /** Cancels the timer and transitions to `verified`. No-op when armed is false. */ + markVerified: () => void; +} + +/** + * Inspect the persisted execution state at boot and react: + * - idle / verified / etc.: no-op. + * - pending-verification with bootCount > 2: force rollback (crash-loop guard). + * - pending-verification otherwise: increment bootCount, persist, arm a timer. + */ +export const checkPendingVerification = (state: UpdateState, deps: RollbackDeps): CheckResult => { + const exec = state.execution; + if (exec.status !== 'pending-verification') return {armed: false, markVerified: () => {}}; + + if (state.bootCount > 2) { + // Don't await — fire and forget so boot proceeds and exit happens asynchronously. + void performRollback(state, deps); + return {armed: false, markVerified: () => {}}; + } + + const incremented: UpdateState = {...state, bootCount: state.bootCount + 1}; + void deps.saveState(incremented); + + let cleared = false; + const timer = setTimeout(() => { + if (cleared) return; + void performRollback({ + ...incremented, + execution: {status: 'rolling-back', reason: 'health-check-timeout', targetTag: exec.targetTag, fromSha: exec.fromSha, at: deps.now().toISOString()}, + }, deps); + }, deps.rollbackHealthCheckSeconds * 1000); + + return { + armed: true, + markVerified: () => { + if (cleared) return; + cleared = true; + clearTimeout(timer); + const at = deps.now().toISOString(); + void deps.saveState({ + ...incremented, + execution: {status: 'verified', targetTag: exec.targetTag, verifiedAt: at}, + lastResult: {targetTag: exec.targetTag, fromSha: exec.fromSha, outcome: 'verified', reason: null, at}, + bootCount: 0, + }); + logger.info(`update verified after restart: ${exec.fromSha} -> ${exec.targetTag}`); + }, + }; +}; +``` + +- [ ] **Step 4: Run — pass** + +Run: `pnpm vitest run 
src/tests/backend-new/specs/updater/RollbackHandler.test.ts` +Expected: PASS (5 tests). + +- [ ] **Step 5: Commit** + +```bash +git add src/node/updater/RollbackHandler.ts src/tests/backend-new/specs/updater/RollbackHandler.test.ts +git commit -m "$(cat <<'EOF' +feat(updater): RollbackHandler — health-check timer + crash-loop guard + +checkPendingVerification arms a 60s health-check timer at boot when state +is pending-verification, increments bootCount, and forces an immediate +rollback when bootCount>2 (crash-loop guard). performRollback restores the +lockfile and SHA, retries pnpm install, and lands on rolled-back or the +terminal rollback-failed state on sub-step failure. Both paths exit 75 so +the supervisor restarts cleanly on the new known state. + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 9: Wire RollbackHandler into the boot sequence + +**Files:** +- Modify: `src/node/updater/index.ts` +- Modify: `src/node/hooks/express/updateStatus.ts` (extend status endpoint with execution + lastResult) +- Test: `src/tests/backend-new/specs/updater/index-boot.test.ts` + +Boot sequence add: after `detectInstallMethod`, before `startPolling`, run `checkPendingVerification`. Stash the returned `markVerified` so `expressCreateServer`'s success path can call it once Etherpad is `RUNNING`. 
+ +- [ ] **Step 1: Failing test** + +Create `src/tests/backend-new/specs/updater/index-boot.test.ts`: + +```typescript +import {describe, it, expect, beforeEach, afterEach, vi} from 'vitest'; + +describe('updater boot wiring', () => { + beforeEach(() => { vi.resetModules(); }); + afterEach(() => { vi.resetModules(); }); + + it('calls checkPendingVerification with the loaded state', async () => { + const calls: any[] = []; + vi.doMock('../../../../node/updater/RollbackHandler', () => ({ + checkPendingVerification: (s: any) => { calls.push(s); return {armed: false, markVerified: () => {}}; }, + performRollback: vi.fn(), + })); + vi.doMock('../../../../node/updater/InstallMethodDetector', () => ({ + detectInstallMethod: vi.fn(async () => 'git'), + })); + vi.doMock('../../../../node/updater/state', () => ({ + loadState: vi.fn(async () => ({schemaVersion: 1, execution: {status: 'idle'}, bootCount: 0, lastResult: null, + lastCheckAt: null, lastEtag: null, latest: null, vulnerableBelow: [], + email: {severeAt: null, vulnerableAt: null, vulnerableNewReleaseTag: null}})), + saveState: vi.fn(async () => {}), + })); + vi.doMock('../../../../node/utils/Settings', () => ({ + default: {root: '/srv/etherpad', updates: {tier: 'manual', githubRepo: 'ether/etherpad', checkIntervalHours: 6, installMethod: 'auto', rollbackHealthCheckSeconds: 60}, adminEmail: null}, + getEpVersion: () => '2.7.2', + })); + const updater = await import('../../../../node/updater'); + await updater.expressCreateServer(); + expect(calls).toHaveLength(1); + await updater.shutdown(); + }); +}); +``` + +- [ ] **Step 2: Run — fail** + +Run: `pnpm vitest run src/tests/backend-new/specs/updater/index-boot.test.ts` +Expected: FAIL. 
+ +- [ ] **Step 3: Wire it up** + +In `src/node/updater/index.ts`, add the import and the boot hook: + +```typescript +import {spawn} from 'node:child_process'; +import fs from 'node:fs/promises'; +import {checkPendingVerification, performRollback, CheckResult} from './RollbackHandler'; +import {ensureUpdateLogAppender} from './updateLog'; + +let pendingVerification: CheckResult | null = null; + +const rollbackDeps = () => ({ + repoDir: settings.root, + backupDir: path.join(settings.root, 'var', 'update-backup'), + spawnFn: spawn as unknown as import('./UpdateExecutor').SpawnFn, + copyFile: (src: string, dst: string) => fs.copyFile(src, dst), + saveState: (s: UpdateState) => saveState(stateFilePath(), s), + exit: (code: number) => process.exit(code), + now: () => new Date(), + rollbackHealthCheckSeconds: Number(settings.updates.rollbackHealthCheckSeconds) || 60, +}); +``` + +Replace `expressCreateServer` with: + +```typescript +export const expressCreateServer = async (): Promise<void> => { + ensureUpdateLogAppender(path.join(settings.root, 'var', 'log', 'update.log')); + detectedMethod = await detectInstallMethod({ + override: settings.updates.installMethod, + rootDir: settings.root, + }); + logger.info(`updater: install method = ${detectedMethod}, tier = ${settings.updates.tier}`); + + const state = await getCurrentState(); + pendingVerification = checkPendingVerification(state, rollbackDeps()); + + if (settings.updates.tier !== 'off') startPolling(); +}; + +/** Called by the Etherpad runtime once the express stack is fully wired and /health is up. */ +export const markBootHealthy = (): void => { + if (pendingVerification) { + pendingVerification.markVerified(); + pendingVerification = null; + } +}; + +/** Exposed for routes.
*/ +export const getRollbackDeps = rollbackDeps; +export const getPendingVerification = () => pendingVerification; +``` + +In `src/node/server.ts`, after the `state = State.RUNNING` line (around line 176), add: + +```typescript +// Once the server is RUNNING, /health responds 200 — that is the implicit health +// signal the updater's pending-verification timer is waiting for. +try { + // eslint-disable-next-line @typescript-eslint/no-var-requires + require('./updater').markBootHealthy(); +} catch (err) { + logger.debug(`markBootHealthy: ${(err as Error).message}`); +} +``` + +In `src/node/hooks/express/updateStatus.ts`, extend the `/admin/update/status` response: + +```typescript +res.json({ + currentVersion: current, + latest: state.latest, + lastCheckAt: state.lastCheckAt, + installMethod, + tier: settings.updates.tier, + policy, + vulnerableBelow: state.vulnerableBelow, + // PR 2 additions: + execution: state.execution, + lastResult: state.lastResult, + lockHeld: await import('../../updater/lock').then((m) => m.isHeld(require('node:path').join(settings.root, 'var', 'update.lock'))), +}); +``` + +- [ ] **Step 4: Run — pass** + +Run: `pnpm vitest run src/tests/backend-new/specs/updater/index-boot.test.ts` +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add src/node/updater/index.ts src/node/server.ts src/node/hooks/express/updateStatus.ts \ + src/tests/backend-new/specs/updater/index-boot.test.ts +git commit -m "$(cat <<'EOF' +feat(updater): wire RollbackHandler into boot + extend /admin/update/status + +expressCreateServer now invokes checkPendingVerification before polling +starts; server.ts calls markBootHealthy after state hits RUNNING so the +60s health-check timer cancels cleanly when the new version boots fine. +The status endpoint surfaces execution + lastResult + lockHeld so the +admin UI can render Apply / Cancel / Acknowledge state correctly. 
+ +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 10: Refine UpdatePolicy for terminal-state gating + +**Files:** +- Modify: `src/node/updater/UpdatePolicy.ts` +- Modify: `src/tests/backend-new/specs/updater/UpdatePolicy.test.ts` + +`canAuto` and `canAutonomous` must return false while `execution.status === 'rollback-failed'` (manual remains allowed). + +- [ ] **Step 1: Add failing tests** + +Append to `UpdatePolicy.test.ts`: + +```typescript +describe('terminal-state gating', () => { + it('rollback-failed denies auto/autonomous but allows manual', () => { + const r = evaluatePolicy({ + ...baseInput, tier: 'autonomous', + executionStatus: 'rollback-failed', + }); + expect(r.canManual).toBe(true); + expect(r.canAuto).toBe(false); + expect(r.canAutonomous).toBe(false); + expect(r.reason).toBe('rollback-failed-terminal'); + }); + + it('idle execution does not affect canManual/canAuto', () => { + const r = evaluatePolicy({...baseInput, tier: 'autonomous', executionStatus: 'idle'}); + expect(r.canManual).toBe(true); + expect(r.canAuto).toBe(true); + expect(r.canAutonomous).toBe(true); + }); +}); +``` + +- [ ] **Step 2: Run — fail** + +Run: `pnpm vitest run src/tests/backend-new/specs/updater/UpdatePolicy.test.ts` +Expected: FAIL. + +- [ ] **Step 3: Update implementation** + +In `src/node/updater/UpdatePolicy.ts`: + +```typescript +export interface PolicyInput { + installMethod: Exclude<InstallMethod, 'unknown'>; + tier: Tier; + current: string; + latest: string; + /** Optional — when known. Only `rollback-failed` materially changes policy.
*/ + executionStatus?: string; +} + +export const evaluatePolicy = ({installMethod, tier, current, latest, executionStatus}: PolicyInput): PolicyResult => { + if (tier === 'off') { + return {canNotify: false, canManual: false, canAuto: false, canAutonomous: false, reason: 'tier-off'}; + } + if (compareSemver(current, latest) >= 0) { + return {canNotify: false, canManual: false, canAuto: false, canAutonomous: false, reason: 'up-to-date'}; + } + const canNotify = true; + const writable = WRITABLE_METHODS.has(installMethod); + if (!writable) { + return {canNotify, canManual: false, canAuto: false, canAutonomous: false, reason: 'install-method-not-writable'}; + } + const terminal = executionStatus === 'rollback-failed'; + return { + canNotify, + canManual: tier === 'manual' || tier === 'auto' || tier === 'autonomous', + canAuto: !terminal && (tier === 'auto' || tier === 'autonomous'), + canAutonomous: !terminal && tier === 'autonomous', + reason: terminal ? 'rollback-failed-terminal' : 'ok', + }; +}; +``` + +Also update the `updateStatus.ts` call to pass `executionStatus: state.execution.status`. + +- [ ] **Step 4: Run — pass** + +Run: `pnpm vitest run src/tests/backend-new/specs/updater/UpdatePolicy.test.ts` +Expected: PASS (existing + 2 new). + +- [ ] **Step 5: Commit** + +```bash +git add src/node/updater/UpdatePolicy.ts src/node/hooks/express/updateStatus.ts \ + src/tests/backend-new/specs/updater/UpdatePolicy.test.ts +git commit -m "$(cat <<'EOF' +feat(updater): UpdatePolicy honours rollback-failed terminal state + +canAuto/canAutonomous are denied while execution.status === 'rollback-failed'; +canManual stays on because an admin clicking Apply *is* the intervention the +terminal state requires. Status endpoint passes execution.status through so +the admin UI sees the right policy result. 
+ +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 11: Apply / Cancel / Acknowledge / Log endpoints + +**Files:** +- Create: `src/node/hooks/express/updateActions.ts` +- Modify: `src/node/hooks/express/admin.ts` if a hook-registration list lives there (none required if hooks loaded via `ep.json` — see step 3) +- Modify: `src/node/updater/ep.json` (or `src/ep.json`) to register the new hook +- Test: `src/tests/backend/specs/updateActions.ts` (mocha integration) + +Strict admin auth on all four endpoints (apply, cancel, acknowledge, log) — unlike `/admin/update/status` which is read-only and intentionally loose. POST endpoints require an authenticated `is_admin` session; the GET log endpoint requires the same. + +- [ ] **Step 1: Find the right hook registration site** + +```bash +grep -nE "updateStatus|updater/index" src/node/utils/Settings.ts src/node/server.ts src/node/hooks src/ep.json src/static/js/pluginfw 2>/dev/null +cat src/ep.json +``` + +PR 1 registered `updater/index.ts:expressCreateServer` and `hooks/express/updateStatus:expressCreateServer` in `src/ep.json`. Add `hooks/express/updateActions:expressCreateServer` in the same array. 
+ +- [ ] **Step 2: Failing test (mocha)** + +Create `src/tests/backend/specs/updateActions.ts`: + +```typescript +'use strict'; + +const assert = require('assert').strict; +const common = require('../common'); +const plugins = require('../../../static/js/pluginfw/plugin_defs'); +import settings from '../../../node/utils/Settings'; +import {saveState} from '../../../node/updater/state'; +import {EMPTY_STATE} from '../../../node/updater/types'; +import path from 'node:path'; + +const statePath = () => path.join(settings.root, 'var', 'update-state.json'); +const authHookNames = ['preAuthorize', 'authenticate', 'authorize']; +const failHookNames = ['preAuthzFailure', 'authnFailure', 'authzFailure', 'authFailure']; + +const installAdminAuth = () => { + for (const h of authHookNames.concat(failHookNames)) plugins.hooks[h] = []; + plugins.hooks.authenticate = [{ + hook_fn: (_n: string, ctx: any, cb: Function) => { + ctx.req.session.user = {is_admin: true}; + cb([true]); + }, + }]; + (settings as any).requireAuthentication = true; + (settings as any).requireAuthorization = false; + (settings as any).users = {admin: {password: 'admin-pw', is_admin: true}}; +}; + +describe(__filename, function () { + let agent: any; + const backups: Record = {}; + + before(async () => { agent = await common.init(); }); + + beforeEach(async () => { + backups.hooks = {}; + for (const n of authHookNames.concat(failHookNames)) backups.hooks[n] = plugins.hooks[n]; + backups.settings = {}; + for (const k of ['requireAuthentication', 'requireAuthorization', 'users']) backups.settings[k] = (settings as any)[k]; + await saveState(statePath(), { + ...EMPTY_STATE, + latest: {version: '99.0.0', tag: 'v99.0.0', body: 'release', publishedAt: '2099-01-01T00:00:00Z', prerelease: false, htmlUrl: 'https://example/'}, + }); + }); + + afterEach(() => { + Object.assign(plugins.hooks, backups.hooks); + Object.assign(settings, backups.settings); + }); + + describe('POST /admin/update/apply', () => { + it('rejects 
unauthenticated', async () => { + await agent.post('/admin/update/apply').expect(401); + }); + + it('rejects when policy denies (non-git install method)', async () => { + installAdminAuth(); + const orig = settings.updates.installMethod; + settings.updates.installMethod = 'docker'; + try { + await agent.post('/admin/update/apply').auth('admin', 'admin-pw').expect(409); + } finally { settings.updates.installMethod = orig; } + }); + + it('rejects when an execution is already in flight', async () => { + installAdminAuth(); + await saveState(statePath(), { + ...EMPTY_STATE, + latest: {version: '99.0.0', tag: 'v99.0.0', body: '', publishedAt: '', prerelease: false, htmlUrl: ''}, + execution: {status: 'executing', targetTag: 'v99.0.0', fromSha: 'x', startedAt: '2026-05-08T00:00:00Z'}, + }); + await agent.post('/admin/update/apply').auth('admin', 'admin-pw').expect(409); + }); + }); + + describe('POST /admin/update/cancel', () => { + it('rejects when nothing is running (409)', async () => { + installAdminAuth(); + await agent.post('/admin/update/cancel').auth('admin', 'admin-pw').expect(409); + }); + }); + + describe('POST /admin/update/acknowledge', () => { + it('clears a terminal state to idle', async () => { + installAdminAuth(); + await saveState(statePath(), { + ...EMPTY_STATE, + execution: {status: 'rollback-failed', reason: 'install-failed; rollback failed: pnpm exit 1', targetTag: 'v99.0.0', fromSha: 'x', at: '2026-05-08T00:00:00Z'}, + lastResult: {targetTag: 'v99.0.0', fromSha: 'x', outcome: 'rollback-failed', reason: 'pnpm install failed', at: '2026-05-08T00:00:00Z'}, + }); + await agent.post('/admin/update/acknowledge').auth('admin', 'admin-pw').expect(200); + const status = await agent.get('/admin/update/status').expect(200); + assert.equal(status.body.execution.status, 'idle'); + }); + + it('refuses to clear a non-terminal state (409)', async () => { + installAdminAuth(); + await saveState(statePath(), {...EMPTY_STATE}); + await 
agent.post('/admin/update/acknowledge').auth('admin', 'admin-pw').expect(409); + }); + }); + + describe('GET /admin/update/log', () => { + it('requires admin auth', async () => { + await agent.get('/admin/update/log').expect(401); + }); + + it('returns 200 with text body for an admin', async () => { + installAdminAuth(); + const res = await agent.get('/admin/update/log').auth('admin', 'admin-pw').expect(200); + assert.equal(typeof res.text, 'string'); + }); + }); +}); +``` + +- [ ] **Step 3: Implement the route module** + +Create `src/node/hooks/express/updateActions.ts`: + +```typescript +'use strict'; + +import path from 'node:path'; +import fs from 'node:fs/promises'; +import {spawn} from 'node:child_process'; +import log4js from 'log4js'; +import {ArgsExpressType} from '../../types/ArgsExpressType'; +import settings, {getEpVersion} from '../../utils/Settings'; +import {getDetectedInstallMethod, stateFilePath, getRollbackDeps} from '../../updater'; +import {evaluatePolicy} from '../../updater/UpdatePolicy'; +import {loadState, saveState} from '../../updater/state'; +import {acquireLock, releaseLock, isHeld} from '../../updater/lock'; +import {executeUpdate} from '../../updater/UpdateExecutor'; +import {createDrainer} from '../../updater/SessionDrainer'; +import {runPreflight} from '../../updater/preflight'; +import {verifyReleaseTag} from '../../updater/trustedKeys'; +import {tailLines} from '../../updater/updateLog'; +import {UpdateState} from '../../updater/types'; + +const logger = log4js.getLogger('updater'); +const lockPath = () => path.join(settings.root, 'var', 'update.lock'); +const logPath = () => path.join(settings.root, 'var', 'log', 'update.log'); +const backupDir = () => path.join(settings.root, 'var', 'update-backup'); + +let drainer: ReturnType<typeof createDrainer> | null = null; + +const requireAdmin = (req: any, res: any): boolean => { + const u = req.session?.user; + if (!u) { res.status(401).send('Authentication required'); return false; } + if (!u.is_admin) {
res.status(403).send('Forbidden'); return false; } + return true; +}; + +const wrapAsync = (fn: (req: any, res: any, next: Function) => Promise) => + (req: any, res: any, next: Function) => Promise.resolve(fn(req, res, next)).catch(next); + +const broadcastShout = (key: string, values: Record): void => { + // Use the existing shout pipeline via socket.io. PR 1 uses io.sockets.emit('shout', ...). + // We re-import lazily to dodge a require-cycle with the socketio hook. + try { + // eslint-disable-next-line @typescript-eslint/no-var-requires + const {io} = require('../socketio'); + if (!io) return; + io.sockets.emit('shout', { + type: 'COLLABROOM', + data: {type: 'shoutMessage', payload: {message: {message: key, values, sticky: false}, timestamp: Date.now()}}, + }); + } catch (err) { + logger.warn(`broadcastShout: ${(err as Error).message}`); + } +}; + +export const expressCreateServer = ( + _hookName: string, + {app}: ArgsExpressType, + cb: Function, +): void => { + if (settings.updates.tier === 'off') return cb(); + + app.post('/admin/update/apply', wrapAsync(async (req, res) => { + if (!requireAdmin(req, res)) return; + + const state = await loadState(stateFilePath()); + if (!state.latest) return res.status(409).json({error: 'no-known-latest'}); + if (state.execution.status !== 'idle' && state.execution.status !== 'verified' && + !state.execution.status.startsWith('rolled-back') && state.execution.status !== 'preflight-failed') { + return res.status(409).json({error: `execution-busy:${state.execution.status}`}); + } + + const installMethod = getDetectedInstallMethod(); + const policy = evaluatePolicy({ + installMethod, tier: settings.updates.tier, + current: getEpVersion(), latest: state.latest.version, + executionStatus: state.execution.status, + }); + if (!policy.canManual) return res.status(409).json({error: 'policy-denied', reason: policy.reason}); + + if (!await acquireLock(lockPath())) return res.status(409).json({error: 'lock-held'}); + + try { + // 
Preflight + const targetTag = state.latest.tag; + const startedAt = new Date().toISOString(); + const preState: UpdateState = {...state, execution: {status: 'preflight', targetTag, startedAt}}; + await saveState(stateFilePath(), preState); + + const pf = await runPreflight( + {targetTag, diskSpaceMinMB: settings.updates.diskSpaceMinMB, + requireSignature: settings.updates.requireSignature, + trustedKeysPath: settings.updates.trustedKeysPath}, + { + installMethod, + workingTreeClean: () => new Promise((resolve) => { + const c = spawn('git', ['status', '--porcelain'], {cwd: settings.root}); + let out = ''; + c.stdout.on('data', (b) => { out += b.toString(); }); + c.on('close', () => resolve(out.trim().length === 0)); + }), + freeDiskMB: async () => { + const {statfs} = await import('node:fs/promises'); + try { + const s = await (statfs as any)(settings.root); + return Math.floor((s.bavail * s.bsize) / (1024 * 1024)); + } catch { return Number.POSITIVE_INFINITY; } // fall back to "no constraint" if statfs unsupported + }, + pnpmOnPath: () => new Promise((resolve) => { + const c = spawn('pnpm', ['--version'], {stdio: 'ignore'}); + c.on('close', (code) => resolve(code === 0)); + c.on('error', () => resolve(false)); + }), + lockHeld: async () => false, // we just acquired it + remoteHasTag: (tag) => new Promise((resolve) => { + const c = spawn('git', ['ls-remote', '--tags', 'origin', tag], {cwd: settings.root, stdio: ['ignore', 'pipe', 'ignore']}); + let out = ''; + c.stdout.on('data', (b) => { out += b.toString(); }); + c.on('close', () => resolve(out.trim().length > 0)); + c.on('error', () => resolve(false)); + }), + verifyTag: () => verifyReleaseTag({ + tag: targetTag, repoDir: settings.root, + requireSignature: settings.updates.requireSignature, + trustedKeysPath: settings.updates.trustedKeysPath, + }), + }, + ); + + if (!pf.ok) { + const at = new Date().toISOString(); + await saveState(stateFilePath(), { + ...preState, + execution: {status: 'preflight-failed', 
targetTag, reason: pf.reason, at}, + lastResult: {targetTag, fromSha: '', outcome: 'preflight-failed', reason: pf.reason, at}, + }); + await releaseLock(lockPath()); + return res.status(409).json({error: 'preflight-failed', reason: pf.reason}); + } + + // Drain + drainer = createDrainer({ + drainSeconds: Number(settings.updates.drainSeconds) || 60, + broadcast: (key, values) => broadcastShout(key, values), + }); + const drainEndsAt = new Date(Date.now() + (Number(settings.updates.drainSeconds) || 60) * 1000).toISOString(); + await saveState(stateFilePath(), { + ...preState, + execution: {status: 'draining', targetTag, drainEndsAt, startedAt: new Date().toISOString()}, + }); + + // Respond before drain completes — UI polls /admin/update/status + /log. + res.status(202).json({accepted: true, drainEndsAt}); + + const drainResult = await drainer.start(); + drainer = null; + if (drainResult.outcome === 'cancelled') { + // The /admin/update/cancel handler already wrote state.execution=idle and + // lastResult=cancelled. Don't overwrite it here — just release the lock + // and return; the supervisor doesn't need to restart. + await releaseLock(lockPath()); + return; + } + + const fresh = await loadState(stateFilePath()); + await executeUpdate({ + repoDir: settings.root, + backupDir: backupDir(), + spawnFn: spawn as any, + readSha: () => new Promise((resolve, reject) => { + const c = spawn('git', ['rev-parse', 'HEAD'], {cwd: settings.root, stdio: ['ignore', 'pipe', 'ignore']}); + let out = ''; + c.stdout.on('data', (b) => { out += b.toString(); }); + c.on('close', (code) => code === 0 ? 
resolve(out.trim()) : reject(new Error(`git rev-parse exit ${code}`))); + c.on('error', reject); + }), + copyFile: (src, dst) => fs.mkdir(path.dirname(dst), {recursive: true}).then(() => fs.copyFile(src, dst)), + saveState: (s) => saveState(stateFilePath(), s), + initialState: fresh, + targetTag, + now: () => new Date(), + exit: (code) => process.exit(code), + }); + // executeUpdate either calls process.exit(75) (pending-verification) or returns + // on a failure path. Failure paths are handled by the next process boot via + // RollbackHandler's pending-verification check + the rolling-back path inside performRollback. + // If we reach here, the failure path was hit and we need to perform rollback now. + const afterExec = await loadState(stateFilePath()); + if (afterExec.execution.status === 'rolling-back') { + const {performRollback} = await import('../../updater/RollbackHandler'); + await performRollback(afterExec, getRollbackDeps()); + } + await releaseLock(lockPath()); + } catch (err) { + logger.error(`apply failed: ${(err as Error).stack || err}`); + try { await releaseLock(lockPath()); } catch {/* noop */} + if (!res.headersSent) res.status(500).json({error: 'internal'}); + } + })); + + app.post('/admin/update/cancel', wrapAsync(async (req, res) => { + if (!requireAdmin(req, res)) return; + const state = await loadState(stateFilePath()); + // Cancel is allowed only during pre-execute states. Once executing begins (lockfile/SHA mutated) + // we either complete or rollback. Spec section "Error handling" / state machine. + if (state.execution.status !== 'preflight' && state.execution.status !== 'draining') { + return res.status(409).json({error: 'not-cancellable', status: state.execution.status}); + } + if (drainer) drainer.cancel(); + await saveState(stateFilePath(), {...state, execution: {status: 'idle'}, lastResult: { + targetTag: (state.execution as any).targetTag ?? 
'', + fromSha: '', + outcome: 'cancelled', + reason: 'admin-cancelled', + at: new Date().toISOString(), + }}); + try { await releaseLock(lockPath()); } catch {/* noop */} + res.json({cancelled: true}); + })); + + app.post('/admin/update/acknowledge', wrapAsync(async (req, res) => { + if (!requireAdmin(req, res)) return; + const state = await loadState(stateFilePath()); + const terminal = ['rollback-failed', 'preflight-failed', 'rolled-back']; + if (!terminal.some((t) => state.execution.status === t)) { + return res.status(409).json({error: 'not-terminal', status: state.execution.status}); + } + await saveState(stateFilePath(), {...state, execution: {status: 'idle'}, bootCount: 0}); + res.json({acknowledged: true}); + })); + + app.get('/admin/update/log', wrapAsync(async (req, res) => { + if (!requireAdmin(req, res)) return; + const lines = await tailLines(logPath(), 200); + res.set('Content-Type', 'text/plain; charset=utf-8'); + res.send(lines.join('\n')); + })); + + // Lock-held probe so isHeld is reachable. Status endpoint already calls this. + void isHeld; + + cb(); +}; +``` + +In `src/ep.json`, add the new hook (find the existing `expressCreateServer` block listing `updateStatus` and append): + +```json +{ + "expressCreateServer": [ + "ep_etherpad-lite/node/updater/index", + "ep_etherpad-lite/node/hooks/express/updateStatus", + "ep_etherpad-lite/node/hooks/express/updateActions" + ] +} +``` + +(Adjust the array structure to match the actual `ep.json` format — likely each hook is a separate object. Verify with `cat src/ep.json` first.) + +- [ ] **Step 4: Run — pass** + +```bash +pnpm run ts-check +pnpm run test -- --grep updateActions +``` + +Expected: TS clean, mocha PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/node/hooks/express/updateActions.ts src/ep.json src/tests/backend/specs/updateActions.ts +git commit -m "$(cat <<'EOF' +feat(updater): apply / cancel / acknowledge / log endpoints + +Strict admin-only POSTs that drive Tier 2's manual-click flow: +- /admin/update/apply: acquire lock, preflight, drain 60s, execute, exit 75 +- /admin/update/cancel: cancel a pre-execute state, release lock +- /admin/update/acknowledge: clear terminal states (preflight-failed, + rolled-back, rollback-failed) back to idle +- /admin/update/log: tail var/log/update.log for the in-progress UI + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 12: Admin UI — Apply / Cancel / Acknowledge buttons + +**Files:** +- Modify: `admin/src/pages/UpdatePage.tsx` +- Modify: `admin/src/store/store.ts` +- Modify: `src/locales/en.json` + +- [ ] **Step 1: Extend the store** + +In `admin/src/store/store.ts`, extend `UpdateStatusPayload`: + +```typescript +export type Execution = + | {status: 'idle'} + | {status: 'preflight'; targetTag: string; startedAt: string} + | {status: 'preflight-failed'; targetTag: string; reason: string; at: string} + | {status: 'draining'; targetTag: string; drainEndsAt: string; startedAt: string} + | {status: 'executing'; targetTag: string; fromSha: string; startedAt: string} + | {status: 'pending-verification'; targetTag: string; fromSha: string; deadlineAt: string} + | {status: 'verified'; targetTag: string; verifiedAt: string} + | {status: 'rolling-back'; reason: string; targetTag: string; fromSha: string; at: string} + | {status: 'rolled-back'; reason: string; targetTag: string; restoredSha: string; at: string} + | {status: 'rollback-failed'; reason: string; targetTag: string; fromSha: string; at: string}; + +export interface UpdateStatusPayload { + // ...existing fields... 
+ execution: Execution; + lastResult: null | { + targetTag: string; fromSha: string; + outcome: 'verified' | 'rolled-back' | 'rollback-failed' | 'preflight-failed' | 'cancelled'; + reason: string | null; at: string; + }; + lockHeld: boolean; +} +``` + +Add a log slice: + +```typescript +type StoreState = { + // ...existing... + updateLog: string; + setUpdateLog: (log: string) => void; +}; +// in create(): +updateLog: '', +setUpdateLog: (log) => set({updateLog: log}), +``` + +- [ ] **Step 2: Replace `UpdatePage.tsx`** + +Replace the `return` block of `UpdatePage` so the `ok` path renders Apply/Cancel/Acknowledge per `execution.status`: + +```tsx +const apply = async () => { + await fetch('/admin/update/apply', {method: 'POST', credentials: 'same-origin'}); + // Re-fetch status — server returned 202, the actual transition happened in the background. + const r = await fetch('/admin/update/status', {credentials: 'same-origin'}); + if (r.ok) setUpdateStatus(await r.json()); +}; +const cancel = async () => { + await fetch('/admin/update/cancel', {method: 'POST', credentials: 'same-origin'}); + const r = await fetch('/admin/update/status', {credentials: 'same-origin'}); + if (r.ok) setUpdateStatus(await r.json()); +}; +const acknowledge = async () => { + await fetch('/admin/update/acknowledge', {method: 'POST', credentials: 'same-origin'}); + const r = await fetch('/admin/update/status', {credentials: 'same-origin'}); + if (r.ok) setUpdateStatus(await r.json()); +}; + +const status = us.execution.status; +const showApply = us.policy?.canManual && (status === 'idle' || status === 'verified' || status.startsWith('rolled-back') || status === 'preflight-failed') && !us.lockHeld; +const showCancel = status === 'preflight' || status === 'draining'; +const showAcknowledge = status === 'preflight-failed' || status === 'rolled-back' || status === 'rollback-failed'; + +return ( +
+

+
+ {/* ...existing dl entries... */} +
+
{t(`update.execution.${status}`, {defaultValue: status})}
+
+ {us.lastResult && ( +

+ +

+ )} + {us.policy && !us.policy.canManual && ( +

+ +

+ )} +
+        {showApply && <button onClick={apply}>{t('update.page.apply')}</button>}
+        {showCancel && <button onClick={cancel}>{t('update.page.cancel')}</button>}
+        {showAcknowledge && <button onClick={acknowledge}>{t('update.page.acknowledge')}</button>}
+ {/* changelog block — keep as in PR 1 */} +
+); +``` + +- [ ] **Step 3: Add the i18n keys** + +In `src/locales/en.json`, add: + +```json + "update.page.apply": "Apply update", + "update.page.cancel": "Cancel", + "update.page.acknowledge": "Acknowledge", + "update.page.execution": "Status", + "update.page.policy.install-method-not-writable": "Updates from the admin UI require a git install. Update via your package manager.", + "update.page.policy.rollback-failed-terminal": "A previous update failed and could not be rolled back. Manual intervention required; press Acknowledge to clear the lock once the install is healthy.", + "update.page.policy.up-to-date": "You are running the latest version.", + "update.page.policy.tier-off": "Updates are disabled (updates.tier = \"off\").", + "update.page.last_result.verified": "Last update to {{tag}} verified.", + "update.page.last_result.rolled-back": "Last attempted update to {{tag}} rolled back: {{reason}}.", + "update.page.last_result.rollback-failed": "Last update attempt failed AND rollback failed: {{reason}}. Manual intervention required.", + "update.page.last_result.preflight-failed": "Last attempted update to {{tag}} failed preflight: {{reason}}.", + "update.page.last_result.cancelled": "Last attempted update to {{tag}} cancelled by admin.", + "update.execution.idle": "Idle", + "update.execution.preflight": "Pre-flight checks", + "update.execution.preflight-failed": "Pre-flight failed", + "update.execution.draining": "Draining sessions", + "update.execution.executing": "Updating...", + "update.execution.pending-verification": "Pending verification", + "update.execution.verified": "Verified", + "update.execution.rolling-back": "Rolling back", + "update.execution.rolled-back": "Rolled back", + "update.execution.rollback-failed": "Rollback failed", + "update.banner.terminal.rollback-failed": "An update attempt failed and could not be rolled back. 
Manual intervention required.", + "update.drain.t60": "Etherpad will restart in 60 seconds to apply an update.", + "update.drain.t30": "Etherpad will restart in 30 seconds to apply an update.", + "update.drain.t10": "Etherpad will restart in 10 seconds to apply an update." +``` + +- [ ] **Step 4: Build the admin UI and visit it locally** + +```bash +pnpm install # ensure admin deps in case anything is missing +pnpm --filter admin run build +pnpm run dev -- --port 9003 & +# In a browser: http://localhost.lan:9003/admin/update — log in as admin +# Verify the Apply button renders when latest version differs from current +kill %1 +``` + +> Don't kill the apply manually after pressing it on a real install — the update will actually run. Use `pnpm run dev` in a disposable worktree if you want to test the full apply path. + +- [ ] **Step 5: Commit** + +```bash +git add admin/src/pages/UpdatePage.tsx admin/src/store/store.ts src/locales/en.json +git commit -m "$(cat <<'EOF' +feat(updater): admin UI Apply/Cancel/Acknowledge buttons + +UpdatePage renders the right action set per execution.status, surfaces +lastResult with localised copy, and shows policy denial reasons (e.g. +install-method-not-writable, rollback-failed-terminal). Buttons round- +trip status through /admin/update/status after each action. + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 13: Admin UI — log stream view + +**Files:** +- Modify: `admin/src/pages/UpdatePage.tsx` + +While `execution.status === 'preflight' | 'draining' | 'executing' | 'rolling-back'`, poll `/admin/update/log` once a second and render the tail in a `
pre`. Stop polling when the status leaves the set.
+
+- [ ] **Step 1: Add the polling effect**
+
+Inside `UpdatePage`, after the existing `useEffect` for `/admin/update/status`, add:
+
+```tsx
+const log = useStore((s) => s.updateLog);
+const setLog = useStore((s) => s.setUpdateLog);
+const inFlight = ['preflight', 'draining', 'executing', 'rolling-back'].includes(us?.execution?.status ?? '');
+useEffect(() => {
+  if (!inFlight) return;
+  let cancelled = false;
+  const tick = async () => {
+    if (cancelled) return;
+    try {
+      const r = await fetch('/admin/update/log', {credentials: 'same-origin'});
+      if (r.ok) setLog(await r.text());
+      // Re-fetch status too so we know when to stop polling.
+      const s = await fetch('/admin/update/status', {credentials: 'same-origin'});
+      if (s.ok) setUpdateStatus(await s.json());
+    } catch {/* noop */}
+    if (!cancelled) setTimeout(tick, 1000);
+  };
+  tick();
+  return () => { cancelled = true; };
+}, [inFlight, setLog, setUpdateStatus]);
+```
+
+In the JSX:
+
+```tsx
+{inFlight && (
+  <div>
+    <h3>{t('update.page.log')}</h3>
+    <pre>{log}</pre>
+  </div>
+)} +``` + +- [ ] **Step 2: Add i18n key** + +In `src/locales/en.json`: + +```json + "update.page.log": "Update log (last 200 lines)" +``` + +- [ ] **Step 3: Smoke test in a browser** + +Same workflow as Task 12 step 4. Trigger an Apply on a git checkout that's safe to update (e.g., a disposable worktree). Watch the log block populate. + +- [ ] **Step 4: Commit** + +```bash +git add admin/src/pages/UpdatePage.tsx src/locales/en.json +git commit -m "$(cat <<'EOF' +feat(updater): admin UI streams update log while update is in flight + +While execution.status is preflight/draining/executing/rolling-back the +page polls /admin/update/log + /admin/update/status once a second, +showing the rolling tail and switching off automatically when the run +terminates. + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 14: Pad-side drain announcement + +**Files:** +- Modify: `src/static/js/chat.js` or `src/static/js/pad.js` (whichever handles incoming `shoutMessage`) +- Modify: `src/locales/en.json` (already done in Task 12 — verify keys exist) + +`broadcastShout` in Task 11 sends a shoutMessage payload of the form `{message: {message: 'update.drain.t60', values: {seconds: 60}}, ...}`. The pad client renders shouts via the existing chat pipeline. We need that pipeline to look up `payload.message.message` as a translation key when present and substitute `payload.message.values`. + +- [ ] **Step 1: Find the shout-rendering site** + +```bash +grep -rn "shoutMessage\|payload.message" src/static/js/ | head -20 +``` + +Locate the function that turns the COLLABROOM shoutMessage into chat text. In Etherpad core that lives in `src/static/js/pad.js` or `src/static/js/chat.js` — search for `shoutMessage`. 
+ +- [ ] **Step 2: Extend the renderer to handle i18n keys** + +Wrap the existing logic so `if (typeof payload.message.message === 'string' && payload.message.message.startsWith('update.drain.'))` is rendered through `html10n.translations` lookup; otherwise fall back to current behaviour. Concrete patch (adapt to actual code): + +```javascript +// existing: +// const text = payload.message.message; +// becomes: +const raw = payload.message.message; +const values = payload.message.values || {}; +let text = raw; +if (typeof raw === 'string' && raw.startsWith('update.drain.') && window.html10n && window.html10n.translations) { + const tpl = window.html10n.translations[raw]; + if (typeof tpl === 'string') { + text = tpl.replace(/\{\{(\w+)\}\}/g, (_, k) => String(values[k] ?? '')); + } +} +``` + +(`html10n.get(raw, values)` is the bound API but `window._` is unbound per memory `project_plugin_window_underscore_audit.md` — go through `window.html10n.translations` directly to dodge that bug.) + +- [ ] **Step 3: Add a Playwright test** + +In `src/tests/frontend-new/specs/`, add a spec that opens a pad, simulates a shout from the admin socket via the existing admin shout test pattern (`grep -rn "shout" src/tests/frontend-new/`) — if no harness exists, skip this Playwright test and rely on the manual smoke step below. **Do not write a fake test.** + +- [ ] **Step 4: Manual smoke test** + +```bash +pnpm run dev -- --port 9003 & +# Open http://localhost.lan:9003/p/test-drain in one tab +# In another tab, log in to /admin and use the Shout feature to send "update.drain.t60" +# Verify the pad shows "Etherpad will restart in 60 seconds..." +kill %1 +``` + +If the manual test fails — i.e., the pad shows the literal key — adjust the renderer in step 2 until the pad shows the localised string. Per memory `feedback_test_localized_strings`, do not declare done while the literal key shows. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/static/js/chat.js src/static/js/pad.js +git commit -m "$(cat <<'EOF' +feat(updater): pad shoutMessage renders update.drain.* via html10n + +When the executor's drain phase broadcasts update.drain.t60/t30/t10, +pads render the localised string instead of the bare i18n key. Goes +through html10n.translations directly to dodge the unbound window._ +bug documented in project_plugin_window_underscore_audit. + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 15: Integration test — end-to-end against a tmp git repo + +**Files:** +- Create: `src/tests/backend/specs/updater-integration.ts` + +This is the highest-value test in the plan: it runs `executeUpdate` against a real tmp git repo, verifying happy path + each rollback variant by stubbing only the steps that would mutate the *current* install (we replace `pnpm install` with a `bash -c 'exit 0'` and similar). The test is deliberately heavy — run it on its own, not in the unit-test loop. 
+ +- [ ] **Step 1: Skeleton failing test** + +Create `src/tests/backend/specs/updater-integration.ts`: + +```typescript +'use strict'; + +const assert = require('assert').strict; +import {execSync, spawn} from 'node:child_process'; +import fs from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; +import {executeUpdate} from '../../../node/updater/UpdateExecutor'; +import {performRollback, checkPendingVerification} from '../../../node/updater/RollbackHandler'; +import {EMPTY_STATE} from '../../../node/updater/types'; + +const sh = (cmd: string, opts: any = {}) => execSync(cmd, {stdio: 'pipe', ...opts}).toString().trim(); + +const buildTmpRepo = async (): Promise => { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'updater-it-')); + sh('git init -b main', {cwd: dir}); + sh('git config user.email test@example.com', {cwd: dir}); + sh('git config user.name test', {cwd: dir}); + await fs.writeFile(path.join(dir, 'pnpm-lock.yaml'), 'lockfileVersion: x\n'); + sh('git add . && git commit -m initial', {cwd: dir}); + sh('git tag v0.0.1', {cwd: dir}); + await fs.writeFile(path.join(dir, 'pnpm-lock.yaml'), 'lockfileVersion: y\n'); + sh('git add . && git commit -m bump', {cwd: dir}); + sh('git tag v0.0.2', {cwd: dir}); + // executor expects an "origin" — point it at the same dir for the ls-remote check. + sh(`git remote add origin ${dir}`, {cwd: dir}); + return dir; +}; + +const stubSpawn = (overrides: Record = {}) => { + // Emulate spawn for everything by mapping (cmd, args) -> exit code. + return ((cmd: string, args: string[]) => { + const key = `${cmd} ${args.join(' ')}`; + const exit = overrides[key] ?? (cmd === 'pnpm' ? 0 : -1); // -1 means "use real git" + if (exit === -1) { + // Real git for this step. 
+ const real = spawn(cmd, args, {cwd: (overrides as any).__cwd, stdio: ['ignore', 'pipe', 'pipe']}); + return real; + } + return { + stdout: {on: () => {}}, stderr: {on: () => {}}, + on: (e: string, cb: any) => e === 'close' && setImmediate(() => cb(exit)), + } as any; + }) as any; +}; + +describe(__filename, function () { + this.timeout(20_000); + + it('happy path: executes against tmp repo, lands on pending-verification', async () => { + const repo = await buildTmpRepo(); + const states: any[] = []; + let exited: number | null = null; + const r = await executeUpdate({ + repoDir: repo, + backupDir: path.join(repo, 'var', 'update-backup'), + spawnFn: stubSpawn({'pnpm install --frozen-lockfile': 0, 'pnpm run build:ui': 0, __cwd: repo} as any), + readSha: async () => sh('git rev-parse HEAD', {cwd: repo}), + copyFile: (s, d) => fs.mkdir(path.dirname(d), {recursive: true}).then(() => fs.copyFile(s, d)), + saveState: async (s) => { states.push(structuredClone(s)); }, + initialState: structuredClone(EMPTY_STATE), + targetTag: 'v0.0.2', + now: () => new Date(), + exit: (code) => { exited = code; }, + }); + assert.equal(r.outcome, 'pending-verification'); + assert.equal(exited, 75); + assert.equal(states.at(-1).execution.status, 'pending-verification'); + // Backup file exists. + await fs.access(path.join(repo, 'var', 'update-backup', 'pnpm-lock.yaml')); + await fs.rm(repo, {recursive: true, force: true}); + }); + + it('install failure rolls back to original SHA', async () => { + const repo = await buildTmpRepo(); + const original = sh('git rev-parse HEAD', {cwd: repo}); + let exited: number | null = null; + const states: any[] = []; + + // Phase 1: executor with failing install. 
+ await executeUpdate({ + repoDir: repo, backupDir: path.join(repo, 'var', 'update-backup'), + spawnFn: stubSpawn({'pnpm install --frozen-lockfile': 1, __cwd: repo} as any), + readSha: async () => sh('git rev-parse HEAD', {cwd: repo}), + copyFile: (s, d) => fs.mkdir(path.dirname(d), {recursive: true}).then(() => fs.copyFile(s, d)), + saveState: async (s) => { states.push(structuredClone(s)); }, + initialState: structuredClone(EMPTY_STATE), + targetTag: 'v0.0.2', + now: () => new Date(), + exit: (c) => { exited = c; }, + }); + assert.equal(states.at(-1).execution.status, 'rolling-back'); + + // Phase 2: rollback. + await performRollback(states.at(-1), { + repoDir: repo, backupDir: path.join(repo, 'var', 'update-backup'), + spawnFn: stubSpawn({'pnpm install --frozen-lockfile': 0, __cwd: repo} as any), + copyFile: (s, d) => fs.copyFile(s, d), + saveState: async (s) => { states.push(structuredClone(s)); }, + exit: (c) => { exited = c; }, + now: () => new Date(), + rollbackHealthCheckSeconds: 60, + }); + assert.equal(states.at(-1).execution.status, 'rolled-back'); + assert.equal(sh('git rev-parse HEAD', {cwd: repo}), original); + assert.equal(exited, 75); + await fs.rm(repo, {recursive: true, force: true}); + }); + + // Add: build-failure rollback (same as install-failure but with build:ui exit 1). + // Add: crash-loop guard (state.bootCount = 3 forces immediate rollback in checkPendingVerification). +}); +``` + +- [ ] **Step 2: Run — confirm fail / pass** + +Run: `pnpm run test -- --grep updater-integration` +Expected: PASS for the two scenarios above; if not, debug — typical issues are `git ls-remote --tags` against a self-origin which needs `git push origin v0.0.2` first; add it inside `buildTmpRepo`. 
+ +- [ ] **Step 3: Add the build-failure + crash-loop scenarios** + +Append: + +```typescript + it('build failure rolls back to original SHA', async () => { /* same as install but spawnFn returns build:ui=1, install=0 */ }); + + it('crash-loop guard forces rollback when bootCount > 2', async () => { + const repo = await buildTmpRepo(); + const original = sh('git rev-parse HEAD', {cwd: repo}); + sh('git checkout v0.0.2', {cwd: repo}); + // pretend we're already on v0.0.2 (post-update boot) and the lockfile backup exists. + await fs.mkdir(path.join(repo, 'var', 'update-backup'), {recursive: true}); + await fs.copyFile(path.join(repo, 'pnpm-lock.yaml'), path.join(repo, 'var', 'update-backup', 'pnpm-lock.yaml')); + sh(`git checkout ${original}`, {cwd: repo}); + sh(`cp var/update-backup/pnpm-lock.yaml pnpm-lock.yaml`, {cwd: repo}); + sh('git checkout v0.0.2', {cwd: repo}); + + let exited: number | null = null; + const states: any[] = []; + const state = { + ...structuredClone(EMPTY_STATE), + execution: {status: 'pending-verification', targetTag: 'v0.0.2', fromSha: original, deadlineAt: '2026-05-08T10:00:00Z'} as const, + bootCount: 3, + }; + const r = checkPendingVerification(state, { + repoDir: repo, backupDir: path.join(repo, 'var', 'update-backup'), + spawnFn: stubSpawn({'pnpm install --frozen-lockfile': 0, __cwd: repo} as any), + copyFile: (s, d) => fs.copyFile(s, d), + saveState: async (s) => { states.push(structuredClone(s)); }, + exit: (c) => { exited = c; }, + now: () => new Date(), + rollbackHealthCheckSeconds: 60, + }); + assert.equal(r.armed, false); + // Wait a tick for the async rollback to finish. 
+ await new Promise((r) => setImmediate(r)); + assert.equal(states.at(-1).execution.status, 'rolled-back'); + assert.equal(sh('git rev-parse HEAD', {cwd: repo}), original); + assert.equal(exited, 75); + await fs.rm(repo, {recursive: true, force: true}); + }); +``` + +- [ ] **Step 4: Run all integration tests** + +Run: `pnpm run test -- --grep "updater-integration|updateActions|updateStatus"` +Expected: PASS for everything. + +- [ ] **Step 5: Commit** + +```bash +git add src/tests/backend/specs/updater-integration.ts +git commit -m "$(cat <<'EOF' +test(updater): integration suite over a tmp git repo + +Exercises executeUpdate + performRollback + checkPendingVerification +end-to-end against a disposable git repo with two tagged commits: +happy path -> pending-verification, install-fail rollback, build-fail +rollback, crash-loop bootCount>2 forced rollback. Runs with mocha at +20s timeout; no real pnpm/network. + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 16: Playwright spec — admin Apply flow + +**Files:** +- Create: `src/tests/frontend-new/admin-spec/update-page-actions.spec.ts` + +The Playwright spec stubs the network: it intercepts `/admin/update/status` to seed a fake `latest`, intercepts `/admin/update/apply` to return `202`, and verifies the UI transitions through the right buttons. We do *not* actually run an update — that's covered by the manual smoke runbook. 
+ +- [ ] **Step 1: Failing spec** + +Create `src/tests/frontend-new/admin-spec/update-page-actions.spec.ts`: + +```typescript +import {expect, test} from '@playwright/test'; + +const baseStatus = { + currentVersion: '2.7.1', + latest: {version: '2.7.2', tag: 'v2.7.2', body: 'release notes', publishedAt: '2026-05-01T00:00:00Z', prerelease: false, htmlUrl: 'https://example/'}, + lastCheckAt: '2026-05-08T00:00:00Z', + installMethod: 'git', + tier: 'manual', + policy: {canNotify: true, canManual: true, canAuto: false, canAutonomous: false, reason: 'ok'}, + vulnerableBelow: [], + execution: {status: 'idle'}, + lastResult: null, + lockHeld: false, +}; + +test('admin Apply button posts to /admin/update/apply and re-fetches status', async ({page}) => { + let posted = false; + await page.route('**/admin/update/status', (route) => route.fulfill({json: baseStatus})); + await page.route('**/admin/update/apply', (route) => { posted = true; route.fulfill({status: 202, json: {accepted: true}}); }); + await page.goto('/admin/update'); + await expect(page.getByRole('button', {name: /apply update/i})).toBeVisible(); + await page.getByRole('button', {name: /apply update/i}).click(); + await expect.poll(() => posted).toBe(true); +}); + +test('install-method-not-writable hides Apply and shows the policy reason', async ({page}) => { + const denied = {...baseStatus, installMethod: 'docker', + policy: {canNotify: true, canManual: false, canAuto: false, canAutonomous: false, reason: 'install-method-not-writable'}}; + await page.route('**/admin/update/status', (route) => route.fulfill({json: denied})); + await page.goto('/admin/update'); + await expect(page.getByRole('button', {name: /apply update/i})).toHaveCount(0); + await expect(page.getByText(/Updates from the admin UI require a git install/i)).toBeVisible(); +}); + +test('rollback-failed shows Acknowledge button', async ({page}) => { + const terminal = {...baseStatus, + execution: {status: 'rollback-failed', reason: 'pnpm install 
failed; rollback failed: pnpm exit 1', targetTag: 'v2.7.2', fromSha: 'x', at: '2026-05-08T00:00:00Z'}, + lastResult: {targetTag: 'v2.7.2', fromSha: 'x', outcome: 'rollback-failed', reason: 'pnpm install failed', at: '2026-05-08T00:00:00Z'}}; + await page.route('**/admin/update/status', (route) => route.fulfill({json: terminal})); + await page.goto('/admin/update'); + await expect(page.getByRole('button', {name: /acknowledge/i})).toBeVisible(); +}); +``` + +- [ ] **Step 2: Run** + +```bash +pnpm run test-ui -- src/tests/frontend-new/admin-spec/update-page-actions.spec.ts +``` + +Expected: PASS. + +- [ ] **Step 3: Commit** + +```bash +git add src/tests/frontend-new/admin-spec/update-page-actions.spec.ts +git commit -m "$(cat <<'EOF' +test(updater): Playwright admin Apply flow + policy denial + acknowledge + +Stubs /admin/update/status and /admin/update/apply at the route level so +we can assert UI transitions (button visibility, policy-denial copy, +terminal-state acknowledge) without actually running an update. + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 17: Banner copy for terminal states + +**Files:** +- Modify: `admin/src/components/UpdateBanner.tsx` + +When `execution.status === 'rollback-failed'`, the banner text should be the strong `update.banner.terminal.rollback-failed` copy and link to `/update`. + +- [ ] **Step 1: Patch the banner** + +Replace the JSX so it picks the right key: + +```tsx +if (!updateStatus) return null; +const exec = updateStatus.execution?.status; +if (exec === 'rollback-failed') { + return ( +
+ {' '} + {t('update.banner.cta')} +
+ ); +} +if (!updateStatus.latest || updateStatus.currentVersion === updateStatus.latest.version) return null; +// existing ok-banner... +``` + +- [ ] **Step 2: Manual visual test** + +Seed the state file (`var/update-state.json`) with `execution.status: 'rollback-failed'` then load `/admin/update`. Confirm the banner copy matches `update.banner.terminal.rollback-failed`, not the literal key. Per memory `feedback_test_localized_strings`, fail the task if the literal key shows. + +- [ ] **Step 3: Commit** + +```bash +git add admin/src/components/UpdateBanner.tsx +git commit -m "$(cat <<'EOF' +feat(updater): admin banner shows rollback-failed terminal state + +When execution.status is rollback-failed, the banner switches to a +role=alert with stronger copy, regardless of whether a new release is +known. Other terminal states (preflight-failed, rolled-back) surface on +the page itself, not the banner — they're informational, not urgent. + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 18: Documentation + smoke runbook + +**Files:** +- Modify: `doc/admin/updates.md` +- Modify: `CHANGELOG.md` +- Create: `docs/superpowers/specs/2026-04-25-auto-update-runbook.md` + +The spec's "Phased rollout / PR 2" entry calls out a runbook ("manual smoke runbook in `docs/superpowers/specs/2026-04-25-auto-update-runbook.md`, run before each tier ships, against a disposable VM"). This task ships it alongside the user-facing docs. + +- [ ] **Step 0: Write the smoke runbook** + +Create `docs/superpowers/specs/2026-04-25-auto-update-runbook.md` covering: + +1. Provisioning a disposable Ubuntu/Debian VM with systemd + a checked-out git install. +2. Setting `updates.tier: "manual"` in `settings.json`. +3. Booting under systemd with `Restart=on-failure` + `RestartSec=5` (sample unit file inline). +4. Forcing a downgrade by `git checkout` of the previous tag, restart, confirm Apply button shows. +5. 
Apply, observe drain broadcasts in a separate pad, observe restart, observe verified state. +6. Forcing rollback: corrupt `pnpm-lock.yaml` between checkout and install (or pin to a tag with a known-broken build), Apply, observe rolled-back state. +7. Forcing rollback-failed: also break the backup lockfile, Apply, observe terminal state and Acknowledge flow. +8. Crash-loop guard: pin a tag whose code throws on boot, Apply, observe bootCount climb to 3 + forced rollback. +9. Sign-off checklist: every observable transition matches `docs/superpowers/specs/2026-04-25-auto-update-design.md` "State machine". + +- [ ] **Step 1: Append Tier 2 section to `doc/admin/updates.md`** + +Document: +- Activation: `updates.tier: "manual"` requires a `git` install. +- Process supervisor required (systemd/pm2/docker restart-policy) — Etherpad exits 75 to trigger restart. +- Apply flow: button → preflight → 60s drain (broadcasts at T-60/-30/-10) → fetch/checkout/install/build → exit → restart → 60s health check. +- Rollback paths: install/build failure, health-check timeout, crash loop (>2 reboots). +- Terminal states: `preflight-failed` and `rolled-back` are informational; `rollback-failed` requires `POST /admin/update/acknowledge` after manual recovery. +- Settings: each new key with default + when to change. +- Signature verification: opt-in via `requireSignature: true`; document GNUPGHOME path. +- What is *not* covered: Tier 3 (auto) and Tier 4 (autonomous) ship later. + +- [ ] **Step 2: Add to `CHANGELOG.md` Unreleased** + +```markdown +### Updater +- Tier 2 (manual click): admins can now apply updates from `/admin/update` on git installs. Requires a process supervisor; the executor exits 75 to trigger restart, and the next boot runs a 60s health check that auto-rolls back on failure. Tags are signature-checked when `updates.requireSignature: true`. 
New settings: `updates.preApplyGraceMinutes`, `drainSeconds`, `rollbackHealthCheckSeconds`, `diskSpaceMinMB`, `requireSignature`, `trustedKeysPath`. +``` + +- [ ] **Step 3: Commit** + +```bash +git add doc/admin/updates.md CHANGELOG.md docs/superpowers/specs/2026-04-25-auto-update-runbook.md +git commit -m "$(cat <<'EOF' +docs(updater): document Tier 2 manual-click flow + smoke runbook + +Adds doc/admin/updates.md Tier 2 section: prerequisites (git install + +process supervisor), Apply flow with timings, rollback paths, terminal +states + acknowledge, signature-verification opt-in. Ships the manual +smoke runbook the design spec calls for: disposable VM, systemd unit, +forced rollback / rollback-failed / crash-loop scenarios. Notes Tier 3/4 +are deferred to follow-up PRs. + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 19: Final sanity sweep + open PR + +**Files:** none (workflow only). + +- [ ] **Step 1: Full type check + tests** + +```bash +pnpm run ts-check +pnpm vitest run src/tests/backend-new/specs/updater +pnpm run test -- --grep "updater|updateActions|updateStatus" +pnpm run test-ui -- src/tests/frontend-new/admin-spec/update-page-actions.spec.ts +pnpm --filter admin run build +``` + +Expected: every step PASS. + +- [ ] **Step 2: Push branch** + +```bash +git push -u origin feat/7607-auto-update-tier2-manual-click +``` + +- [ ] **Step 3: Open PR against `develop`** + +```bash +gh pr create --base develop --title "feat(updater): tier 2 — manual-click update from /admin/update (#7607)" --body "$(cat <<'EOF' +## Summary + +Ships **Tier 2 (manual click)** of the four-tier auto-update design at +`docs/superpowers/specs/2026-04-25-auto-update-design.md`. Builds on PR #7601 +(Tier 1 — notify, merged 2026-05-01). + +- Admins on git installs see an **Apply update** button at `/admin/update`. 
+- Click flow: pre-flight checks → 60s drain (with T-60/-30/-10 pad broadcasts) → `git fetch / checkout / pnpm install --frozen-lockfile / pnpm run build:ui` → exit 75 for the supervisor to restart. +- 60s health-check on the next boot. On crash loop (bootCount > 2) or health-check timeout we restore the prior SHA + lockfile and exit 75 again. +- Terminal `rollback-failed` state surfaces a strong banner; admin clicks **Acknowledge** to clear after manual recovery. +- New settings under `updates.*`: `preApplyGraceMinutes`, `drainSeconds`, `rollbackHealthCheckSeconds`, `diskSpaceMinMB`, `requireSignature`, `trustedKeysPath` (all opt-in / sane defaults). +- Signature verification (`requireSignature`) is opt-in and stub-friendly: false → log warning and pass; true → `git verify-tag ` against the user keyring (or `trustedKeysPath` via `GNUPGHOME`). Etherpad's release process does not yet sign tags consistently — turning on by default would break Tier 2 for everyone, so this is documented as follow-up. + +Tier 3 (auto with grace window) and Tier 4 (autonomous within maintenance window) are out of scope for this PR. + +## Architecture + +- New atomic units under `src/node/updater/`: `lock` (PID file), `trustedKeys` (gpg via git verify-tag), `preflight` (sequenced check pipeline), `UpdateExecutor` (DI-spawn pipeline), `RollbackHandler` (boot health-timer + crash-loop guard), `SessionDrainer` (timed broadcasts + accept-flag), `updateLog` (rolling appender + tail). +- New routes in `src/node/hooks/express/updateActions.ts`: `POST /admin/update/{apply,cancel,acknowledge}`, `GET /admin/update/log` — strict admin auth. +- `RollbackHandler.checkPendingVerification` wires into boot in `src/node/updater/index.ts`; `markBootHealthy` is called from `src/node/server.ts` after state hits `RUNNING`. +- Admin UI: `UpdatePage` renders Apply/Cancel/Acknowledge per `execution.status`, polls `/admin/update/log` while in flight, surfaces lastResult and policy denial copy. 
Banner adds a terminal-state alert variant. +- Pad UI: existing shoutMessage pipeline learns to render `update.drain.t60/t30/t10` keys via `html10n.translations` (avoids the unbound `window._` bug). + +## Test plan + +- [x] `pnpm vitest run src/tests/backend-new/specs/updater` — unit suite (lock, preflight, trustedKeys, UpdateExecutor, RollbackHandler, SessionDrainer, updateLog, drainer-handshake, UpdatePolicy, index-boot, state) +- [x] `pnpm run test --grep updateActions` — mocha API tests for the four new endpoints (auth, policy, terminal-state acknowledge) +- [x] `pnpm run test --grep updater-integration` — end-to-end against a tmp git repo: happy path, install-fail rollback, build-fail rollback, crash-loop forced rollback +- [x] `pnpm run test-ui -- src/tests/frontend-new/admin-spec/update-page-actions.spec.ts` — Playwright Apply / policy denial / Acknowledge +- [x] Manual smoke: drain announcement renders the localised string in a real pad +- [x] `pnpm run ts-check` clean, `pnpm --filter admin run build` clean + +## Notes + +- Process supervisor is a hard requirement for Tier 2. Documented in `doc/admin/updates.md`. +- Tag signature verification is opt-in pending a separate "sign all releases" project. Logged as a warning when skipped. + +🤖 Generated with [Claude Code](https://claude.com/claude-code) +EOF +)" +``` + +- [ ] **Step 4: Wait for CI then check, fix anything that breaks** + +```bash +sleep 30 +gh pr checks --watch +``` + +If a check fails, pull the log, fix, push. Per memory `feedback_check_ci_after_pr`, do not move on with red CI. + +- [ ] **Step 5: Action Qodo review** + +Once Qodo posts review comments, fetch and address each per memory `feedback_qodo_pr_feedback`. + +```bash +gh pr view --comments | head -200 +``` + +--- + +## Self-review checklist (run before declaring this plan ready) + +- [ ] Every spec section under "Tier 2 — manual click", "Error handling", "Phased rollout / PR 2" has a corresponding task. 
+- [ ] Type names / function names are consistent across tasks (e.g., `executeUpdate`, `performRollback`, `checkPendingVerification`, `runPreflight`, `acquireLock`/`releaseLock`/`isHeld`, `createDrainer`, `tailLines`, `verifyReleaseTag`). +- [ ] No "TODO" / "TBD" / "similar to above" / "appropriate validation" placeholder steps. +- [ ] Every `bash` snippet runs without further parameter substitution. +- [ ] Every test step shows the actual test code, not "write a test for this". +- [ ] Every `git commit` step lists the exact files to add and a Conventional-Commits message with the project's standard `Co-Authored-By` footer. +- [ ] Tasks 14 and 17 require a manual visual check; that is documented as a hard gate (per memory `feedback_test_localized_strings`). +- [ ] Tier 3 / 4 are explicitly out of scope. diff --git a/docs/superpowers/specs/2026-04-25-auto-update-runbook.md b/docs/superpowers/specs/2026-04-25-auto-update-runbook.md new file mode 100644 index 00000000000..84b14ff3772 --- /dev/null +++ b/docs/superpowers/specs/2026-04-25-auto-update-runbook.md @@ -0,0 +1,202 @@ +# Etherpad Auto-Update — Manual Smoke Runbook + +**Status:** required gate before each tier ships, per `2026-04-25-auto-update-design.md` § "Phased rollout". +**Audience:** the engineer cutting a release that includes new updater code. +**Time budget:** ~30–40 minutes for the full sweep against a disposable VM. + +This runbook exercises the failure paths that unit and integration tests cannot reach: a real process supervisor, a real `pnpm install` run, real session drain broadcasts to a real pad client. Run it on a throw-away VM you don't mind nuking. + +## 0. Provision a disposable VM + +Anything Linux works; the example below uses Debian/Ubuntu under systemd. + +```bash +# On the VM +sudo adduser --system --group --home /srv/etherpad --shell /bin/bash etherpad +sudo apt update && sudo apt install -y git nodejs ca-certificates +# Etherpad's pnpm comes from corepack — Node 22+ ships it. 
+sudo -u etherpad bash -c ' + cd /srv/etherpad + git clone https://github.com/ether/etherpad.git current + cd current + corepack enable && corepack prepare pnpm@latest-9 --activate + pnpm install + pnpm run build:ui +' +``` + +## 1. Install Etherpad as a systemd service + +`/etc/systemd/system/etherpad.service`: + +```ini +[Unit] +Description=Etherpad +After=network.target + +[Service] +Type=simple +User=etherpad +WorkingDirectory=/srv/etherpad/current +ExecStart=/usr/bin/pnpm run dev +Restart=on-failure +RestartSec=5 +SuccessExitStatus=75 +# Treat exit 75 as "intentional" so systemd doesn't escalate-restart counters. + +[Install] +WantedBy=multi-user.target +``` + +Then: + +```bash +sudo systemctl daemon-reload +sudo systemctl enable --now etherpad +journalctl -u etherpad -f & # tail the log in another terminal +``` + +## 2. Configure for Tier 2 + +Edit `/srv/etherpad/current/settings.json` and set: + +```jsonc +{ + "updates": { + "tier": "manual", + "checkIntervalHours": 1, + "drainSeconds": 30, // shorten the wait during smoke testing + "rollbackHealthCheckSeconds": 30 + } +} +``` + +`sudo systemctl restart etherpad`. Visit `http://:9001/admin/update` and log in as the admin user from `settings.json`. + +## 3. Force "an update is available" + +The simplest way: `git checkout` to a commit *before* a tagged release. + +```bash +sudo -u etherpad bash -c 'cd /srv/etherpad/current && git checkout v2.7.2' +sudo systemctl restart etherpad +``` + +Trigger an immediate version check (or wait an hour): + +```bash +curl -fsSL http://localhost:9001/admin/update/status | jq . +# Expect: latest.version newer than currentVersion, policy.canManual=true +``` + +The admin UI banner should now read **"Update available"**, and `/admin/update` should show an **"Apply update"** button. + +## 4. Happy path: apply, drain, restart, verify + +1. Open a pad in another browser tab (`http://:9001/p/test`). +2. Click **Apply update** on `/admin/update`. +3. 
**Within 30 seconds** confirm: + - The pad shows a gritter notification "Etherpad will restart in 30 seconds…" (i18n string from `update.drain.t30`), then `update.drain.t10`. + - The page polls `/admin/update/log`; the `
<pre>` block fills with `git fetch / checkout / pnpm install / pnpm run build:ui` output.
+4. systemd journal shows `update executed: <fromSha> -> <toSha>; exiting 75 for supervisor restart`.
+5. systemd restarts the unit (~5s under `RestartSec`).
+6. Reload `/admin/update`. State should be **`verified`** with `lastResult.outcome: "verified"`.
+
+**Sign-off:** every observable transition matches the state machine in the design spec § "State machine". If any step lingers or the page shows a different status, capture `var/log/update.log` and stop.
+
+## 5. Rollback path: install failure
+
+Force a rollback by giving pnpm something it can't resolve.
+
+```bash
+# As etherpad user, in /srv/etherpad/current:
+git checkout v2.7.2
+echo 'lockfileVersion: this-is-not-real-content' >> pnpm-lock.yaml
+sudo systemctl restart etherpad
+```
+
+Visit `/admin/update` and click Apply.
+
+Expected:
+
+- Drain announcement on the pad as before.
+- Log shows `pnpm install --frozen-lockfile` exiting non-zero.
+- State goes through `rolling-back` → `rolled-back`.
+- After supervisor restart, `/admin/update` shows the **rolled-back** banner with `lastResult.reason` describing the install failure.
+- `git rev-parse HEAD` matches the pre-update SHA.
+- Click **Acknowledge** to clear the lastResult banner.
+
+## 6. Rollback path: build failure
+
+```bash
+git checkout v2.7.2
+# Break the build by introducing a syntax error:
+echo 'this is not valid TypeScript' >> src/static/js/pad.ts
+sudo systemctl restart etherpad   # confirm the broken tree still serves; we want apply to fail at build:ui, not at boot
+```
+
+Apply, observe `pnpm run build:ui` exit non-zero in the log, observe `rolling-back` → `rolled-back`. Working tree restored.
+
+Revert the syntax error before continuing.
+
+## 7. Crash-loop guard
+
+Force the new version to crash at boot more than twice. Easiest:
+
+```bash
+# As etherpad user:
+git checkout v2.7.2
+# Apply to v2.7.3, but during the apply window introduce a startup error:
+# (Edit src/node/server.ts in the v2.7.3 tag's worktree to throw immediately.)
+```
+
+Click Apply. The new boot crashes; systemd restarts; RollbackHandler increments `bootCount`. After three crashes, `bootCount > 2` triggers a forced rollback regardless of the health-check timer.
+
+Observe state lands on `rolled-back` with `reason: "health-check-failed-or-crash-loop"`. Working tree on the original SHA.
+
+## 8. Rollback-failed terminal state
+
+Hardest to set up; force `pnpm install` to fail on the rollback path too.
+
+```bash
+# Trigger a normal install-failed rollback (step 5), but BEFORE it runs the
+# rollback step, corrupt the backup lockfile:
+echo garbage > /srv/etherpad/current/var/update-backup/pnpm-lock.yaml
+# … or remove the etherpad user's permission to the install dir mid-flow.
+```
+
+Expected:
+
+- State lands on **`rollback-failed`**.
+- `/admin/update` shows the strong red banner (role=alert) with the
+  `update.banner.terminal.rollback-failed` copy.
+- `policy.canManual` stays true (the install method is still git), but Apply requests are refused with `409 execution-busy:rollback-failed` until Acknowledge clears the state; `policy.canAuto` is false (terminal-blocked).
+- Manually fix the install (restore the lockfile, fix permissions), then
+  click **Acknowledge**. State returns to `idle` and Apply re-enables.
+
+## 9. Cancel during drain
+
+Click Apply. Within 30s, click Cancel.
+
+Expected:
+
+- Drain timers stop firing immediately.
+- State returns to `idle`.
+- `lastResult.outcome: "cancelled"`.
+- `var/update.lock` is gone.
+- No exit; systemd doesn't restart.
+
+## 10. Sign-off checklist
+
+Tick every line before approving the release that introduces this code:
+
+- [ ] Happy path lands on `verified` with the working tree on the new tag.
+- [ ] Install-fail and build-fail rollbacks restore the previous SHA.
+- [ ] Crash-loop guard forces rollback at `bootCount > 2`.
+- [ ] `rollback-failed` shows the strong banner and Acknowledge clears it.
+- [ ] Cancel during drain leaves no lock, returns to `idle`.
+- [ ] Pad client renders the localised drain announcement (NOT the literal i18n key).
+- [ ] systemd journal shows no unhandled rejections, no orphaned processes.
+- [ ] `var/log/update.log` is rotated when it crosses 10 MB (force this by writing >10 MB into the file and triggering an Apply).
+
+If any line is unticked, do not ship the release.
diff --git a/settings.json.docker b/settings.json.docker
index 36becc015fd..1147a88ff2d 100644
--- a/settings.json.docker
+++ b/settings.json.docker
@@ -218,7 +218,13 @@
     "installMethod": "docker",
     "checkIntervalHours": 6,
     "githubRepo": "ether/etherpad",
-    "requireAdminForStatus": false
+    "requireAdminForStatus": false,
+    "preApplyGraceMinutes": 0,
+    "drainSeconds": 60,
+    "rollbackHealthCheckSeconds": 60,
+    "diskSpaceMinMB": 500,
+    "requireSignature": false,
+    "trustedKeysPath": null
   },
 
   /*
diff --git a/settings.json.template b/settings.json.template
index 863286addc1..edde9eb7bea 100644
--- a/settings.json.template
+++ b/settings.json.template
@@ -227,7 +227,22 @@
      * endpoint open (the version is already public via /health). Set true to hide
      * full update detail from non-admins without turning the updater off.
      */
-    "requireAdminForStatus": false
+    "requireAdminForStatus": false,
+    /*
+     * Tier 2+ knobs. Only meaningful at tier "manual" or higher.
+     * - preApplyGraceMinutes: tier 3 only — countdown before an auto-update applies.
+     * - drainSeconds: how long to broadcast "restart imminent" before exiting.
+     * - rollbackHealthCheckSeconds: window after restart for /health to come up.
+     * - diskSpaceMinMB: pre-flight refuses to start an update without this much free.
+     * - requireSignature: refuse updates whose tag isn't signed by a trusted key.
+     * - trustedKeysPath: override the keyring location passed to git verify-tag (GNUPGHOME).
+     */
+    "preApplyGraceMinutes": 0,
+    "drainSeconds": 60,
+    "rollbackHealthCheckSeconds": 60,
+    "diskSpaceMinMB": 500,
+    "requireSignature": false,
+    "trustedKeysPath": null
   },
 
   /*
diff --git a/src/ep.json b/src/ep.json
index bf90c52d43b..08fc734fe71 100644
--- a/src/ep.json
+++ b/src/ep.json
@@ -116,6 +116,13 @@
         "expressCreateServer": "ep_etherpad-lite/node/hooks/express/updateStatus"
       }
     },
+    {
+      "name": "updateActions",
+      "post": ["ep_etherpad-lite/admin"],
+      "hooks": {
+        "expressCreateServer": "ep_etherpad-lite/node/hooks/express/updateActions"
+      }
+    },
     {
       "name": "admin",
       "hooks": {
diff --git a/src/locales/en.json b/src/locales/en.json
index a8602ad35e0..7b899881f0e 100644
--- a/src/locales/en.json
+++ b/src/locales/en.json
@@ -48,6 +48,34 @@
   "update.page.up_to_date": "You are running the latest version.",
   "update.badge.severe": "Etherpad on this server is severely outdated. Tell your admin.",
   "update.badge.vulnerable": "Etherpad on this server is running a version with known security issues. Tell your admin.",
+  "update.page.apply": "Apply update",
+  "update.page.cancel": "Cancel",
+  "update.page.acknowledge": "Acknowledge",
+  "update.page.log": "Update log (last 200 lines)",
+  "update.page.execution": "Status",
+  "update.page.policy.install-method-not-writable": "Updates from the admin UI require a git install. Update via your package manager.",
+  "update.page.policy.rollback-failed-terminal": "A previous update failed and could not be rolled back. Press Acknowledge after the install is healthy to clear the lock.",
+  "update.page.policy.up-to-date": "You are running the latest version.",
+  "update.page.policy.tier-off": "Updates are disabled (updates.tier = \"off\").",
+  "update.page.last_result.verified": "Last update to {{tag}} verified.",
+  "update.page.last_result.rolled-back": "Last attempted update to {{tag}} rolled back: {{reason}}.",
+  "update.page.last_result.rollback-failed": "Last update attempt failed AND rollback failed: {{reason}}. Manual intervention required.",
+  "update.page.last_result.preflight-failed": "Last attempted update to {{tag}} failed preflight: {{reason}}.",
+  "update.page.last_result.cancelled": "Last attempted update to {{tag}} cancelled by admin.",
+  "update.execution.idle": "Idle",
+  "update.execution.preflight": "Pre-flight checks",
+  "update.execution.preflight-failed": "Pre-flight failed",
+  "update.execution.draining": "Draining sessions",
+  "update.execution.executing": "Updating...",
+  "update.execution.pending-verification": "Pending verification",
+  "update.execution.verified": "Verified",
+  "update.execution.rolling-back": "Rolling back",
+  "update.execution.rolled-back": "Rolled back",
+  "update.execution.rollback-failed": "Rollback failed",
+  "update.banner.terminal.rollback-failed": "An update attempt failed and could not be rolled back. Manual intervention required.",
+  "update.drain.t60": "Etherpad will restart in 60 seconds to apply an update.",
+  "update.drain.t30": "Etherpad will restart in 30 seconds to apply an update.",
+  "update.drain.t10": "Etherpad will restart in 10 seconds to apply an update.",
 
   "index.newPad": "New Pad",
   "index.settings": "Settings",
diff --git a/src/node/handler/PadMessageHandler.ts b/src/node/handler/PadMessageHandler.ts
index 65ac9d7626d..22f80ea8dc6 100644
--- a/src/node/handler/PadMessageHandler.ts
+++ b/src/node/handler/PadMessageHandler.ts
@@ -37,6 +37,7 @@ import settings, {
   sofficeAvailable
 } from '../utils/Settings';
 import {anonymizeIp} from '../utils/anonymizeIp';
+import {isAcceptingConnections} from '../updater/SessionDrainer';
 const logIp = (ip: string | null | undefined) => anonymizeIp(ip, settings.ipLogging);
 const securityManager = require('../db/SecurityManager');
 const plugins = require('../../static/js/pluginfw/plugin_defs');
@@ -377,6 +378,17 @@ exports.handleMessage = async (socket:any, message: ClientVarMessage) => {
   if (!thisSession) throw new Error('message from an unknown connection');
 
   if (message.type === 'CLIENT_READY') {
+    // Refuse new joiners while the updater drainer is running. Existing sockets
+    // are unaffected — only the initial CLIENT_READY handshake is gated. The
+    // pad UI will show the drain announcement separately via shoutMessage.
+    // Use socket.emit('message', ...) for consistency with the other disconnect
+    // paths in this file (see line ~221, 569). socket.json.send is a socket.io
+    // v2/v3-era API that may not exist on v4 Socket objects.
+    if (!isAcceptingConnections()) {
+      socket.emit('message', {disconnect: 'updateInProgress'});
+      socket.disconnect(true);
+      return;
+    }
     // Prefer the HttpOnly author-token cookie over the in-message token (GDPR
     // PR3). Legacy clients (pre-PR3 browsers or API consumers) still send
     // `token` in the CLIENT_READY payload — honour it one more release, warn
diff --git a/src/node/hooks/express/socketio.ts b/src/node/hooks/express/socketio.ts
index 9184eff8831..79ef892760b 100644
--- a/src/node/hooks/express/socketio.ts
+++ b/src/node/hooks/express/socketio.ts
@@ -14,6 +14,9 @@ const padMessageHandler = require('../../handler/PadMessageHandler');
 
 let io:any;
 const logger = log4js.getLogger('socket.io');
+
+/** Returns the socket.io Server once expressCreateServer has run, or null otherwise. Used by features that need to broadcast outside the regular hook surface. */
+export const getIo = (): any => io;
 const sockets = new Set();
 const socketsEvents = new events.EventEmitter();
 
diff --git a/src/node/hooks/express/updateActions.ts b/src/node/hooks/express/updateActions.ts
new file mode 100644
index 00000000000..a951dd26b2e
--- /dev/null
+++ b/src/node/hooks/express/updateActions.ts
@@ -0,0 +1,360 @@
+'use strict';
+
+import path from 'node:path';
+import fs from 'node:fs/promises';
+import {spawn} from 'node:child_process';
+import log4js from 'log4js';
+import {ArgsExpressType} from '../../types/ArgsExpressType';
+import settings, {getEpVersion} from '../../utils/Settings';
+import {getDetectedInstallMethod, stateFilePath, getRollbackDeps} from '../../updater';
+import {evaluatePolicy} from '../../updater/UpdatePolicy';
+import {loadState, saveState} from '../../updater/state';
+import {acquireLock, releaseLock} from '../../updater/lock';
+import {executeUpdate, SpawnFn} from '../../updater/UpdateExecutor';
+import {createDrainer, DrainBroadcastKey, Drainer} from '../../updater/SessionDrainer';
+import {runPreflight} from '../../updater/preflight';
+import {verifyReleaseTag} from '../../updater/trustedKeys';
+import {tailLines, appendLine} from '../../updater/updateLog';
+import {performRollback} from '../../updater/RollbackHandler';
+import {UpdateState} from '../../updater/types';
+import {isValidTag} from '../../updater/refSafety';
+import {getIo} from './socketio';
+
+const logger = log4js.getLogger('updater');
+
+const lockPath = (): string => path.join(settings.root, 'var', 'update.lock');
+const logPath = (): string => path.join(settings.root, 'var', 'log', 'update.log');
+const backupDir = (): string => path.join(settings.root, 'var', 'update-backup');
+
+let drainer: Drainer | null = null;
+
+const requireAdmin = (req: any, res: any): boolean => {
+  const u = req.session?.user;
+  if (!u) { res.status(401).send('Authentication required'); return false; }
+  if (!u.is_admin) { res.status(403).send('Forbidden'); return false; }
+  return true;
+};
+
+const wrapAsync =
+  (fn: (req: any, res: any, next: Function) => Promise<void>) =>
+    (req: any, res: any, next: Function) => Promise.resolve(fn(req, res, next)).catch((err) => next(err));
+
+const broadcastShout = (key: DrainBroadcastKey, values: Record<string, unknown>): void => {
+  try {
+    const io = getIo();
+    if (!io) return;
+    // The pad-side renderer (src/static/js/pad.ts) already handles `messageKey`
+    // by routing through html10n.get(); we add a `values` field that the
+    // renderer interpolates into the localised string.
+    const message = {
+      type: 'COLLABROOM',
+      data: {
+        type: 'shoutMessage',
+        payload: {
+          message: {messageKey: key, values, sticky: false},
+          timestamp: Date.now(),
+        },
+      },
+    };
+    io.sockets.emit('shout', message);
+  } catch (err) {
+    logger.warn(`broadcastShout: ${(err as Error).message}`);
+  }
+};
+
+const buildPreflightDeps = (installMethod: ReturnType<typeof getDetectedInstallMethod>) => ({
+  installMethod,
+  workingTreeClean: () => new Promise<boolean>((resolve) => {
+    const c = spawn('git', ['status', '--porcelain'], {cwd: settings.root});
+    let out = '';
+    c.stdout.on('data', (b) => { out += b.toString(); });
+    c.on('close', () => resolve(out.trim().length === 0));
+    c.on('error', () => resolve(false));
+  }),
+  freeDiskMB: async (): Promise<number> => {
+    try {
+      const s = await (fs as any).statfs?.(settings.root);
+      if (!s) return Number.POSITIVE_INFINITY;
+      return Math.floor((Number(s.bavail) * Number(s.bsize)) / (1024 * 1024));
+    } catch {
+      // statfs unsupported on this platform — treat as "no constraint" rather than block.
+      return Number.POSITIVE_INFINITY;
+    }
+  },
+  pnpmOnPath: () => new Promise<boolean>((resolve) => {
+    const c = spawn('pnpm', ['--version'], {stdio: 'ignore'});
+    c.on('close', (code) => resolve(code === 0));
+    c.on('error', () => resolve(false));
+  }),
+  // We just acquired the lock in the apply endpoint, so don't double-check it here.
+  lockHeld: async () => false,
+  remoteHasTag: (tag: string) => new Promise<boolean>((resolve) => {
+    const c = spawn('git', ['ls-remote', '--tags', 'origin', tag],
+                    {cwd: settings.root, stdio: ['ignore', 'pipe', 'ignore']});
+    let out = '';
+    c.stdout.on('data', (b) => { out += b.toString(); });
+    c.on('close', () => resolve(out.trim().length > 0));
+    c.on('error', () => resolve(false));
+  }),
+  verifyTag: () => verifyReleaseTag({
+    tag: '', // overridden below — we close over targetTag
+    repoDir: settings.root,
+    requireSignature: settings.updates.requireSignature,
+    trustedKeysPath: settings.updates.trustedKeysPath,
+  }),
+});
+
+/**
+ * The set of update tiers at which the Tier 2 action endpoints serve.
+ * `notify` only ships read-only routes (registered in updateStatus.ts);
+ * `manual` and higher are the supersets that include manual-click. Disabled
+ * paths (off / notify) match prior behaviour: requests 404, no new attack
+ * surface vs PR 1.
+ *
+ * Read at request time (not hook-init time) so that operators flipping
+ * `updates.tier` in settings.json + reloading take effect without a full
+ * restart, and so that integration tests can drive the gate dynamically.
+ */
+const TIER2_TIERS: ReadonlySet<string> = new Set(['manual', 'auto', 'autonomous']);
+const tierAllowsActions = (): boolean => TIER2_TIERS.has(settings.updates.tier);
+
+export const expressCreateServer = (
+  _hookName: string,
+  {app}: ArgsExpressType,
+  cb: Function,
+): void => {
+  // Always register the routes; gate at request time so a runtime tier change
+  // takes effect on the next request rather than requiring a restart.
+  // The early 404 below preserves Qodo #1's "disabled path matches prior
+  // behaviour (no Tier 2 endpoints existed before this PR)" requirement.
+  const tierGate = (req: any, res: any, next: Function) => {
+    if (!tierAllowsActions()) return res.status(404).send('Not found');
+    next();
+  };
+  app.use(['/admin/update/apply', '/admin/update/cancel', '/admin/update/acknowledge', '/admin/update/log'], tierGate);
+
+  app.post('/admin/update/apply', wrapAsync(async (req: any, res: any) => {
+    if (!requireAdmin(req, res)) return;
+
+    const state = await loadState(stateFilePath());
+    if (!state.latest) return res.status(409).json({error: 'no-known-latest'});
+
+    // Defence in depth: VersionChecker validates tag_name before persisting,
+    // but a hand-edited update-state.json could still surface an unsafe tag
+    // here. Reject up-front rather than throw later when the executor calls
+    // assertValidTag, so the admin sees a clear 409 instead of a 500.
+    if (!isValidTag(state.latest.tag)) {
+      return res.status(409).json({error: 'invalid-tag-in-state'});
+    }
+
+    // Allowed entry statuses: idle / verified / preflight-failed / rolled-back.
+    // Anything else means an in-flight or terminal-needs-acknowledge state.
+    const allowedEntry = ['idle', 'verified', 'preflight-failed', 'rolled-back'];
+    if (!allowedEntry.includes(state.execution.status)) {
+      return res.status(409).json({error: `execution-busy:${state.execution.status}`});
+    }
+
+    const installMethod = getDetectedInstallMethod();
+    const policy = evaluatePolicy({
+      installMethod,
+      tier: settings.updates.tier,
+      current: getEpVersion(),
+      latest: state.latest.version,
+      executionStatus: state.execution.status,
+    });
+    if (!policy.canManual) {
+      return res.status(409).json({error: 'policy-denied', reason: policy.reason});
+    }
+
+    if (!await acquireLock(lockPath())) {
+      return res.status(409).json({error: 'lock-held'});
+    }
+
+    const targetTag = state.latest.tag;
+    let cleanupLock = true;
+
+    try {
+      // Persist preflight state.
+      const startedAt = new Date().toISOString();
+      const preState: UpdateState = {
+        ...state,
+        execution: {status: 'preflight', targetTag, startedAt},
+      };
+      await saveState(stateFilePath(), preState);
+      appendLine(logPath(), `[${startedAt}] PREFLIGHT target=${targetTag}`);
+
+      const baseDeps = buildPreflightDeps(installMethod);
+      const pf = await runPreflight(
+        {
+          targetTag,
+          diskSpaceMinMB: Number(settings.updates.diskSpaceMinMB) || 500,
+          requireSignature: settings.updates.requireSignature,
+          trustedKeysPath: settings.updates.trustedKeysPath,
+        },
+        {
+          ...baseDeps,
+          verifyTag: () => verifyReleaseTag({
+            tag: targetTag,
+            repoDir: settings.root,
+            requireSignature: settings.updates.requireSignature,
+            trustedKeysPath: settings.updates.trustedKeysPath,
+          }),
+        },
+      );
+
+      if (!pf.ok) {
+        const at = new Date().toISOString();
+        await saveState(stateFilePath(), {
+          ...preState,
+          execution: {status: 'preflight-failed', targetTag, reason: pf.reason, at},
+          lastResult: {
+            targetTag, fromSha: '',
+            outcome: 'preflight-failed', reason: pf.reason, at,
+          },
+        });
+        appendLine(logPath(), `[${at}] PREFLIGHT_FAILED ${pf.reason}`);
+        cleanupLock = true;
+        return res.status(409).json({error: 'preflight-failed', reason: pf.reason});
+      }
+
+      // Re-check state after preflight: /admin/update/cancel may have flipped
+      // execution back to 'idle' while we were running the slow checks. The
+      // cancel handler intentionally leaves the lock alone (we own it) and
+      // signals via state instead, so a stale apply can detect cancellation
+      // here before mutating the filesystem.
+      const afterPreflight = await loadState(stateFilePath());
+      if (afterPreflight.execution.status !== 'preflight'
+          || (afterPreflight.execution as {targetTag?: string}).targetTag !== targetTag) {
+        appendLine(logPath(),
+          `[${new Date().toISOString()}] APPLY aborted post-preflight (state=${afterPreflight.execution.status})`);
+        return res.status(409).json({error: 'cancelled-during-preflight'});
+      }
+
+      // Drain — respond 202 first so the UI starts polling /log without waiting.
+      const drainSeconds = Number(settings.updates.drainSeconds) || 60;
+      drainer = createDrainer({
+        drainSeconds,
+        broadcast: (key, values) => broadcastShout(key, values),
+      });
+      const drainEndsAt = new Date(Date.now() + drainSeconds * 1000).toISOString();
+      await saveState(stateFilePath(), {
+        ...preState,
+        execution: {status: 'draining', targetTag, drainEndsAt, startedAt: new Date().toISOString()},
+      });
+      appendLine(logPath(), `[${new Date().toISOString()}] DRAIN start drainSeconds=${drainSeconds}`);
+
+      res.status(202).json({accepted: true, drainEndsAt});
+
+      const drainResult = await drainer.start();
+      drainer = null;
+      if (drainResult.outcome === 'cancelled') {
+        // /admin/update/cancel already updated state and lastResult; just release the lock.
+        appendLine(logPath(), `[${new Date().toISOString()}] DRAIN cancelled by admin`);
+        return;
+      }
+
+      // Re-load state right before the executor runs so anything the cancel
+      // endpoint or another concurrent handler wrote is honoured.
+      const fresh = await loadState(stateFilePath());
+
+      const r = await executeUpdate({
+        repoDir: settings.root,
+        backupDir: backupDir(),
+        spawnFn: spawn as unknown as SpawnFn,
+        readSha: () => new Promise((resolve, reject) => {
+          const c = spawn('git', ['rev-parse', 'HEAD'],
+                          {cwd: settings.root, stdio: ['ignore', 'pipe', 'ignore']});
+          let out = '';
+          c.stdout.on('data', (b) => { out += b.toString(); });
+          c.on('close', (code) => code === 0
+            ? resolve(out.trim())
+            : reject(new Error(`git rev-parse exit ${code}`)));
+          c.on('error', reject);
+        }),
+        copyFile: async (src: string, dst: string) => {
+          await fs.mkdir(path.dirname(dst), {recursive: true});
+          await fs.copyFile(src, dst);
+        },
+        saveState: (s: UpdateState) => saveState(stateFilePath(), s),
+        initialState: fresh,
+        targetTag,
+        now: () => new Date(),
+        // executeUpdate calls exit on success (75) — that takes the process down,
+        // so anything after this is the failure path.
+        exit: (code: number) => process.exit(code),
+      });
+
+      // Failure paths: executor returned without exiting, state is rolling-back.
+      if (r.outcome !== 'pending-verification') {
+        const after = await loadState(stateFilePath());
+        if (after.execution.status === 'rolling-back') {
+          // performRollback will exit 75 on either success or terminal failure.
+          // We do not release the lock — exit takes the process down and the
+          // next-boot acquireLock reaps the stale PID.
+          cleanupLock = false;
+          await performRollback(after, getRollbackDeps());
+        }
+      }
+    } catch (err) {
+      logger.error(`apply failed: ${(err as Error).stack || err}`);
+      appendLine(logPath(), `[${new Date().toISOString()}] APPLY_ERROR ${(err as Error).message}`);
+      if (!res.headersSent) res.status(500).json({error: 'internal'});
+    } finally {
+      if (cleanupLock) {
+        try { await releaseLock(lockPath()); }
+        catch (err) { logger.warn(`releaseLock: ${(err as Error).message}`); }
+      }
+    }
+  }));
+
+  app.post('/admin/update/cancel', wrapAsync(async (req: any, res: any) => {
+    if (!requireAdmin(req, res)) return;
+    const state = await loadState(stateFilePath());
+    // Cancel is allowed only during pre-execute states. Once executing begins
+    // (filesystem mutated) we either complete or rollback — see spec section
+    // "Error handling" / state machine.
+    if (state.execution.status !== 'preflight' && state.execution.status !== 'draining') {
+      return res.status(409).json({error: 'not-cancellable', status: state.execution.status});
+    }
+    if (drainer) drainer.cancel();
+    const at = new Date().toISOString();
+    await saveState(stateFilePath(), {
+      ...state,
+      execution: {status: 'idle'},
+      lastResult: {
+        targetTag: (state.execution as {targetTag?: string}).targetTag ?? '',
+        fromSha: '',
+        outcome: 'cancelled',
+        reason: 'admin-cancelled',
+        at,
+      },
+    });
+    // Intentionally do NOT release the lock here. The apply handler owns the
+    // lock for its lifetime and releases it in its finally block; releasing
+    // here would let a second apply slip in while the first is still mid-
+    // preflight, racing for the same on-disk state.
+    appendLine(logPath(), `[${at}] CANCEL by admin during status=${state.execution.status}`);
+    res.json({cancelled: true});
+  }));
+
+  app.post('/admin/update/acknowledge', wrapAsync(async (req: any, res: any) => {
+    if (!requireAdmin(req, res)) return;
+    const state = await loadState(stateFilePath());
+    const terminal: ReadonlySet<string> = new Set(['rollback-failed', 'preflight-failed', 'rolled-back']);
+    if (!terminal.has(state.execution.status)) {
+      return res.status(409).json({error: 'not-terminal', status: state.execution.status});
+    }
+    await saveState(stateFilePath(), {...state, execution: {status: 'idle'}, bootCount: 0});
+    appendLine(logPath(), `[${new Date().toISOString()}] ACKNOWLEDGE ${state.execution.status} -> idle`);
+    res.json({acknowledged: true});
+  }));
+
+  app.get('/admin/update/log', wrapAsync(async (req: any, res: any) => {
+    if (!requireAdmin(req, res)) return;
+    const lines = await tailLines(logPath(), 200);
+    res.set('Content-Type', 'text/plain; charset=utf-8');
+    res.send(lines.join('\n'));
+  }));
+
+  cb();
+};
diff --git a/src/node/hooks/express/updateStatus.ts b/src/node/hooks/express/updateStatus.ts
index db30cf52c1d..69d63d889f3 100644
--- a/src/node/hooks/express/updateStatus.ts
+++ b/src/node/hooks/express/updateStatus.ts
@@ -1,11 +1,13 @@
 'use strict';
 
+import path from 'node:path';
 import {ArgsExpressType} from '../../types/ArgsExpressType';
 import settings, {getEpVersion} from '../../utils/Settings';
 import {getDetectedInstallMethod, stateFilePath} from '../../updater';
 import {evaluatePolicy} from '../../updater/UpdatePolicy';
 import {compareSemver, isMajorBehind, isVulnerable} from '../../updater/versionCompare';
 import {loadState} from '../../updater/state';
+import {isHeld} from '../../updater/lock';
 
 
 let badgeCache: {value: 'severe' | 'vulnerable' | null; at: number} = {value: null, at: 0};
@@ -37,6 +39,23 @@ const wrapAsync = (fn: (req: any, res: any, next: Function) => Promise)
     Promise.resolve(fn(req, res, next)).catch((err) => next(err));
   };
 
+/**
+ * Strip diagnostic strings (reason, fromSha, targetTag, build/install paths)
+ * from execution before exposing to unauthenticated callers. Status enum is
+ * preserved so the admin banner / pad-side badge can still render the right UI.
+ */
+const sanitizeExecution = (e: any): any => {
+  if (!e || typeof e !== 'object' || typeof e.status !== 'string') return {status: 'idle'};
+  return {status: e.status};
+};
+
+const sanitizeLastResult = (r: any): any => {
+  if (r === null) return null;
+  if (!r || typeof r !== 'object' || typeof r.outcome !== 'string') return null;
+  // outcome enum + at timestamp are non-sensitive. reason / fromSha / targetTag are dropped.
+  return {outcome: r.outcome, at: typeof r.at === 'string' ? r.at : null};
+};
+
 export const expressCreateServer = (
   _hookName: string,
   {app}: ArgsExpressType,
@@ -68,6 +87,7 @@ export const expressCreateServer = (
   // release. Admins who want the endpoint gated to authenticated admin sessions —
   // without disabling the updater entirely — set updates.requireAdminForStatus=true.
   app.get('/admin/update/status', wrapAsync(async (req, res) => {
+    const isAdmin = !!req.session?.user?.is_admin;
     if (settings.updates.requireAdminForStatus) {
       const user = req.session?.user;
       if (!user) return res.status(401).send('Authentication required');
@@ -77,8 +97,29 @@ export const expressCreateServer = (
     const current = getEpVersion();
     const installMethod = getDetectedInstallMethod();
     const policy = state.latest
-      ? evaluatePolicy({installMethod, tier: settings.updates.tier, current, latest: state.latest.version})
+      ? evaluatePolicy({
+          installMethod,
+          tier: settings.updates.tier,
+          current,
+          latest: state.latest.version,
+          executionStatus: state.execution.status,
+        })
       : null;
+    const lockHeld = await isHeld(path.join(settings.root, 'var', 'update.lock'));
+
+    // The Tier 2 fields (execution, lastResult) carry diagnostic strings
+    // built from git/pnpm stderr — environment-specific paths, error
+    // messages, etc. Endpoint defaults to unauthenticated; only authed
+    // admin sessions see the full diagnostic payload. Everyone else sees
+    // just the status enum + outcome enum so the pad-side / public banners
+    // can still render correctly without leaking operational detail.
+    const execution = isAdmin
+      ? state.execution
+      : sanitizeExecution(state.execution);
+    const lastResult = isAdmin
+      ? state.lastResult
+      : sanitizeLastResult(state.lastResult);
+
     res.json({
       currentVersion: current,
       latest: state.latest,
@@ -87,6 +128,10 @@ export const expressCreateServer = (
       tier: settings.updates.tier,
       policy,
       vulnerableBelow: state.vulnerableBelow,
+      // PR 2 additions:
+      execution,
+      lastResult,
+      lockHeld,
     });
   }));
 
diff --git a/src/node/server.ts b/src/node/server.ts
index 2e06cf6f26a..bef6af07017 100755
--- a/src/node/server.ts
+++ b/src/node/server.ts
@@ -177,6 +177,17 @@ exports.start = async () => {
   // @ts-ignore
   startDoneGate.resolve();
 
+  // Once the server is RUNNING, /health responds 200 — that is the implicit
+  // health signal the updater's pending-verification timer is waiting for.
+  // Wrapped in try/catch because it must never block startup on a bug here.
+  try {
+    // eslint-disable-next-line @typescript-eslint/no-var-requires
+    const updater = require('./updater');
+    if (typeof updater.markBootHealthy === 'function') updater.markBootHealthy();
+  } catch (err) {
+    logger.debug(`markBootHealthy: ${(err as Error).message}`);
+  }
+
   // Return the HTTP server to make it easier to write tests.
   return express.server;
 };
diff --git a/src/node/updater/RollbackHandler.ts b/src/node/updater/RollbackHandler.ts
new file mode 100644
index 00000000000..e90e8b7fd15
--- /dev/null
+++ b/src/node/updater/RollbackHandler.ts
@@ -0,0 +1,246 @@
+import path from 'node:path';
+import log4js from 'log4js';
+import {UpdateState} from './types';
+import type {SpawnFn} from './UpdateExecutor';
+import {appendLine} from './updateLog';
+
+const logger = log4js.getLogger('updater');
+
+export interface RollbackDeps {
+  /** Path of the on-disk Etherpad install (the git working tree). */
+  repoDir: string;
+  /** Where pnpm-lock.yaml was backed up by the executor. */
+  backupDir: string;
+  spawnFn: SpawnFn;
+  copyFile: (src: string, dst: string) => Promise<void>;
+  saveState: (s: UpdateState) => Promise<void>;
+  exit: (code: number) => void;
+  now: () => Date;
+  /** Health-check window after a fresh boot. Default 60s; set via updates.rollbackHealthCheckSeconds. */
+  rollbackHealthCheckSeconds: number;
+}
+
+const runStep = (
+  spawnFn: SpawnFn,
+  cwd: string,
+  logPath: string,
+  cmd: string,
+  args: string[],
+): Promise<number | null> => new Promise((resolve) => {
+  let settled = false;
+  const settle = (c: number | null) => {
+    if (settled) return;
+    settled = true;
+    resolve(c);
+  };
+  const child = spawnFn(cmd, args, {cwd, stdio: ['ignore', 'pipe', 'pipe']});
+  const tag = `${cmd} ${args.join(' ')}`;
+  child.stdout.on('data', (b: Buffer) => {
+    const t = b.toString().trimEnd();
+    logger.info(`[rollback ${tag}] ${t}`);
+    appendLine(logPath, `[${new Date().toISOString()}] rollback ${tag} | ${t}`);
+  });
+  child.stderr.on('data', (b: Buffer) => {
+    const t = b.toString().trimEnd();
+    logger.warn(`[rollback ${tag}] ${t}`);
+    appendLine(logPath, `[${new Date().toISOString()}] rollback ${tag} ERR | ${t}`);
+  });
+  // Spawn failures (binary missing, permissions) — without this listener the
+  // promise hangs forever and the rollback path never lands on terminal state.
+  child.on('error', (err: Error) => {
+    logger.error(`[rollback ${tag}] spawn error: ${err.message}`);
+    appendLine(logPath, `[${new Date().toISOString()}] rollback ${tag} SPAWN_ERR | ${err.message}`);
+    settle(1);
+  });
+  child.on('close', (c) => settle(c));
+});
+
+/**
+ * Restore the previous SHA + lockfile and exit 75 so the supervisor restarts.
+ *
+ * Lands on `rolled-back` on success, `rollback-failed` on any sub-step error.
+ * Both paths exit 75 — the supervisor restart is what brings the rolled-back
+ * (or terminal) state up where the admin UI can surface it. Rollback-failed
+ * disables auto/autonomous tiers globally (see UpdatePolicy) until an admin
+ * POSTs /admin/update/acknowledge.
+ */
+export const performRollback = async (state: UpdateState, deps: RollbackDeps): Promise<void> => {
+  const exec = state.execution;
+  if (exec.status !== 'rolling-back' && exec.status !== 'pending-verification') {
+    throw new Error(`performRollback called from unexpected status: ${exec.status}`);
+  }
+  const fromSha = (exec as {fromSha: string}).fromSha;
+  const targetTag = (exec as {targetTag: string}).targetTag;
+  const reason = exec.status === 'rolling-back'
+    ? exec.reason
+    : 'health-check-failed-or-crash-loop';
+  const logPath = path.join(deps.repoDir, 'var', 'log', 'update.log');
+
+  const failTerminal = async (subReason: string): Promise<void> => {
+    const at = deps.now().toISOString();
+    await deps.saveState({
+      ...state,
+      execution: {
+        status: 'rollback-failed',
+        reason: `${reason}; rollback also failed: ${subReason}`,
+        targetTag,
+        fromSha,
+        at,
+      },
+      lastResult: {
+        targetTag,
+        fromSha,
+        outcome: 'rollback-failed',
+        reason: `${reason}; rollback failed: ${subReason}`,
+        at,
+      },
+      bootCount: 0,
+    });
+    logger.error(
+      `rollback FAILED: ${subReason}; manual intervention required ` +
+      '(POST /admin/update/acknowledge after fixing)',
+    );
+    appendLine(logPath, `[${at}] ROLLBACK_FAILED ${subReason}`);
+    deps.exit(75);
+  };
+
+  // Force-checkout first so any partial mutation from the failed executor run
+  // (rewritten lockfile, half-installed modules) is discarded. -f overwrites
+  // tracked files from the target tree's index — without it, `git checkout`
+  // refuses when there are unstaged modifications to files it would replace.
+  const checkoutCode = await runStep(
+    deps.spawnFn, deps.repoDir, logPath, 'git', ['checkout', '-f', fromSha]);
+  if (checkoutCode !== 0) return failTerminal(`git checkout -f ${fromSha} exit ${checkoutCode}`);
+
+  // Now overlay the backed-up lockfile on top. Belt-and-braces: a force
+  // checkout already restored the lockfile to the target SHA's version; the
+  // backup wins on the rare case where the running install had a hand-edited
+  // lockfile we want to preserve.
+  try {
+    await deps.copyFile(
+      path.join(deps.backupDir, 'pnpm-lock.yaml'),
+      path.join(deps.repoDir, 'pnpm-lock.yaml'),
+    );
+  } catch (err: any) {
+    // ENOENT on the backup is acceptable — the force checkout already
+    // restored the right lockfile from the index.
+    if (err?.code !== 'ENOENT') {
+      return failTerminal(`copy lockfile: ${(err as Error).message}`);
+    }
+  }
+
+  const installCode = await runStep(deps.spawnFn, deps.repoDir, logPath, 'pnpm', ['install', '--frozen-lockfile']);
+  if (installCode !== 0) return failTerminal(`pnpm install exit ${installCode}`);
+
+  const at = deps.now().toISOString();
+  await deps.saveState({
+    ...state,
+    execution: {status: 'rolled-back', reason, targetTag, restoredSha: fromSha, at},
+    lastResult: {targetTag, fromSha, outcome: 'rolled-back', reason, at},
+    bootCount: 0,
+  });
+  logger.warn(`rolled back to ${fromSha} (reason: ${reason})`);
+  appendLine(logPath, `[${at}] ROLLED_BACK to ${fromSha}; reason=${reason}; exiting 75`);
+  deps.exit(75);
+};
+
+export interface CheckResult {
+  /** True if a health-check timer was armed and is awaiting markVerified or expiry. */
+  armed: boolean;
+  /** Cancels the timer and transitions to `verified`. No-op when armed is false. */
+  markVerified: () => void;
+}
+
+/**
+ * Inspect the persisted execution state at boot and react:
+ *  - idle / verified / etc.: no-op.
+ *  - pending-verification with bootCount > 2: force rollback (crash-loop guard).
+ *  - pending-verification otherwise: increment bootCount, persist, arm a timer.
+ */
+export const checkPendingVerification = (state: UpdateState, deps: RollbackDeps): CheckResult => {
+  const exec = state.execution;
+  if (exec.status !== 'pending-verification') return {armed: false, markVerified: () => {}};
+
+  // Fire-and-forget helpers that swallow rejections cleanly. We intentionally
+  // don't propagate — the boot sequence must proceed even if the rollback
+  // path can't write its terminal state. Worst case: the supervisor restart
+  // brings the same boot back up and the bootCount-based crash-loop guard
+  // catches it on the next attempt.
+  const fireRollback = (s: UpdateState) => {
+    void performRollback(s, deps).catch((err) => {
+      logger.error(`performRollback unhandled rejection: ${(err as Error).message}`);
+      // Best-effort: try to land on rollback-failed terminal state and exit
+      // 75 anyway. If saveState also rejects, log and exit so the supervisor
+      // restart at least re-runs checkPendingVerification with bootCount++.
+      const fb = {
+        ...s,
+        execution: {
+          status: 'rollback-failed' as const,
+          reason: `unhandled rollback rejection: ${(err as Error).message}`,
+          targetTag: (s.execution as {targetTag?: string}).targetTag ?? '',
+          fromSha: (s.execution as {fromSha?: string}).fromSha ?? '',
+          at: deps.now().toISOString(),
+        },
+        bootCount: 0,
+      };
+      void deps.saveState(fb).catch((saveErr) => {
+        logger.error(`fallback saveState rejected: ${(saveErr as Error).message}`);
+      }).finally(() => deps.exit(75));
+    });
+  };
+
+  const fireSaveState = (s: UpdateState, ctx: string) => {
+    void deps.saveState(s).catch((err) => {
+      logger.warn(`saveState (${ctx}) rejected: ${(err as Error).message}`);
+    });
+  };
+
+  if (state.bootCount > 2) {
+    // Don't await — fire and forget so the boot sequence proceeds; the rollback
+    // path will exit 75 asynchronously and the supervisor restarts on the
+    // restored SHA. Rejections caught + best-effort terminal-state write.
+    fireRollback(state);
+    return {armed: false, markVerified: () => {}};
+  }
+
+  const incremented: UpdateState = {...state, bootCount: state.bootCount + 1};
+  fireSaveState(incremented, 'bootCount-increment');
+
+  let cleared = false;
+  const timer = setTimeout(() => {
+    if (cleared) return;
+    fireRollback({
+      ...incremented,
+      execution: {
+        status: 'rolling-back',
+        reason: 'health-check-timeout',
+        targetTag: exec.targetTag,
+        fromSha: exec.fromSha,
+        at: deps.now().toISOString(),
+      },
+    });
+  }, deps.rollbackHealthCheckSeconds * 1000);
+
+  return {
+    armed: true,
+    markVerified: () => {
+      if (cleared) return;
+      cleared = true;
+      clearTimeout(timer);
+      const at = deps.now().toISOString();
+      fireSaveState({
+        ...incremented,
+        execution: {status: 'verified', targetTag: exec.targetTag, verifiedAt: at},
+        lastResult: {
+          targetTag: exec.targetTag,
+          fromSha: exec.fromSha,
+          outcome: 'verified',
+          reason: null,
+          at,
+        },
+        bootCount: 0,
+      }, 'mark-verified');
+      logger.info(`update verified after restart: ${exec.fromSha} -> ${exec.targetTag}`);
+    },
+  };
+};
diff --git a/src/node/updater/SessionDrainer.ts b/src/node/updater/SessionDrainer.ts
new file mode 100644
index 00000000000..d9df727b2c5
--- /dev/null
+++ b/src/node/updater/SessionDrainer.ts
@@ -0,0 +1,91 @@
+/**
+ * Coordinates the pre-restart drain: refuses new pad connections, broadcasts
+ * "system message" announcements at T-60 / T-30 / T-10, and resolves at T=0
+ * so the executor can take over.
+ *
+ * Per docs/superpowers/specs/2026-04-25-auto-update-design.md (section
+ * "Active sessions"). 60s default; configurable via `updates.drainSeconds`.
+ */
+
+let acceptingConnections = true;
+
+export const isAcceptingConnections = (): boolean => acceptingConnections;
+
+/** Test-only: reset the module-level flag between tests. */
+export const _resetForTests = (): void => { acceptingConnections = true; };
+
+export type DrainBroadcastKey =
+  | 'update.drain.t60'
+  | 'update.drain.t30'
+  | 'update.drain.t10';
+
+export interface DrainerOpts {
+  drainSeconds: number;
+  /** Called for every announcement; values carries timing data the i18n string can interpolate. */
+  broadcast: (i18nKey: DrainBroadcastKey, values: Record<string, unknown>) => void;
+}
+
+export interface Drainer {
+  start: () => Promise<{outcome: 'completed' | 'cancelled'}>;
+  cancel: () => void;
+}
+
+export const createDrainer = ({drainSeconds, broadcast}: DrainerOpts): Drainer => {
+  const timers: NodeJS.Timeout[] = [];
+  let resolveDone: ((r: {outcome: 'completed' | 'cancelled'}) => void) | null = null;
+  let cancelled = false;
+  let started = false;
+
+  const fire = (key: DrainBroadcastKey, secondsRemaining: number) => {
+    if (cancelled) return;
+    broadcast(key, {seconds: secondsRemaining});
+  };
+
+  const start = (): Promise<{outcome: 'completed' | 'cancelled'}> => {
+    if (started) return Promise.reject(new Error('drainer already started'));
+    started = true;
+    acceptingConnections = false;
+    return new Promise((resolve) => {
+      resolveDone = resolve;
+      const ms = drainSeconds * 1000;
+      // The opening announcement reports the actual drain length rather than a
+      // hardcoded 60, so a configured drainSeconds of e.g. 30 says "30 seconds".
+      // i18n key is still update.drain.t60 — that's the "start of drain" key in
+      // the locale file; the {{seconds}} placeholder carries the real value.
+      fire('update.drain.t60', drainSeconds);
+      // Only schedule T-30 / T-10 when the configured window can actually
+      // honour them. Firing a "30 seconds" message at zero remaining (because
+      // ms - 30_000 < 0) is misleading; admins picking a short drainSeconds
+      // get fewer announcements but each carries an accurate countdown.
+      if (drainSeconds > 30) {
+        timers.push(setTimeout(() => fire('update.drain.t30', 30), ms - 30_000));
+      }
+      if (drainSeconds > 10) {
+        timers.push(setTimeout(() => fire('update.drain.t10', 10), ms - 10_000));
+      }
+      timers.push(setTimeout(() => {
+        if (cancelled) return;
+        // Restore the gate as soon as the drain window closes. The executor
+        // takes over from here and the supervisor restart wipes module state
+        // anyway; if the executor throws and the process keeps running, we
+        // want join handshakes to recover rather than stay wedged.
+        // The lock + state.execution.status guarantee no fresh apply can race.
+        acceptingConnections = true;
+        resolveDone?.({outcome: 'completed'});
+        resolveDone = null;
+      }, ms));
+    });
+  };
+
+  const cancel = (): void => {
+    if (cancelled) return;
+    cancelled = true;
+    for (const t of timers) clearTimeout(t);
+    timers.length = 0;
+    acceptingConnections = true;
+    resolveDone?.({outcome: 'cancelled'});
+    resolveDone = null;
+  };
+
+  return {start, cancel};
+};
diff --git a/src/node/updater/UpdateExecutor.ts b/src/node/updater/UpdateExecutor.ts
new file mode 100644
index 00000000000..07881065e46
--- /dev/null
+++ b/src/node/updater/UpdateExecutor.ts
@@ -0,0 +1,219 @@
+import path from 'node:path';
+import log4js from 'log4js';
+import {SpawnOptions} from 'node:child_process';
+import {UpdateState} from './types';
+import {appendLine} from './updateLog';
+import {assertValidTag, refsTagsForm} from './refSafety';
+
+const logger = log4js.getLogger('updater');
+
+export interface SpawnedChild {
+  stdout: {on: (event: 'data', cb: (chunk: Buffer) => void) => void};
+  stderr: {on: (event: 'data', cb: (chunk: Buffer) => void) => void};
+  on: {
+    (event: 'close', cb: (code: number | null) => void): void;
+    (event: 'error', cb: (err: Error) => void): void;
+  };
+}
+
+export type SpawnFn = (cmd: string, args: string[], opts: SpawnOptions) => SpawnedChild;
+
+export interface ExecutorDeps {
+  /** Path of the on-disk Etherpad install (the git working tree). */
+  repoDir: string;
+  /** Where pnpm-lock.yaml + sha info gets backed up. */
+  backupDir: string;
+  /** Injected child_process.spawn so tests can drive the pipeline deterministically. */
+  spawnFn: SpawnFn;
+  /** Returns the current HEAD SHA. Production callers wrap `git rev-parse HEAD`. */
+  readSha: () => Promise;
+  /** Plain file copy. Production callers use fs.copyFile (with mkdir-p of parent). */
+  copyFile: (src: string, dst: string) => Promise;
+  /** Persist the in-flight UpdateState. Production callers use saveState(stateFilePath()). */
+  saveState: (s: UpdateState) => Promise;
+  /** State as it was when Apply was clicked — preserves Tier 1 fields (latest, email, etc.). */
+  initialState: UpdateState;
+  /** Tag to update to. */
+  targetTag: string;
+  /** Clock injection for deterministic timestamps in tests. */
+  now: () => Date;
+  /** process.exit injection so tests can assert exit code without actually exiting. */
+  exit: (code: number) => void;
+}
+
+export type ExecutorResult =
+  | {outcome: 'pending-verification'}
+  | {outcome: 'failed-install'; reason: string}
+  | {outcome: 'failed-build'; reason: string}
+  | {outcome: 'failed-checkout'; reason: string};
+
+const runStep = (
+  spawnFn: SpawnFn,
+  repoDir: string,
+  logPath: string,
+  cmd: string,
+  args: string[],
+): Promise<{code: number | null; stderr: string}> => new Promise((resolve) => {
+  let stderr = '';
+  let settled = false;
+  const settle = (v: {code: number | null; stderr: string}) => {
+    if (settled) return;
+    settled = true;
+    resolve(v);
+  };
+  const child = spawnFn(cmd, args, {cwd: repoDir, stdio: ['ignore', 'pipe', 'pipe']});
+  const tag = `${cmd} ${args.join(' ')}`;
+  child.stdout.on('data', (chunk: Buffer) => {
+    const txt = chunk.toString().trimEnd();
+    logger.info(`[${tag}] ${txt}`);
+    appendLine(logPath, `[${new Date().toISOString()}] ${tag} | ${txt}`);
+  });
+  child.stderr.on('data', (chunk: Buffer) => {
+    const txt = chunk.toString();
+    stderr += txt;
+    const trimmed = txt.trimEnd();
+    logger.warn(`[${tag}] ${trimmed}`);
+    appendLine(logPath, `[${new Date().toISOString()}] ${tag} ERR | ${trimmed}`);
+  });
+  // Spawn failures (binary missing, permissions) emit 'error' and never close.
+  // Without this listener the promise hangs forever and leaves state in-flight.
+  // Treat as exit code 1 with the error message in stderr so the caller's
+  // failure-detection branch fires normally.
+  child.on('error', (err: Error) => {
+    logger.error(`[${tag}] spawn error: ${err.message}`);
+    appendLine(logPath, `[${new Date().toISOString()}] ${tag} SPAWN_ERR | ${err.message}`);
+    settle({code: 1, stderr: stderr + err.message});
+  });
+  child.on('close', (code) => settle({code, stderr}));
+});
+
+/**
+ * Run the update pipeline. Each transition writes state before/after so a hard
+ * kill mid-step lands the next boot in a known state for RollbackHandler.
+ *
+ * On install/build/checkout failure the executor transitions to `rolling-back`,
+ * persists, and returns. The route layer then runs RollbackHandler.performRollback.
+ * The executor does NOT call `exit` on failure paths — the rollback path owns
+ * that exit so we don't double-exit and lose log lines.
+ *
+ * On a thrown exception (e.g., copyFile EACCES, saveState ENOSPC) the executor
+ * also transitions to rolling-back with `failed-checkout` so the route's post-
+ * executor rollback path picks it up. The state must never get stuck at
+ * `executing` — if it does, no further updates can start until an admin
+ * acknowledges.
+ */
+export const executeUpdate = async (deps: ExecutorDeps): Promise => {
+  const logPath = path.join(deps.repoDir, 'var', 'log', 'update.log');
+  let fromSha = '';
+
+  // Wrap the whole body so any throw — readSha, saveState, copyFile, even an
+  // unexpected synchronous error in a step — lands us at rolling-back rather
+  // than leaving execution stuck at 'executing' forever.
+  try {
+    // Reject unsafe release-tag strings (option injection guard).
+    // Tag is sourced from GitHub's tag_name and persisted into update-state.json;
+    // a tag starting with '-' would otherwise be parsed by git as an option flag.
+    const safeTag = assertValidTag(deps.targetTag);
+    fromSha = await deps.readSha();
+
+    let s: UpdateState = {
+      ...deps.initialState,
+      execution: {
+        status: 'executing',
+        targetTag: deps.targetTag,
+        fromSha,
+        startedAt: deps.now().toISOString(),
+      },
+      bootCount: 0,
+    };
+    await deps.saveState(s);
+
+    // Snapshot lockfile (SHA already captured above; the rollback handler reads
+    // execution.fromSha rather than a separate file so a successful rollback
+    // doesn't depend on /var staying writable past this point).
+    await deps.copyFile(
+      path.join(deps.repoDir, 'pnpm-lock.yaml'),
+      path.join(deps.backupDir, 'pnpm-lock.yaml'),
+    );
+
+    const fail = async (
+      outcome: 'failed-install' | 'failed-build' | 'failed-checkout',
+      reason: string,
+    ): Promise => {
+      s = {
+        ...s,
+        execution: {
+          status: 'rolling-back',
+          reason,
+          targetTag: deps.targetTag,
+          fromSha,
+          at: deps.now().toISOString(),
+        },
+      };
+      await deps.saveState(s);
+      logger.error(`update step failed (${outcome}): ${reason}`);
+      appendLine(logPath, `[${deps.now().toISOString()}] FAIL ${outcome}: ${reason}`);
+      return {outcome, reason};
+    };
+
+    let r = await runStep(deps.spawnFn, deps.repoDir, logPath, 'git', ['fetch', '--tags', 'origin']);
+    if (r.code !== 0) return fail('failed-checkout', `git fetch exit ${r.code}: ${r.stderr.trim()}`);
+
+    // Use the refs/tags/ form so even an unforeseen edge-case in the tag
+    // string can't be parsed as a git option. assertValidTag above already
+    // rules out leading '-' / whitespace / shell metacharacters.
+    r = await runStep(
+      deps.spawnFn, deps.repoDir, logPath, 'git', ['checkout', refsTagsForm(safeTag)]);
+    if (r.code !== 0) return fail('failed-checkout', `git checkout exit ${r.code}: ${r.stderr.trim()}`);
+
+    r = await runStep(deps.spawnFn, deps.repoDir, logPath, 'pnpm', ['install', '--frozen-lockfile']);
+    if (r.code !== 0) return fail('failed-install', `pnpm install exit ${r.code}: ${r.stderr.trim()}`);
+
+    r = await runStep(deps.spawnFn, deps.repoDir, logPath, 'pnpm', ['run', 'build:ui']);
+    if (r.code !== 0) return fail('failed-build', `pnpm run build:ui exit ${r.code}: ${r.stderr.trim()}`);
+
+    // pending-verification: the next boot's RollbackHandler arms the health-check timer.
+    s = {
+      ...s,
+      execution: {
+        status: 'pending-verification',
+        targetTag: deps.targetTag,
+        fromSha,
+        // Real deadline is computed at next boot using rollbackHealthCheckSeconds.
+        // We persist a placeholder here purely so the field is present.
+        deadlineAt: deps.now().toISOString(),
+      },
+      bootCount: 0,
+    };
+    await deps.saveState(s);
+    logger.info(`update executed: ${fromSha} -> ${deps.targetTag}; exiting 75 for supervisor restart`);
+    void appendLine(logPath, `[${deps.now().toISOString()}] OK pending-verification ${fromSha} -> ${deps.targetTag}; exiting 75`);
+    deps.exit(75);
+    return {outcome: 'pending-verification'};
+  } catch (err) {
+    // Unexpected throw — fs ENOSPC, EACCES on the backup dir, network blip
+    // surfaced through readSha, etc. Persist rolling-back so the route's
+    // post-executor rollback path runs and the state never wedges at 'executing'.
+    const reason = `executor exception: ${(err as Error).message}`;
+    logger.error(reason);
+    void appendLine(logPath, `[${deps.now().toISOString()}] EXECUTOR_THROW ${reason}`);
+    try {
+      await deps.saveState({
+        ...deps.initialState,
+        execution: {
+          status: 'rolling-back',
+          reason,
+          targetTag: deps.targetTag,
+          fromSha,
+          at: deps.now().toISOString(),
+        },
+        bootCount: 0,
+      });
+    } catch (saveErr) {
+      // Even saveState threw. Best-effort log, rethrow original — the route's
+      // catch will surface it. State on disk is whatever last successfully wrote.
+      logger.error(`could not persist rolling-back: ${(saveErr as Error).message}`);
+    }
+    return {outcome: 'failed-checkout', reason};
+  }
+};
diff --git a/src/node/updater/UpdatePolicy.ts b/src/node/updater/UpdatePolicy.ts
index ed00229da8e..c9ace999690 100644
--- a/src/node/updater/UpdatePolicy.ts
+++ b/src/node/updater/UpdatePolicy.ts
@@ -10,14 +10,27 @@ export interface PolicyInput {
   tier: Tier;
   current: string;
   latest: string;
+  /**
+   * Optional execution-status hint. Only `rollback-failed` materially changes
+   * policy: while it's set, canAuto / canAutonomous are denied (an admin must
+   * acknowledge first). canManual stays on because clicking Apply *is* the
+   * intervention the terminal state requires.
+   */
+  executionStatus?: string;
 }
 
 /**
- * Decide which update tiers are allowed under the given (installMethod, tier, current, latest).
- * Pure function — no I/O. The single source of truth for "what's allowed in this environment."
- * `reason` is one of: 'tier-off' | 'up-to-date' | 'install-method-not-writable' | 'ok'.
+ * Decide which update tiers are allowed under the given (installMethod, tier,
+ * current, latest, executionStatus). Pure function — no I/O. The single source
+ * of truth for "what's allowed in this environment."
+ *
+ * `reason` is one of:
+ *   'tier-off' | 'up-to-date' | 'install-method-not-writable'
+ *   | 'rollback-failed-terminal' | 'ok'.
  */
-export const evaluatePolicy = ({installMethod, tier, current, latest}: PolicyInput): PolicyResult => {
+export const evaluatePolicy = ({
+  installMethod, tier, current, latest, executionStatus,
+}: PolicyInput): PolicyResult => {
   if (tier === 'off') {
     return {canNotify: false, canManual: false, canAuto: false, canAutonomous: false, reason: 'tier-off'};
   }
@@ -32,11 +45,12 @@ export const evaluatePolicy = ({installMethod, tier, current, latest}: PolicyInp
     return {canNotify, canManual: false, canAuto: false, canAutonomous: false, reason: 'install-method-not-writable'};
   }
 
+  const terminal = executionStatus === 'rollback-failed';
   return {
     canNotify,
     canManual: tier === 'manual' || tier === 'auto' || tier === 'autonomous',
-    canAuto: tier === 'auto' || tier === 'autonomous',
-    canAutonomous: tier === 'autonomous',
-    reason: 'ok',
+    canAuto: !terminal && (tier === 'auto' || tier === 'autonomous'),
+    canAutonomous: !terminal && tier === 'autonomous',
+    reason: terminal ? 'rollback-failed-terminal' : 'ok',
   };
 };
diff --git a/src/node/updater/VersionChecker.ts b/src/node/updater/VersionChecker.ts
index 8dc1f8d5f5d..ff4b0f34a52 100644
--- a/src/node/updater/VersionChecker.ts
+++ b/src/node/updater/VersionChecker.ts
@@ -1,5 +1,6 @@
 import {ReleaseInfo, VulnerableBelowDirective} from './types';
 import {parseVulnerableBelow} from './versionCompare';
+import {isValidTag} from './refSafety';
 
 export interface FetchResult {
   status: number;
@@ -49,6 +50,15 @@ export const checkLatestRelease = async (
     return {kind: 'error', status: 200};
   }
 
+  // Reject any tag that would be unsafe to hand to git later. Validating at
+  // the persistence boundary (rather than only at the executor) means a
+  // malformed tag_name from a misconfigured fork-as-github-repo never lands
+  // in update-state.json. Treated as a fetch error so the polling loop will
+  // try again next interval.
+  if (!isValidTag(j.tag_name)) {
+    return {kind: 'error', status: 200};
+  }
+
   const tag = j.tag_name;
   const version = tag.replace(/^v/, '');
   const body: string = typeof j.body === 'string' ? j.body : '';
diff --git a/src/node/updater/index.ts b/src/node/updater/index.ts
index 475c0599231..22e08042de5 100644
--- a/src/node/updater/index.ts
+++ b/src/node/updater/index.ts
@@ -1,4 +1,6 @@
 import path from 'node:path';
+import {spawn} from 'node:child_process';
+import fs from 'node:fs/promises';
 import log4js from 'log4js';
 import settings, {getEpVersion} from '../utils/Settings';
 import {detectInstallMethod} from './InstallMethodDetector';
@@ -7,6 +9,8 @@ import {loadState, saveState} from './state';
 import {isMajorBehind, isVulnerable} from './versionCompare';
 import {evaluatePolicy} from './UpdatePolicy';
 import {decideEmails} from './Notifier';
+import {checkPendingVerification, CheckResult, RollbackDeps} from './RollbackHandler';
+import type {SpawnFn} from './UpdateExecutor';
 import {InstallMethod, UpdateState} from './types';
 
 const logger = log4js.getLogger('updater');
@@ -16,6 +20,7 @@ let timer: NodeJS.Timeout | null = null;
 let initialTimer: NodeJS.Timeout | null = null;
 let checkInFlight = false;
 let inMemoryState: UpdateState | null = null;
+let pendingVerification: CheckResult | null = null;
 
 export const stateFilePath = () => path.join(settings.root, 'var', 'update-state.json');
 
@@ -126,6 +131,21 @@ const startPolling = (): void => {
   initialTimer = setTimeout(() => { initialTimer = null; void performCheck(); }, 5000);
 };
 
+/** Build the dependency bundle RollbackHandler / UpdateExecutor expect. */
+export const getRollbackDeps = (): RollbackDeps => ({
+  repoDir: settings.root,
+  backupDir: path.join(settings.root, 'var', 'update-backup'),
+  spawnFn: spawn as unknown as SpawnFn,
+  copyFile: async (src: string, dst: string) => {
+    await fs.mkdir(path.dirname(dst), {recursive: true});
+    await fs.copyFile(src, dst);
+  },
+  saveState: (s: UpdateState) => saveState(stateFilePath(), s),
+  exit: (code: number) => process.exit(code),
+  now: () => new Date(),
+  rollbackHealthCheckSeconds: Number(settings.updates.rollbackHealthCheckSeconds) || 60,
+});
+
 /** Hook entry point — called by ep.json on createServer. */
 export const expressCreateServer = async (): Promise => {
   detectedMethod = await detectInstallMethod({
@@ -133,9 +153,29 @@ export const expressCreateServer = async (): Promise => {
     rootDir: settings.root,
   });
   logger.info(`updater: install method = ${detectedMethod}, tier = ${settings.updates.tier}`);
+
+  // Tier 2: if the previous boot left the state in pending-verification, arm
+  // the health-check timer (or force rollback when bootCount has climbed past
+  // the crash-loop threshold). This must run BEFORE polling starts so the
+  // rollback can fire even if the version checker is misconfigured.
+  const state = await getCurrentState();
+  pendingVerification = checkPendingVerification(state, getRollbackDeps());
+
   if (settings.updates.tier !== 'off') startPolling();
 };
 
+/**
+ * Called by the Etherpad runtime once the express stack is fully wired and
+ * /health responds — that's the implicit health signal the
+ * pending-verification timer is waiting for.
+ */
+export const markBootHealthy = (): void => {
+  if (pendingVerification) {
+    pendingVerification.markVerified();
+    pendingVerification = null;
+  }
+};
+
 /** Shutdown hook. */
 export const shutdown = async (): Promise => {
   if (timer) { clearInterval(timer); timer = null; }
diff --git a/src/node/updater/lock.ts b/src/node/updater/lock.ts
new file mode 100644
index 00000000000..2dd00e5cbd2
--- /dev/null
+++ b/src/node/updater/lock.ts
@@ -0,0 +1,78 @@
+import fs from 'node:fs/promises';
+import path from 'node:path';
+
+interface LockFile {
+  pid: number;
+  at: string;
+}
+
+const isPidLive = (pid: number): boolean => {
+  try {
+    process.kill(pid, 0);
+    return true;
+  } catch (err: any) {
+    // ESRCH = no such process (stale).
+    // EPERM = exists but we can't signal — treat as live (some other user owns it).
+    return err.code !== 'ESRCH';
+  }
+};
+
+const readIfPresent = async (lockPath: string): Promise => {
+  let raw: string;
+  try {
+    raw = await fs.readFile(lockPath, 'utf8');
+  } catch (err: any) {
+    if (err.code === 'ENOENT') return null;
+    return null;
+  }
+  let parsed: unknown;
+  try { parsed = JSON.parse(raw); } catch { return null; }
+  if (!parsed || typeof parsed !== 'object') return null;
+  const p = parsed as Record;
+  if (typeof p.pid !== 'number' || typeof p.at !== 'string') return null;
+  return {pid: p.pid, at: p.at};
+};
+
+/**
+ * Atomic acquire via O_CREAT|O_EXCL. If the file already exists, the holder's
+ * PID is checked; when dead we reap it and retry once. Returns false on a live
+ * conflict — the caller is expected to surface "lock-held" to the admin.
+ */
+export const acquireLock = async (lockPath: string): Promise => {
+  await fs.mkdir(path.dirname(lockPath), {recursive: true});
+  const payload = JSON.stringify({pid: process.pid, at: new Date().toISOString()});
+
+  const tryCreate = async (): Promise => {
+    try {
+      const fh = await fs.open(lockPath, 'wx');
+      try { await fh.writeFile(payload); } finally { await fh.close(); }
+      return true;
+    } catch (err: any) {
+      if (err.code === 'EEXIST') return false;
+      throw err;
+    }
+  };
+
+  if (await tryCreate()) return true;
+
+  const existing = await readIfPresent(lockPath);
+  if (existing && isPidLive(existing.pid)) return false;
+
+  // Stale or unparseable — reap and retry once. A concurrent reaper may beat us,
+  // in which case the second tryCreate also returns false (correctly: someone
+  // else holds it now).
+  try { await fs.unlink(lockPath); }
+  catch (err: any) { if (err.code !== 'ENOENT') throw err; }
+  return tryCreate();
+};
+
+export const releaseLock = async (lockPath: string): Promise => {
+  try { await fs.unlink(lockPath); }
+  catch (err: any) { if (err.code !== 'ENOENT') throw err; }
+};
+
+/** True iff the lock file exists *and* the recorded PID is live. Stale locks read as not-held. */
+export const isHeld = async (lockPath: string): Promise => {
+  const f = await readIfPresent(lockPath);
+  return !!f && isPidLive(f.pid);
+};
diff --git a/src/node/updater/preflight.ts b/src/node/updater/preflight.ts
new file mode 100644
index 00000000000..f0403e186b6
--- /dev/null
+++ b/src/node/updater/preflight.ts
@@ -0,0 +1,54 @@
+import {InstallMethod} from './types';
+import type {VerifyResult} from './trustedKeys';
+
+export type PreflightReason =
+  | 'install-method-not-writable'
+  | 'dirty-working-tree'
+  | 'low-disk-space'
+  | 'pnpm-not-found'
+  | 'lock-held'
+  | 'remote-tag-missing'
+  | 'signature-verification-failed';
+
+export interface PreflightInput {
+  targetTag: string;
+  diskSpaceMinMB: number;
+  requireSignature: boolean;
+  trustedKeysPath: string | null;
+}
+
+export interface PreflightDeps {
+  installMethod: Exclude;
+  workingTreeClean: () => Promise;
+  freeDiskMB: () => Promise;
+  pnpmOnPath: () => Promise;
+  lockHeld: () => Promise;
+  remoteHasTag: (tag: string) => Promise;
+  verifyTag: () => Promise;
+}
+
+export type PreflightResult = {ok: true} | {ok: false; reason: PreflightReason};
+
+const WRITABLE_METHODS: ReadonlySet> = new Set(['git']);
+
+/**
+ * Sequenced preflight: each check is fast and reads the world. Order matters —
+ * cheap, definitive failures (install method) run before slow ones (network
+ * tag lookup, gpg). The first failure short-circuits.
+ */
+export const runPreflight = async (
+  input: PreflightInput,
+  deps: PreflightDeps,
+): Promise => {
+  if (!WRITABLE_METHODS.has(deps.installMethod)) {
+    return {ok: false, reason: 'install-method-not-writable'};
+  }
+  if (!await deps.workingTreeClean()) return {ok: false, reason: 'dirty-working-tree'};
+  if ((await deps.freeDiskMB()) < input.diskSpaceMinMB) return {ok: false, reason: 'low-disk-space'};
+  if (!await deps.pnpmOnPath()) return {ok: false, reason: 'pnpm-not-found'};
+  if (await deps.lockHeld()) return {ok: false, reason: 'lock-held'};
+  if (!await deps.remoteHasTag(input.targetTag)) return {ok: false, reason: 'remote-tag-missing'};
+  const sig = await deps.verifyTag();
+  if (!sig.ok) return {ok: false, reason: 'signature-verification-failed'};
+  return {ok: true};
+};
diff --git a/src/node/updater/refSafety.ts b/src/node/updater/refSafety.ts
new file mode 100644
index 00000000000..e837a628ad8
--- /dev/null
+++ b/src/node/updater/refSafety.ts
@@ -0,0 +1,43 @@
+/**
+ * Safety helpers for any release-tag string Etherpad's updater hands to git.
+ *
+ * The release tag originates from GitHub's `releases/latest` API (`tag_name`)
+ * and is then persisted into `var/update-state.json`. A tag that starts with
+ * `-` would be parsed by git as an option flag rather than a positional ref —
+ * `git checkout -fast-forward` and similar tricks could bypass signature
+ * verification or change checkout semantics. A tag with shell metacharacters
+ * is less of an issue under `child_process.spawn` (no shell), but we reject
+ * those too because git's own ref-name rules forbid them and a malformed tag
+ * has nowhere reasonable to be honoured anyway.
+ *
+ * Rules (a subset of git's check-ref-format spec — strict on purpose):
+ *   - Non-empty.
+ *   - Length <= 200.
+ *   - May not start with `-` (option injection) or `.` (git rejects).
+ *   - May not contain whitespace, NUL, or any of: ~ ^ : ? * [ \\
+ *   - May not contain `..` (git's own rule).
+ *
+ * Callers should also use the `refs/tags/` form when invoking git so
+ * that even an unforeseen edge-case can't be parsed as an option, and pass
+ * `--` as an end-of-options marker on commands that accept it.
+ */
+
+const FORBIDDEN_CHARS = /[\s\x00~^:?*\[\\]/;
+
+export const isValidTag = (tag: unknown): tag is string => {
+  if (typeof tag !== 'string') return false;
+  if (tag.length === 0 || tag.length > 200) return false;
+  if (tag.startsWith('-') || tag.startsWith('.')) return false;
+  if (FORBIDDEN_CHARS.test(tag)) return false;
+  if (tag.includes('..')) return false;
+  return true;
+};
+
+/** Throwing form for hot paths where invalid input is a programmer/data error. */
+export const assertValidTag = (tag: unknown): string => {
+  if (!isValidTag(tag)) throw new Error(`unsafe release tag: ${JSON.stringify(tag)}`);
+  return tag as string;
+};
+
+/** Wrap a validated tag in the `refs/tags/` form for git invocations. */
+export const refsTagsForm = (tag: string): string => `refs/tags/${tag}`;
diff --git a/src/node/updater/state.ts b/src/node/updater/state.ts
index 05f97e8ab56..be425321681 100644
--- a/src/node/updater/state.ts
+++ b/src/node/updater/state.ts
@@ -1,6 +1,6 @@
 import fs from 'node:fs/promises';
 import path from 'node:path';
-import {EMPTY_STATE, UpdateState} from './types';
+import {EMPTY_STATE, EXECUTION_STATUSES, UpdateState} from './types';
 
 const isPlainObject = (v: unknown): v is Record =>
   v !== null && typeof v === 'object' && !Array.isArray(v);
@@ -8,6 +8,52 @@ const isPlainObject = (v: unknown): v is Record =>
 const isStringOrNull = (v: unknown): v is string | null =>
   v === null || typeof v === 'string';
 
+// Per-status field requirements that mirror the ExecutionStatus union in types.ts.
+// Persisted-state corruption (a hand-edited file or a future schema bump that
+// missed a migration) must never reach RollbackHandler with `undefined` refs —
+// loadState resets to EMPTY_STATE when any required field is missing.
+const EXEC_REQUIRED_FIELDS: Record = {
+  'idle': [],
+  'preflight': ['targetTag', 'startedAt'],
+  'preflight-failed': ['targetTag', 'reason', 'at'],
+  'draining': ['targetTag', 'drainEndsAt', 'startedAt'],
+  'executing': ['targetTag', 'fromSha', 'startedAt'],
+  'pending-verification': ['targetTag', 'fromSha', 'deadlineAt'],
+  'verified': ['targetTag', 'verifiedAt'],
+  'rolling-back': ['reason', 'targetTag', 'fromSha', 'at'],
+  'rolled-back': ['reason', 'targetTag', 'restoredSha', 'at'],
+  'rollback-failed': ['reason', 'targetTag', 'fromSha', 'at'],
+};
+
+const isValidExecution = (v: unknown): boolean => {
+  if (!isPlainObject(v)) return false;
+  if (typeof v.status !== 'string') return false;
+  if (!(EXECUTION_STATUSES as readonly string[]).includes(v.status)) return false;
+  const required = EXEC_REQUIRED_FIELDS[v.status];
+  if (!required) return false; // unknown status — fail closed
+  for (const field of required) {
+    if (typeof (v as Record)[field] !== 'string') return false;
+    if (((v as Record)[field] as string).length === 0) return false;
+  }
+  return true;
+};
+
+// Outcomes that LastUpdateResult.outcome must match.
+const VALID_OUTCOMES: ReadonlySet = new Set([
+  'verified', 'rolled-back', 'rollback-failed', 'preflight-failed', 'cancelled',
+]);
+
+const isValidLastResult = (v: unknown): boolean => {
+  if (v === null) return true;
+  if (!isPlainObject(v)) return false;
+  return typeof v.targetTag === 'string'
+    && typeof v.fromSha === 'string'
+    && typeof v.outcome === 'string'
+    && VALID_OUTCOMES.has(v.outcome)
+    && (v.reason === null || typeof v.reason === 'string')
+    && typeof v.at === 'string';
+};
+
 const isValidLatest = (v: unknown): boolean => {
   if (v === null) return true;
   if (!isPlainObject(v)) return false;
@@ -39,14 +85,23 @@ const isValidEmail = (v: unknown): boolean => {
 // Validate the full shape so loadState() actually delivers on its "safely
 // reset on malformed input" contract. Downstream code calls .trim() / semver
 // parsing on these subfields and would crash on a hand-edited file otherwise.
-const isValid = (raw: unknown): raw is UpdateState => {
+//
+// Tier 2 fields (execution, bootCount, lastResult) MAY be absent on a state
+// file written by a Tier 1 install — those are backfilled at load time.
+// Present-but-malformed values still reject so a hand-edited file with
+// e.g. execution.status="totally-bogus" can't poison RollbackHandler.
+const isValid = (raw: unknown): raw is Partial & object => {
   if (!isPlainObject(raw)) return false;
-  return raw.schemaVersion === 1
-    && isStringOrNull(raw.lastCheckAt)
-    && isStringOrNull(raw.lastEtag)
-    && isValidLatest(raw.latest)
-    && isValidVulnerableBelow(raw.vulnerableBelow)
-    && isValidEmail(raw.email);
+  if (raw.schemaVersion !== 1) return false;
+  if (!isStringOrNull(raw.lastCheckAt)) return false;
+  if (!isStringOrNull(raw.lastEtag)) return false;
+  if (!isValidLatest(raw.latest)) return false;
+  if (!isValidVulnerableBelow(raw.vulnerableBelow)) return false;
+  if (!isValidEmail(raw.email)) return false;
+  if (raw.execution !== undefined && !isValidExecution(raw.execution)) return false;
+  if (raw.bootCount !== undefined && typeof raw.bootCount !== 'number') return false;
+  if (raw.lastResult !== undefined && !isValidLastResult(raw.lastResult)) return false;
+  return true;
 };
 
 /** Reads the on-disk state. Returns a fresh empty-state clone when the file is missing, malformed, or has an unknown schemaVersion. Never throws on parse errors. */
@@ -65,7 +120,17 @@ export const loadState = async (filePath: string): Promise => {
     return structuredClone(EMPTY_STATE);
   }
   if (!isValid(parsed)) return structuredClone(EMPTY_STATE);
-  return parsed;
+  // Backfill Tier 2 fields on a Tier 1 state file. Spread defaults first,
+  // parsed second so explicit values win, then explicit fallback for the
+  // three fields that might be undefined.
+  const partial = parsed as Partial;
+  return {
+    ...structuredClone(EMPTY_STATE),
+    ...partial,
+    execution: partial.execution ?? structuredClone(EMPTY_STATE.execution),
+    bootCount: partial.bootCount ?? 0,
+    lastResult: partial.lastResult ?? null,
+  } as UpdateState;
 };
 
 /** Atomic write via tmp-then-rename. Creates parent directories as needed. */
diff --git a/src/node/updater/trustedKeys.ts b/src/node/updater/trustedKeys.ts
new file mode 100644
index 00000000000..2d50a95c977
--- /dev/null
+++ b/src/node/updater/trustedKeys.ts
@@ -0,0 +1,75 @@
+import {spawn as realSpawn, SpawnOptions} from 'node:child_process';
+import log4js from 'log4js';
+import {isValidTag} from './refSafety';
+
+const logger = log4js.getLogger('updater');
+
+export type SpawnFn = (cmd: string, args: string[], opts: SpawnOptions) => {
+  on: {
+    (event: 'close', cb: (code: number | null) => void): void;
+    (event: 'error', cb: (err: Error) => void): void;
+  };
+};
+
+export interface VerifyArgs {
+  tag: string;
+  repoDir: string;
+  requireSignature: boolean;
+  trustedKeysPath: string | null;
+  /** Override for tests; production callers use the default `child_process.spawn`. */
+  spawnFn?: SpawnFn;
+}
+
+export type VerifyResult =
+  | {ok: true; reason: 'signature-verified' | 'signature-not-required'}
+  | {ok: false; reason: 'signature-verification-failed'};
+
+/**
+ * Verify a release tag's GPG signature via `git verify-tag `.
+ *
+ * With `requireSignature: false` (default) this is a documented no-op:
+ * Etherpad's release process does not yet sign tags consistently, and
+ * forcing verification on by default would break Tier 2 for everyone.
+ * Admins who run their own builds or who pin to signed forks set
+ * `updates.requireSignature: true` and import the trusted keys into the
+ * Etherpad user's keyring (or a dedicated keyring at
+ * `updates.trustedKeysPath`, which is passed to git via $GNUPGHOME).
+ */
+export const verifyReleaseTag = async (args: VerifyArgs): Promise => {
+  if (!args.requireSignature) {
+    logger.warn(
+      `verifyReleaseTag: signature check skipped (updates.requireSignature=false) for ${args.tag}`,
+    );
+    return {ok: true, reason: 'signature-not-required'};
+  }
+  // Reject unsafe tag strings before they ever reach git. A tag starting with
+  // '-' could otherwise be parsed as a git option, bypassing verification.
+  if (!isValidTag(args.tag)) {
+    logger.error(`verifyReleaseTag: refused unsafe tag ${JSON.stringify(args.tag)}`);
+    return {ok: false, reason: 'signature-verification-failed'};
+  }
+  const spawnFn = args.spawnFn ?? (realSpawn as unknown as SpawnFn);
+  const env: NodeJS.ProcessEnv = {...process.env};
+  if (args.trustedKeysPath) env.GNUPGHOME = args.trustedKeysPath;
+  // -- terminates options so even a future tag-validation regression can't
+  // smuggle a flag past git verify-tag.
+  const child = spawnFn('git', ['verify-tag', '--', args.tag], {
+    cwd: args.repoDir,
+    env,
+    stdio: 'ignore',
+  });
+  // Listen for both 'close' and 'error' so a missing/unexecutable git binary
+  // surfaces as verification-failure rather than a hung promise.
+  const code: number | null = await new Promise((resolve) => {
+    let settled = false;
+    const settle = (c: number | null) => { if (settled) return; settled = true; resolve(c); };
+    child.on('close', settle);
+    child.on('error', (err: Error) => {
+      logger.error(`verifyReleaseTag: git verify-tag spawn error: ${err.message}`);
+      settle(1);
+    });
+  });
+  if (code === 0) return {ok: true, reason: 'signature-verified'};
+  logger.error(`verifyReleaseTag: git verify-tag ${args.tag} exited ${code}`);
+  return {ok: false, reason: 'signature-verification-failed'};
+};
diff --git a/src/node/updater/types.ts b/src/node/updater/types.ts
index d96c8e384cc..130ab02784e 100644
--- a/src/node/updater/types.ts
+++ b/src/node/updater/types.ts
@@ -45,6 +45,45 @@ export interface EmailSendLog {
   vulnerableNewReleaseTag: string | null;
 }
 
+/**
+ * Discriminated union mirroring the state machine in
+ * docs/superpowers/specs/2026-04-25-auto-update-design.md (section "State machine").
+ *
+ * `rollback-failed` is the only terminal state that disables auto/autonomous
+ * attempts globally until POST /admin/update/acknowledge clears it. Manual
+ * remains permitted because an admin clicking Apply *is* the intervention.
+ */
+export type ExecutionStatus =
+  | {status: 'idle'}
+  | {status: 'preflight'; targetTag: string; startedAt: string}
+  | {status: 'preflight-failed'; targetTag: string; reason: string; at: string}
+  | {status: 'draining'; targetTag: string; drainEndsAt: string; startedAt: string}
+  | {status: 'executing'; targetTag: string; fromSha: string; startedAt: string}
+  | {status: 'pending-verification'; targetTag: string; fromSha: string; deadlineAt: string}
+  | {status: 'verified'; targetTag: string; verifiedAt: string}
+  | {status: 'rolling-back'; reason: string; targetTag: string; fromSha: string; at: string}
+  | {status: 'rolled-back'; reason: string; targetTag: string; restoredSha: string; at: string}
+  | {status: 'rollback-failed'; reason: string; targetTag: string; fromSha: string; at: string};
+
+/** All recognised execution statuses — used by the state validator. */
+export const EXECUTION_STATUSES = [
+  'idle', 'preflight', 'preflight-failed', 'draining', 'executing',
+  'pending-verification', 'verified', 'rolling-back', 'rolled-back', 'rollback-failed',
+] as const;
+
+export type LastUpdateResult = {
+  /** Tag we were updating to. */
+  targetTag: string;
+  /** SHA we were updating from. Empty string when the run never reached executor (e.g. preflight-failed). */
+  fromSha: string;
+  /** Outcome to surface in admin UI. */
+  outcome: 'verified' | 'rolled-back' | 'rollback-failed' | 'preflight-failed' | 'cancelled';
+  /** Human-readable reason on non-success. */
+  reason: string | null;
+  /** ISO timestamp when this result was finalised. */
+  at: string;
+} | null;
+
 export interface UpdateState {
   /** Schema version of this file. Increment when fields change. */
   schemaVersion: 1;
@@ -58,6 +97,15 @@ export interface UpdateState {
   vulnerableBelow: VulnerableBelowDirective[];
   /** Email send dedupe state. */
   email: EmailSendLog;
+  /** Current in-flight execution state. Persisted so a restart mid-update reaches RollbackHandler. */
+  execution: ExecutionStatus;
+  /**
+   * Boot counter that the RollbackHandler increments while a `pending-verification`
+   * status is live. > 2 means the new version crash-looped; force rollback regardless of timer.
+   */
+  bootCount: number;
+  /** Most recent terminal outcome, surfaced in admin UI even after `execution` returns to idle. */
+  lastResult: LastUpdateResult;
 }
 
 /** Zero-value initial state. Treat as immutable — spread before mutating: `{...EMPTY_STATE, lastCheckAt: x}`. */
@@ -72,4 +120,7 @@ export const EMPTY_STATE: UpdateState = {
     vulnerableAt: null,
     vulnerableNewReleaseTag: null,
   },
+  execution: {status: 'idle'},
+  bootCount: 0,
+  lastResult: null,
 };
diff --git a/src/node/updater/updateLog.ts b/src/node/updater/updateLog.ts
new file mode 100644
index 00000000000..f0c3b58b0f6
--- /dev/null
+++ b/src/node/updater/updateLog.ts
@@ -0,0 +1,82 @@
+import fs from 'node:fs/promises';
+import path from 'node:path';
+
+const DEFAULT_MAX_BYTES = 10 * 1024 * 1024;
+const DEFAULT_BACKUPS = 5;
+
+/**
+ * Rotate `<logPath>` when it exceeds `maxBytes`:
+ *   .{n-1} -> .n  (oldest dropped)
+ *          -> .1
+ * No-op when the file is missing or under the limit.
+ */
+export const rotateIfNeeded = async (
+  logPath: string,
+  maxBytes = DEFAULT_MAX_BYTES,
+  backups = DEFAULT_BACKUPS,
+): Promise<void> => {
+  let size = 0;
+  try { size = (await fs.stat(logPath)).size; } catch (err: any) {
+    if (err.code === 'ENOENT') return;
+    throw err;
+  }
+  if (size < maxBytes) return;
+
+  // Drop the oldest. Walk from highest index down so the rename chain lands cleanly.
+  for (let i = backups - 1; i >= 1; i--) {
+    const src = `${logPath}.${i}`;
+    const dst = `${logPath}.${i + 1}`;
+    try { await fs.rename(src, dst); }
+    catch (err: any) { if (err.code !== 'ENOENT') throw err; }
+  }
+  // Current file becomes .1.
+  try { await fs.rename(logPath, `${logPath}.1`); }
+  catch (err: any) { if (err.code !== 'ENOENT') throw err; }
+};
+
+/**
+ * Append `line` to `<logPath>`, rotating first if the file is over the size cap.
+ * Creates parent directories as needed. The line is newline-terminated; do not
+ * include a trailing newline in `line`.
+ *
+ * Best-effort: swallows fs errors silently. Update logging must never break the
+ * update flow itself, and errors are already surfaced via log4js by callers.
+ */
+export const appendLine = async (
+  logPath: string,
+  line: string,
+  maxBytes = DEFAULT_MAX_BYTES,
+  backups = DEFAULT_BACKUPS,
+): Promise<void> => {
+  try {
+    await fs.mkdir(path.dirname(logPath), {recursive: true});
+    await rotateIfNeeded(logPath, maxBytes, backups);
+    await fs.appendFile(logPath, `${line}\n`);
+  } catch {
+    // ignore — caller is fire-and-forget logging
+  }
+};
+
+/** Same as appendLine but throws on error — used by tests that want to assert disk failures surface. */
+export const appendLineStrict = async (
+  logPath: string,
+  line: string,
+  maxBytes = DEFAULT_MAX_BYTES,
+  backups = DEFAULT_BACKUPS,
+): Promise<void> => {
+  await fs.mkdir(path.dirname(logPath), {recursive: true});
+  await rotateIfNeeded(logPath, maxBytes, backups);
+  await fs.appendFile(logPath, `${line}\n`);
+};
+
+/** Read the last `n` newline-separated lines from the active log file. Empty array if missing. */
+export const tailLines = async (logPath: string, n: number): Promise<string[]> => {
+  if (n <= 0) return [];
+  let raw: string;
+  try { raw = await fs.readFile(logPath, 'utf8'); }
+  catch (err: any) { if (err.code === 'ENOENT') return []; throw err; }
+  const stripped = raw.endsWith('\n') ? raw.slice(0, -1) : raw;
+  if (stripped.length === 0) return [];
+  const all = stripped.split('\n');
+  return all.slice(Math.max(0, all.length - n));
+};
diff --git a/src/node/utils/Settings.ts b/src/node/utils/Settings.ts
index 3b5e9790f9c..f091577feec 100644
--- a/src/node/utils/Settings.ts
+++ b/src/node/utils/Settings.ts
@@ -331,6 +331,15 @@ export type SettingsType = {
     checkIntervalHours: number,
     githubRepo: string,
     requireAdminForStatus: boolean,
+    /** Tier 2+ knobs. Default 0 in PR 2; tier 3 makes preApplyGraceMinutes meaningful. */
+    preApplyGraceMinutes: number,
+    drainSeconds: number,
+    rollbackHealthCheckSeconds: number,
+    diskSpaceMinMB: number,
+    /** When true, refuse updates whose tag is not signed by a trusted key. */
+    requireSignature: boolean,
+    /** Override the OS keyring location (passed to git verify-tag via $GNUPGHOME). */
+    trustedKeysPath: string | null,
   },
   adminEmail: string | null,
   getPublicSettings: () => Pick,
@@ -515,6 +524,13 @@ const settings: SettingsType = {
     // Set true to require an authenticated admin session for the endpoint without
     // disabling the updater itself.
     requireAdminForStatus: false,
+    // Tier 2+ knobs. Only meaningful at tier "manual" or higher.
+    preApplyGraceMinutes: 0,
+    drainSeconds: 60,
+    rollbackHealthCheckSeconds: 60,
+    diskSpaceMinMB: 500,
+    requireSignature: false,
+    trustedKeysPath: null,
   },
   /**
    * Contact address for admin notifications (updates, future security advisories).
diff --git a/src/static/js/pad.ts b/src/static/js/pad.ts
index 6070fb8944f..26234bfef17 100644
--- a/src/static/js/pad.ts
+++ b/src/static/js/pad.ts
@@ -401,13 +401,19 @@ const handshake = async () => {
       // gritter so the user doesn't see a confusing duplicate.
       if (typeof msgObj.messageKey === 'string'
           && msgObj.messageKey.startsWith('pad.deletionToken.')) return;
-      const text = msgObj.messageKey ? html10n.get(msgObj.messageKey) : msgObj.message;
+      // Updater drain announcements get their own title and dodge the generic
+      // "Admin message" framing so the user knows it's a system event.
+      const isUpdate = typeof msgObj.messageKey === 'string'
+          && msgObj.messageKey.startsWith('update.drain.');
+      const text = msgObj.messageKey
+          ? html10n.get(msgObj.messageKey, msgObj.values || {})
+          : msgObj.message;
       if (!text) return;
       const date = new Date(payload.timestamp);
       $.gritter.add({
-        title: 'Admin message',
+        title: isUpdate ? html10n.get('update.banner.title') : 'Admin message',
         text: '[' + date.toLocaleTimeString() + ']: ' + text,
-        sticky: msgObj.sticky
+        sticky: !!msgObj.sticky
       });
     }
   })
diff --git a/src/tests/backend-new/specs/updater/RollbackHandler.test.ts b/src/tests/backend-new/specs/updater/RollbackHandler.test.ts
new file mode 100644
index 00000000000..8a30cbcb769
--- /dev/null
+++ b/src/tests/backend-new/specs/updater/RollbackHandler.test.ts
@@ -0,0 +1,203 @@
+import {describe, it, expect, vi, beforeEach, afterEach} from 'vitest';
+import {checkPendingVerification, performRollback, RollbackDeps} from '../../../../node/updater/RollbackHandler';
+import {EMPTY_STATE} from '../../../../node/updater/types';
+
+const okSpawn = (exit: number) => vi.fn(() => ({
+  stdout: {on: () => {}},
+  stderr: {on: () => {}},
+  on: (e: string, cb: any) => { if (e === 'close') setImmediate(() => cb(exit)); },
+})) as any;
+
+const baseDeps = (): RollbackDeps => ({
+  repoDir: '/srv/etherpad',
+  backupDir: '/srv/etherpad/var/update-backup',
+  spawnFn: okSpawn(0),
+  copyFile: vi.fn(async () => {}),
+  saveState: vi.fn(async () => {}),
+  exit: vi.fn(),
+  now: () => new Date('2026-05-08T10:00:00Z'),
+  rollbackHealthCheckSeconds: 60,
+});
+
+describe('checkPendingVerification', () => {
+  beforeEach(() => { vi.useFakeTimers(); });
+  afterEach(() => { vi.useRealTimers(); });
+
+  it('idle state is a no-op (timer is not armed)', () => {
+    const r = checkPendingVerification(structuredClone(EMPTY_STATE), baseDeps());
+    expect(r.armed).toBe(false);
+  });
+
+  it('pending-verification with bootCount<=2 arms a timer and increments bootCount', async () => {
+    const deps = baseDeps();
+    const state = {
+      ...structuredClone(EMPTY_STATE),
+      execution: {
+        status: 'pending-verification' as const,
+        targetTag: 'v2.7.3',
+        fromSha: 'abc',
+        deadlineAt: '2026-05-08T10:00:00Z',
+      },
+      bootCount: 0,
+    };
+    const r = checkPendingVerification(state, deps);
+    expect(r.armed).toBe(true);
+    expect(deps.saveState).toHaveBeenCalledWith(expect.objectContaining({bootCount: 1}));
+    // markVerified clears the timer; advancing past the deadline does NOT trigger rollback.
+    r.markVerified();
+    await vi.advanceTimersByTimeAsync(60_000);
+    await vi.runAllTimersAsync();
+    expect(deps.exit).not.toHaveBeenCalled();
+  });
+
+  it('markVerified persists the verified state with lastResult=verified', () => {
+    const deps = baseDeps();
+    const state = {
+      ...structuredClone(EMPTY_STATE),
+      execution: {
+        status: 'pending-verification' as const,
+        targetTag: 'v2.7.3', fromSha: 'abc',
+        deadlineAt: '2026-05-08T10:00:00Z',
+      },
+      bootCount: 0,
+    };
+    const r = checkPendingVerification(state, deps);
+    r.markVerified();
+    const lastSave = (deps.saveState as any).mock.calls.at(-1)[0];
+    expect(lastSave.execution.status).toBe('verified');
+    expect(lastSave.lastResult.outcome).toBe('verified');
+    expect(lastSave.bootCount).toBe(0);
+  });
+
+  it('pending-verification with bootCount>2 forces immediate rollback', async () => {
+    const deps = baseDeps();
+    const state = {
+      ...structuredClone(EMPTY_STATE),
+      execution: {
+        status: 'pending-verification' as const,
+        targetTag: 'v2.7.3', fromSha: 'abc',
+        deadlineAt: '2026-05-08T10:00:00Z',
+      },
+      bootCount: 3,
+    };
+    const r = checkPendingVerification(state, deps);
+    expect(r.armed).toBe(false);
+    await vi.runAllTimersAsync();
+    expect(deps.exit).toHaveBeenCalledWith(75);
+  });
+
+  it('timer expiry triggers rollback when markVerified is never called', async () => {
+    const deps = baseDeps();
+    const state = {
+      ...structuredClone(EMPTY_STATE),
+      execution: {
+        status: 'pending-verification' as const,
+        targetTag: 'v2.7.3', fromSha: 'abc',
+        deadlineAt: '2026-05-08T10:00:00Z',
+      },
+      bootCount: 0,
+    };
+    const r = checkPendingVerification(state, deps);
+    expect(r.armed).toBe(true);
+    await vi.advanceTimersByTimeAsync(60_000);
+    await vi.runAllTimersAsync();
+    expect(deps.exit).toHaveBeenCalledWith(75);
+  });
+});
+
+describe('performRollback', () => {
+  it('happy path: restores lockfile, checks out fromSha, retries pnpm install, exits 75', async () => {
+    const deps = baseDeps();
+    const state = {
+      ...structuredClone(EMPTY_STATE),
+      execution: {
+        status: 'rolling-back' as const,
+        reason: 'install-failed',
+        targetTag: 'v2.7.3', fromSha: 'abc',
+        at: '2026-05-08T10:00:00Z',
+      },
+      bootCount: 0,
+    };
+    await performRollback(state, deps);
+    expect(deps.copyFile).toHaveBeenCalledWith(
+      '/srv/etherpad/var/update-backup/pnpm-lock.yaml',
+      '/srv/etherpad/pnpm-lock.yaml',
+    );
+    const lastSave = (deps.saveState as any).mock.calls.at(-1)[0];
+    expect(lastSave.execution.status).toBe('rolled-back');
+    expect(lastSave.lastResult.outcome).toBe('rolled-back');
+    expect(deps.exit).toHaveBeenCalledWith(75);
+  });
+
+  it('rollback failure (lockfile copy throws) lands on rollback-failed terminal', async () => {
+    const deps = baseDeps();
+    deps.copyFile = vi.fn(async () => { throw new Error('EACCES'); });
+    const state = {
+      ...structuredClone(EMPTY_STATE),
+      execution: {
+        status: 'rolling-back' as const,
+        reason: 'install-failed',
+        targetTag: 'v2.7.3', fromSha: 'abc',
+        at: '2026-05-08T10:00:00Z',
+      },
+      bootCount: 0,
+    };
+    await performRollback(state, deps);
+    const lastSave = (deps.saveState as any).mock.calls.at(-1)[0];
+    expect(lastSave.execution.status).toBe('rollback-failed');
+    expect(lastSave.lastResult.outcome).toBe('rollback-failed');
+    expect(deps.exit).toHaveBeenCalledWith(75);
+  });
+
+  it('rollback failure (git checkout exits non-zero) lands on rollback-failed', async () => {
+    const deps = baseDeps();
+    let calls = 0;
+    deps.spawnFn = vi.fn(() => ({
+      stdout: {on: () => {}},
+      stderr: {on: () => {}},
+      on: (e: string, cb: any) => { if (e === 'close') setImmediate(() => cb(calls++ === 0 ? 1 : 0)); },
+    })) as any;
+    const state = {
+      ...structuredClone(EMPTY_STATE),
+      execution: {
+        status: 'rolling-back' as const,
+        reason: 'build-failed',
+        targetTag: 'v2.7.3', fromSha: 'abc',
+        at: '2026-05-08T10:00:00Z',
+      },
+      bootCount: 0,
+    };
+    await performRollback(state, deps);
+    const lastSave = (deps.saveState as any).mock.calls.at(-1)[0];
+    expect(lastSave.execution.status).toBe('rollback-failed');
+  });
+
+  it('rollback failure (pnpm install exits non-zero) lands on rollback-failed', async () => {
+    const deps = baseDeps();
+    let calls = 0;
+    deps.spawnFn = vi.fn(() => ({
+      stdout: {on: () => {}},
+      stderr: {on: () => {}},
+      on: (e: string, cb: any) => { if (e === 'close') setImmediate(() => cb(calls++ === 0 ? 0 : 1)); },
+    })) as any;
+    const state = {
+      ...structuredClone(EMPTY_STATE),
+      execution: {
+        status: 'rolling-back' as const,
+        reason: 'build-failed',
+        targetTag: 'v2.7.3', fromSha: 'abc',
+        at: '2026-05-08T10:00:00Z',
+      },
+      bootCount: 0,
+    };
+    await performRollback(state, deps);
+    const lastSave = (deps.saveState as any).mock.calls.at(-1)[0];
+    expect(lastSave.execution.status).toBe('rollback-failed');
+  });
+
+  it('throws when called from an unexpected status', async () => {
+    const deps = baseDeps();
+    const state = structuredClone(EMPTY_STATE);
+    await expect(performRollback(state, deps)).rejects.toThrow();
+  });
+});
diff --git a/src/tests/backend-new/specs/updater/SessionDrainer.test.ts b/src/tests/backend-new/specs/updater/SessionDrainer.test.ts
new file mode 100644
index 00000000000..8d005035ad4
--- /dev/null
+++ b/src/tests/backend-new/specs/updater/SessionDrainer.test.ts
@@ -0,0 +1,110 @@
+import {describe, it, expect, vi, beforeEach, afterEach} from 'vitest';
+import {createDrainer, isAcceptingConnections, _resetForTests} from '../../../../node/updater/SessionDrainer';
+
+describe('SessionDrainer', () => {
+  beforeEach(() => { vi.useFakeTimers(); _resetForTests(); });
+  afterEach(() => { vi.useRealTimers(); _resetForTests(); });
+
+  it('emits T-60, T-30, T-10 in order and resolves at T=0', async () => {
+    const broadcasts: string[] = [];
+    const drainer = createDrainer({
+      drainSeconds: 60,
+      broadcast: (key) => { broadcasts.push(key); },
+    });
+    const done = drainer.start();
+    expect(broadcasts).toEqual(['update.drain.t60']);
+    await vi.advanceTimersByTimeAsync(30_000);
+    expect(broadcasts).toEqual(['update.drain.t60', 'update.drain.t30']);
+    await vi.advanceTimersByTimeAsync(20_000);
+    expect(broadcasts).toEqual(['update.drain.t60', 'update.drain.t30', 'update.drain.t10']);
+    await vi.advanceTimersByTimeAsync(10_000);
+    const r = await done;
+    expect(r).toEqual({outcome: 'completed'});
+  });
+
+  it('flips isAcceptingConnections to false during drain and back on cancel', () => {
+    const drainer = createDrainer({drainSeconds: 60, broadcast: () => {}});
+    expect(isAcceptingConnections()).toBe(true);
+    drainer.start();
+    expect(isAcceptingConnections()).toBe(false);
+    drainer.cancel();
+    expect(isAcceptingConnections()).toBe(true);
+  });
+
+  it('restores isAcceptingConnections to true on drain completion', async () => {
+    const drainer = createDrainer({drainSeconds: 60, broadcast: () => {}});
+    const done = drainer.start();
+    expect(isAcceptingConnections()).toBe(false);
+    await vi.advanceTimersByTimeAsync(60_000);
+    await done;
+    // Restored at completion so a downstream throw doesn't wedge join handshakes.
+    expect(isAcceptingConnections()).toBe(true);
+  });
+
+  it('cancel before T=0 resolves start() promise as cancelled', async () => {
+    const drainer = createDrainer({drainSeconds: 60, broadcast: () => {}});
+    const done = drainer.start();
+    await vi.advanceTimersByTimeAsync(20_000);
+    drainer.cancel();
+    const r = await done;
+    expect(r).toEqual({outcome: 'cancelled'});
+  });
+
+  it('cancel does not fire any further broadcasts', async () => {
+    const broadcasts: string[] = [];
+    const drainer = createDrainer({
+      drainSeconds: 60,
+      broadcast: (key) => { broadcasts.push(key); },
+    });
+    drainer.start();
+    expect(broadcasts).toEqual(['update.drain.t60']);
+    drainer.cancel();
+    await vi.advanceTimersByTimeAsync(60_000);
+    expect(broadcasts).toEqual(['update.drain.t60']);
+  });
+
+  it('passes seconds-remaining in broadcast values', async () => {
+    const seen: Array<{key: string; values: any}> = [];
+    const drainer = createDrainer({
+      drainSeconds: 60,
+      broadcast: (key, values) => { seen.push({key, values}); },
+    });
+    drainer.start();
+    expect(seen[0]).toEqual({key: 'update.drain.t60', values: {seconds: 60}});
+    await vi.advanceTimersByTimeAsync(30_000);
+    expect(seen[1]).toEqual({key: 'update.drain.t30', values: {seconds: 30}});
+    await vi.advanceTimersByTimeAsync(20_000);
+    expect(seen[2]).toEqual({key: 'update.drain.t10', values: {seconds: 10}});
+  });
+
+  it('drainSeconds=15 skips t30 (window too short) but still fires t10', async () => {
+    const seen: Array<{key: string; values: any}> = [];
+    const drainer = createDrainer({
+      drainSeconds: 15,
+      broadcast: (key, values) => { seen.push({key, values}); },
+    });
+    const done = drainer.start();
+    // Opening announcement reports the configured drain length, not a fixed 60.
+    expect(seen).toEqual([{key: 'update.drain.t60', values: {seconds: 15}}]);
+    // t30 is suppressed because reporting "30 seconds" would be wrong.
+    await vi.advanceTimersByTimeAsync(5_000);
+    expect(seen.map((s) => s.key)).not.toContain('update.drain.t30');
+    // t10 fires when 10 seconds remain (= 5s from start of a 15s drain).
+    expect(seen.map((s) => s.key)).toContain('update.drain.t10');
+    await vi.advanceTimersByTimeAsync(10_000);
+    await done;
+  });
+
+  it('drainSeconds=5 skips both t30 and t10', async () => {
+    const seen: string[] = [];
+    const drainer = createDrainer({
+      drainSeconds: 5,
+      broadcast: (key) => { seen.push(key); },
+    });
+    const done = drainer.start();
+    expect(seen).toEqual(['update.drain.t60']);
+    await vi.advanceTimersByTimeAsync(5_000);
+    await done;
+    expect(seen).toEqual(['update.drain.t60']); // only the opening announcement
+  });
+});
diff --git a/src/tests/backend-new/specs/updater/UpdateExecutor.test.ts b/src/tests/backend-new/specs/updater/UpdateExecutor.test.ts
new file mode 100644
index 00000000000..18fa118fc1b
--- /dev/null
+++ b/src/tests/backend-new/specs/updater/UpdateExecutor.test.ts
@@ -0,0 +1,145 @@
+import {describe, it, expect, vi, beforeEach} from 'vitest';
+import {executeUpdate, ExecutorDeps} from '../../../../node/updater/UpdateExecutor';
+import {EMPTY_STATE, UpdateState} from '../../../../node/updater/types';
+
+interface ScriptStep {cmd: string; exit: number; stderr?: string}
+
+const okSpawn = (script: ScriptStep[]) => {
+  let i = 0;
+  return vi.fn((cmd: string, args: string[]) => {
+    const step = script[i++];
+    if (!step) throw new Error(`Unexpected spawn call: ${cmd} ${args.join(' ')}`);
+    const expected = step.cmd;
+    const actual = `${cmd} ${args.join(' ')}`;
+    if (expected !== actual) {
+      throw new Error(`Spawn order mismatch: expected "${expected}", got "${actual}"`);
+    }
+    return {
+      stdout: {on: () => {}},
+      stderr: {on: (e: string, cb: any) => { if (e === 'data' && step.stderr) cb(Buffer.from(step.stderr)); }},
+      on: (e: string, cb: any) => { if (e === 'close') setImmediate(() => cb(step.exit)); },
+    };
+  });
+};
+
+const baseDeps = (): {
+  deps: ExecutorDeps;
+  states: UpdateState[];
+  copies: Array<{src: string; dst: string}>;
+  exitedWith: {code: number | null};
+  fromShaUsed: {value: string | null};
+} => {
+  const states: UpdateState[] = [];
+  const copies: Array<{src: string; dst: string}> = [];
+  const exitedWith = {code: null as number | null};
+  const fromShaUsed = {value: null as string | null};
+  return {
+    deps: {
+      repoDir: '/srv/etherpad',
+      backupDir: '/srv/etherpad/var/update-backup',
+      spawnFn: okSpawn([
+        {cmd: 'git fetch --tags origin', exit: 0},
+        {cmd: 'git checkout refs/tags/v2.7.3', exit: 0},
+        {cmd: 'pnpm install --frozen-lockfile', exit: 0},
+        {cmd: 'pnpm run build:ui', exit: 0},
+      ]) as any,
+      readSha: vi.fn(async () => { fromShaUsed.value = 'abc123'; return 'abc123'; }),
+      copyFile: vi.fn(async (src: string, dst: string) => { copies.push({src, dst}); }),
+      saveState: vi.fn(async (s: UpdateState) => { states.push(structuredClone(s)); }),
+      initialState: structuredClone(EMPTY_STATE),
+      targetTag: 'v2.7.3',
+      now: () => new Date('2026-05-08T10:00:00Z'),
+      exit: (code: number) => { exitedWith.code = code; },
+    },
+    states,
+    copies,
+    exitedWith,
+    fromShaUsed,
+  };
+};
+
+describe('executeUpdate', () => {
+  it('happy path: snapshots, runs steps, persists pending-verification, exits 75', async () => {
+    const {deps, states, copies, exitedWith} = baseDeps();
+    const r = await executeUpdate(deps);
+    expect(r).toEqual({outcome: 'pending-verification'});
+    expect(copies).toEqual([
+      {src: '/srv/etherpad/pnpm-lock.yaml', dst: '/srv/etherpad/var/update-backup/pnpm-lock.yaml'},
+    ]);
+    expect(states.at(-1)?.execution.status).toBe('pending-verification');
+    expect((states.at(-1)?.execution as any).fromSha).toBe('abc123');
+    expect(states.at(-1)?.bootCount).toBe(0);
+    expect(exitedWith.code).toBe(75);
+  });
+
+  it('records the executing -> pending-verification transition in saveState calls', async () => {
+    const {deps, states} = baseDeps();
+    await executeUpdate(deps);
+    const statuses = states.map((s) => s.execution.status);
+    expect(statuses[0]).toBe('executing');
+    expect(statuses.at(-1)).toBe('pending-verification');
+  });
+
+  it('install failure flips state to rolling-back without exiting', async () => {
+    const {deps, states, exitedWith} = baseDeps();
+    deps.spawnFn = okSpawn([
+      {cmd: 'git fetch --tags origin', exit: 0},
+      {cmd: 'git checkout refs/tags/v2.7.3', exit: 0},
+      {cmd: 'pnpm install --frozen-lockfile', exit: 1, stderr: 'resolver bork'},
+    ]) as any;
+    const r = await executeUpdate(deps);
+    expect(r.outcome).toBe('failed-install');
+    expect(states.at(-1)?.execution.status).toBe('rolling-back');
+    expect((states.at(-1)?.execution as any).reason).toContain('pnpm install exit 1');
+    expect(exitedWith.code).toBeNull(); // executor must not exit on failure paths
+  });
+
+  it('build failure flips state to rolling-back', async () => {
+    const {deps, states, exitedWith} = baseDeps();
+    deps.spawnFn = okSpawn([
+      {cmd: 'git fetch --tags origin', exit: 0},
+      {cmd: 'git checkout refs/tags/v2.7.3', exit: 0},
+      {cmd: 'pnpm install --frozen-lockfile', exit: 0},
+      {cmd: 'pnpm run build:ui', exit: 2, stderr: 'tsc bork'},
+    ]) as any;
+    const r = await executeUpdate(deps);
+    expect(r.outcome).toBe('failed-build');
+    expect(states.at(-1)?.execution.status).toBe('rolling-back');
+    expect(exitedWith.code).toBeNull();
+  });
+
+  it('checkout failure flips state to rolling-back (no copyFile? actually copies first)', async () => {
+    // copyFile is called before any spawn; checkout is the second spawn so by then the
+    // backup lockfile is in place. This matters: rollback needs the backup to exist.
+    const {deps, copies, states} = baseDeps();
+    deps.spawnFn = okSpawn([
+      {cmd: 'git fetch --tags origin', exit: 0},
+      {cmd: 'git checkout refs/tags/v2.7.3', exit: 1, stderr: 'conflict'},
+    ]) as any;
+    const r = await executeUpdate(deps);
+    expect(r.outcome).toBe('failed-checkout');
+    expect(copies.length).toBe(1); // backup taken before any mutation
+    expect(states.at(-1)?.execution.status).toBe('rolling-back');
+  });
+
+  it('git-fetch failure flips state to rolling-back', async () => {
+    const {deps, states} = baseDeps();
+    deps.spawnFn = okSpawn([
+      {cmd: 'git fetch --tags origin', exit: 128, stderr: 'cannot reach origin'},
+    ]) as any;
+    const r = await executeUpdate(deps);
+    expect(r.outcome).toBe('failed-checkout');
+    expect(states.at(-1)?.execution.status).toBe('rolling-back');
+  });
+
+  it('captures fromSha into the rolling-back state so RollbackHandler can restore it', async () => {
+    const {deps, states} = baseDeps();
+    deps.spawnFn = okSpawn([
+      {cmd: 'git fetch --tags origin', exit: 0},
+      {cmd: 'git checkout refs/tags/v2.7.3', exit: 0},
+      {cmd: 'pnpm install --frozen-lockfile', exit: 1},
+    ]) as any;
+    await executeUpdate(deps);
+    expect((states.at(-1)?.execution as any).fromSha).toBe('abc123');
+  });
+});
diff --git a/src/tests/backend-new/specs/updater/UpdatePolicy.test.ts b/src/tests/backend-new/specs/updater/UpdatePolicy.test.ts
index 6dfd0f95451..3eb74ef01bf 100644
--- a/src/tests/backend-new/specs/updater/UpdatePolicy.test.ts
+++ b/src/tests/backend-new/specs/updater/UpdatePolicy.test.ts
@@ -62,3 +62,34 @@ describe('evaluatePolicy', () => {
     expect(r.reason).toBe('up-to-date');
   });
 });
+
+describe('evaluatePolicy terminal-state gating', () => {
+  it('rollback-failed denies auto/autonomous but keeps manual on', () => {
+    const r = evaluatePolicy({
+      ...baseInput, tier: 'autonomous',
+      executionStatus: 'rollback-failed',
+    });
+    expect(r.canNotify).toBe(true);
+    expect(r.canManual).toBe(true);
+    expect(r.canAuto).toBe(false);
+    expect(r.canAutonomous).toBe(false);
+    expect(r.reason).toBe('rollback-failed-terminal');
+  });
+
+  it('idle execution behaves identically to no-status', () => {
+    const r = evaluatePolicy({...baseInput, tier: 'autonomous', executionStatus: 'idle'});
+    expect(r.canManual).toBe(true);
+    expect(r.canAuto).toBe(true);
+    expect(r.canAutonomous).toBe(true);
+    expect(r.reason).toBe('ok');
+  });
+
+  it('preflight-failed does NOT block manual / auto (it is informational only)', () => {
+    const r = evaluatePolicy({
+      ...baseInput, tier: 'autonomous', executionStatus: 'preflight-failed',
+    });
+    expect(r.canManual).toBe(true);
+    expect(r.canAuto).toBe(true);
+    expect(r.canAutonomous).toBe(true);
+  });
+});
diff --git a/src/tests/backend-new/specs/updater/lock.test.ts b/src/tests/backend-new/specs/updater/lock.test.ts
new file mode 100644
index 00000000000..adf3c61bf99
--- /dev/null
+++ b/src/tests/backend-new/specs/updater/lock.test.ts
@@ -0,0 +1,69 @@
+import {describe, it, expect, beforeEach, afterEach} from 'vitest';
+import fs from 'node:fs/promises';
+import path from 'node:path';
+import os from 'node:os';
+import {acquireLock, releaseLock, isHeld} from '../../../../node/updater/lock';
+
+describe('update lock', () => {
+  let dir: string;
+  let lockPath: string;
+
+  beforeEach(async () => {
+    dir = await fs.mkdtemp(path.join(os.tmpdir(), 'updater-lock-'));
+    lockPath = path.join(dir, 'update.lock');
+  });
+
+  afterEach(async () => {
+    await fs.rm(dir, {recursive: true, force: true});
+  });
+
+  it('acquires and releases', async () => {
+    expect(await acquireLock(lockPath)).toBe(true);
+    expect(await isHeld(lockPath)).toBe(true);
+    await releaseLock(lockPath);
+    expect(await isHeld(lockPath)).toBe(false);
+  });
+
+  it('rejects a second acquire while live', async () => {
+    expect(await acquireLock(lockPath)).toBe(true);
+    expect(await acquireLock(lockPath)).toBe(false);
+    await releaseLock(lockPath);
+  });
+
+  it('reaps a stale lock whose PID is gone', async () => {
+    // Pick a PID that almost certainly does not exist. process.kill(pid, 0) on
+    // a free PID returns ESRCH which the implementation treats as stale.
+    await fs.writeFile(lockPath, JSON.stringify({pid: 2147483646, at: new Date().toISOString()}));
+    expect(await acquireLock(lockPath)).toBe(true);
+    await releaseLock(lockPath);
+  });
+
+  it('treats an unparseable lock file as stale', async () => {
+    await fs.writeFile(lockPath, 'garbage');
+    expect(await acquireLock(lockPath)).toBe(true);
+    await releaseLock(lockPath);
+  });
+
+  it('treats a lock missing required fields as stale', async () => {
+    await fs.writeFile(lockPath, JSON.stringify({somethingElse: true}));
+    expect(await acquireLock(lockPath)).toBe(true);
+    await releaseLock(lockPath);
+  });
+
+  it('release is idempotent (no error when file absent)', async () => {
+    await releaseLock(lockPath); // file never existed
+    expect(await isHeld(lockPath)).toBe(false);
+  });
+
+  it('isHeld returns false for a stale lock', async () => {
+    await fs.writeFile(lockPath, JSON.stringify({pid: 2147483646, at: new Date().toISOString()}));
+    expect(await isHeld(lockPath)).toBe(false);
+  });
+
+  it('creates parent directory if missing', async () => {
+    const nested = path.join(dir, 'a', 'b', 'update.lock');
+    expect(await acquireLock(nested)).toBe(true);
+    expect(await isHeld(nested)).toBe(true);
+    await releaseLock(nested);
+  });
+});
diff --git a/src/tests/backend-new/specs/updater/preflight.test.ts b/src/tests/backend-new/specs/updater/preflight.test.ts
new file mode 100644
index 00000000000..5926c7864bd
--- /dev/null
+++ b/src/tests/backend-new/specs/updater/preflight.test.ts
@@ -0,0 +1,78 @@
+import {describe, it, expect, vi} from 'vitest';
+import {runPreflight, PreflightDeps} from '../../../../node/updater/preflight';
+import type {VerifyResult} from '../../../../node/updater/trustedKeys';
+
+const baseDeps = (): PreflightDeps => ({
+  installMethod: 'git',
+  workingTreeClean: vi.fn(async () => true),
+  freeDiskMB: vi.fn(async () => 5000),
+  pnpmOnPath: vi.fn(async () => true),
+  lockHeld: vi.fn(async () => false),
+  remoteHasTag: vi.fn(async () => true),
+  verifyTag: vi.fn(async (): Promise<VerifyResult> => ({ok: true, reason: 'signature-not-required'})),
+});
+
+const baseInput = {
+  targetTag: 'v2.7.3',
+  diskSpaceMinMB: 500,
+  requireSignature: false,
+  trustedKeysPath: null as string | null,
+};
+
+describe('runPreflight', () => {
+  it('passes when all checks pass', async () => {
+    const r = await runPreflight(baseInput, baseDeps());
+    expect(r).toEqual({ok: true});
+  });
+
+  it('rejects non-writable install methods', async () => {
+    const r = await runPreflight(baseInput, {...baseDeps(), installMethod: 'docker'});
+    expect(r).toEqual({ok: false, reason: 'install-method-not-writable'});
+  });
+
+  it('rejects npm install method too (not yet writable)', async () => {
+    const r = await runPreflight(baseInput, {...baseDeps(), installMethod: 'npm'});
+    expect(r).toEqual({ok: false, reason: 'install-method-not-writable'});
+  });
+
+  it('rejects a dirty working tree', async () => {
+    const r = await runPreflight(baseInput, {...baseDeps(), workingTreeClean: vi.fn(async () => false)});
+    expect(r).toEqual({ok: false, reason: 'dirty-working-tree'});
+  });
+
+  it('rejects insufficient disk space', async () => {
+    const r = await runPreflight(baseInput, {...baseDeps(), freeDiskMB: vi.fn(async () => 100)});
+    expect(r).toEqual({ok: false, reason: 'low-disk-space'});
+  });
+
+  it('rejects when pnpm is missing', async () => {
+    const r = await runPreflight(baseInput, {...baseDeps(), pnpmOnPath: vi.fn(async () => false)});
+    expect(r).toEqual({ok: false, reason: 'pnpm-not-found'});
+  });
+
+  it('rejects when the lock is held', async () => {
+    const r = await runPreflight(baseInput, {...baseDeps(), lockHeld: vi.fn(async () => true)});
+    expect(r).toEqual({ok: false, reason: 'lock-held'});
+  });
+
+  it('rejects when the remote tag is missing', async () => {
+    const r = await runPreflight(baseInput, {...baseDeps(), remoteHasTag: vi.fn(async () => false)});
+    expect(r).toEqual({ok: false, reason: 'remote-tag-missing'});
+  });
+
+  it('rejects when signature verification fails', async () => {
+    const r = await runPreflight(baseInput, {
+      ...baseDeps(),
+      verifyTag: vi.fn(async (): Promise<VerifyResult> => ({ok: false, reason: 'signature-verification-failed'})),
+    });
+    expect(r).toEqual({ok: false, reason: 'signature-verification-failed'});
+  });
+
+  it('cheap-check failures short-circuit before slow checks', async () => {
+    const deps = {...baseDeps(), installMethod: 'docker' as const,
+      remoteHasTag: vi.fn(async () => true)};
+    const r = await runPreflight(baseInput, deps);
+    expect(r.ok).toBe(false);
+    expect(deps.remoteHasTag).not.toHaveBeenCalled();
+  });
+});
diff --git a/src/tests/backend-new/specs/updater/refSafety.test.ts b/src/tests/backend-new/specs/updater/refSafety.test.ts
new file mode 100644
index 00000000000..2f0032ba185
--- /dev/null
+++ b/src/tests/backend-new/specs/updater/refSafety.test.ts
@@ -0,0 +1,63 @@
+import {describe, it, expect} from 'vitest';
+import {isValidTag, assertValidTag, refsTagsForm} from '../../../../node/updater/refSafety';
+
+describe('isValidTag', () => {
+  it('accepts plain semver tags', () => {
+    expect(isValidTag('v2.7.3')).toBe(true);
+    expect(isValidTag('2.7.3')).toBe(true);
+    expect(isValidTag('2.7.3-rc.1')).toBe(true);
+  });
+
+  it('rejects tags starting with -', () => {
+    expect(isValidTag('-rf')).toBe(false);
+    expect(isValidTag('-fast-forward')).toBe(false);
+    expect(isValidTag('-no-verify')).toBe(false);
+  });
+
+  it('rejects tags starting with .', () => {
+    expect(isValidTag('.git')).toBe(false);
+  });
+
+  it('rejects empty / non-string / overlong', () => {
+    expect(isValidTag('')).toBe(false);
+    expect(isValidTag(null)).toBe(false);
+    expect(isValidTag(undefined)).toBe(false);
+    expect(isValidTag(42)).toBe(false);
+    expect(isValidTag('v' + 'a'.repeat(300))).toBe(false);
+  });
+
+  it('rejects whitespace and control characters', () => {
+    expect(isValidTag('v2.7.3 -rf')).toBe(false);
+    expect(isValidTag('v2.7.3\nrm -rf')).toBe(false);
+    expect(isValidTag('v2.7.3\trf')).toBe(false);
+    expect(isValidTag('v2.7.3\x00rf')).toBe(false);
+  });
+
+  it('rejects git ref-format violations', () => {
+    expect(isValidTag('v2.7..3')).toBe(false); // .. forbidden
+    expect(isValidTag('v2~7~3')).toBe(false);  // ~ forbidden
+    expect(isValidTag('v2:7:3')).toBe(false);  // : forbidden
+    expect(isValidTag('v2.7.3?')).toBe(false); // ? forbidden
+    expect(isValidTag('v2.7.3*')).toBe(false); // * forbidden
+    expect(isValidTag('v[7]')).toBe(false);    // [ forbidden
+    expect(isValidTag('v\\7')).toBe(false);    // \ forbidden
+    expect(isValidTag('v^7')).toBe(false);     // ^ forbidden
+  });
+});
+
+describe('assertValidTag', () => {
+  it('returns the tag when valid', () => {
+    expect(assertValidTag('v2.7.3')).toBe('v2.7.3');
+  });
+
+  it('throws on invalid input', () => {
+    expect(() => assertValidTag('-rf')).toThrow(/unsafe release tag/);
+    expect(() => assertValidTag(null)).toThrow(/unsafe release tag/);
+  });
+});
+
+describe('refsTagsForm', () => {
+  it('wraps the tag in refs/tags/', () => {
+    expect(refsTagsForm('v2.7.3')).toBe('refs/tags/v2.7.3');
+  });
+});
diff --git a/src/tests/backend-new/specs/updater/state.test.ts b/src/tests/backend-new/specs/updater/state.test.ts
index 266e895fc8c..b230319c2aa 100644
--- a/src/tests/backend-new/specs/updater/state.test.ts
+++ b/src/tests/backend-new/specs/updater/state.test.ts
@@ -117,3 +117,168 @@ describe('saveState', () => {
     expect(data.schemaVersion).toBe(1);
   });
 });
+
+describe('Tier 2 state extensions', () => {
+  it('EMPTY_STATE carries an idle execution block, bootCount 0, no lastResult', () => {
+    expect(EMPTY_STATE.execution).toEqual({status: 'idle'});
+    expect(EMPTY_STATE.bootCount).toBe(0);
+    expect(EMPTY_STATE.lastResult).toBeNull();
+  });
+
+  it('loadState backfills missing Tier 2 fields on a Tier 1 file', async () => {
+    // Hand-write a Tier 1 state file (no execution / bootCount / lastResult).
+    await fs.writeFile(statePath(), JSON.stringify({
+      schemaVersion: 1,
+      lastCheckAt: '2026-05-01T00:00:00Z',
+      lastEtag: 'W/"abc"',
+      latest: null,
+      vulnerableBelow: [],
+      email: {severeAt: null, vulnerableAt: null, vulnerableNewReleaseTag: null},
+    }));
+    const state = await loadState(statePath());
+    expect(state.execution).toEqual({status: 'idle'});
+    expect(state.bootCount).toBe(0);
+    expect(state.lastResult).toBeNull();
+    // Tier 1 fields preserved.
+    expect(state.lastCheckAt).toBe('2026-05-01T00:00:00Z');
+    expect(state.lastEtag).toBe('W/"abc"');
+  });
+
+  it('rejects a malformed execution block by resetting to EMPTY_STATE', async () => {
+    await fs.writeFile(statePath(), JSON.stringify({
+      schemaVersion: 1, lastCheckAt: null, lastEtag: null, latest: null,
+      vulnerableBelow: [],
+      email: {severeAt: null, vulnerableAt: null, vulnerableNewReleaseTag: null},
+      execution: 'not-an-object',
+    }));
+    const state = await loadState(statePath());
+    expect(state).toEqual(EMPTY_STATE);
+  });
+
+  it('rejects an unknown execution status by resetting to EMPTY_STATE', async () => {
+    await fs.writeFile(statePath(), JSON.stringify({
+      schemaVersion: 1, lastCheckAt: null, lastEtag: null, latest: null,
+      vulnerableBelow: [],
+      email: {severeAt: null, vulnerableAt: null, vulnerableNewReleaseTag: null},
+      execution: {status: 'totally-bogus'},
+    }));
+    const state = await loadState(statePath());
+    expect(state).toEqual(EMPTY_STATE);
+  });
+
+  it('rejects pending-verification missing fromSha (could break rollback)', async () => {
+    // Regression for Qodo: hand-edited state with a recognised status but
+    // missing required fields would reach RollbackHandler with undefined refs.
+    // Validator must require per-status fields, not just status enum membership.
+    await fs.writeFile(statePath(), JSON.stringify({
+      schemaVersion: 1, lastCheckAt: null, lastEtag: null, latest: null,
+      vulnerableBelow: [],
+      email: {severeAt: null, vulnerableAt: null, vulnerableNewReleaseTag: null},
+      execution: {status: 'pending-verification', targetTag: 'v2.7.3', deadlineAt: '2026-05-08T00:00:00Z'},
+      // fromSha intentionally missing
+    }));
+    const state = await loadState(statePath());
+    expect(state).toEqual(EMPTY_STATE);
+  });
+
+  it('rejects rolling-back missing reason / targetTag', async () => {
+    await fs.writeFile(statePath(), JSON.stringify({
+      schemaVersion: 1, lastCheckAt: null, lastEtag: null, latest: null,
+      vulnerableBelow: [],
+      email: {severeAt: null, vulnerableAt: null, vulnerableNewReleaseTag: null},
+      execution: {status: 'rolling-back', fromSha: 'abc', at: '2026-05-08T00:00:00Z'},
+      // reason and targetTag missing
+    }));
+    const state = await loadState(statePath());
+    expect(state).toEqual(EMPTY_STATE);
+  });
+
+  it('rejects empty-string fields for required keys', async () => {
+    await fs.writeFile(statePath(), JSON.stringify({
+      schemaVersion: 1, lastCheckAt: null, lastEtag: null, latest: null,
+      vulnerableBelow: [],
+      email: {severeAt: null, vulnerableAt: null, vulnerableNewReleaseTag: null},
+      execution: {status: 'executing', targetTag: '', fromSha: 'abc', startedAt: '2026-05-08T00:00:00Z'},
+    }));
+    const state = await loadState(statePath());
+    expect(state).toEqual(EMPTY_STATE);
+  });
+
+  it('accepts a fully-formed pending-verification', async () => {
+    const valid = {
+      schemaVersion: 1, lastCheckAt: null, lastEtag: null, latest: null,
+      vulnerableBelow: [],
+      email: {severeAt: null, vulnerableAt: null, vulnerableNewReleaseTag: null},
+      execution: {
+        status: 'pending-verification',
+        targetTag: 'v2.7.3',
+        fromSha: 'abc123',
+        deadlineAt: '2026-05-08T00:00:00Z',
+      },
+      bootCount: 1,
+      lastResult: null,
+    };
+    await fs.writeFile(statePath(), JSON.stringify(valid));
+    const state = await loadState(statePath());
+    expect(state.execution.status).toBe('pending-verification');
+  });
+
+  it('rejects lastResult with an unrecognised outcome', async () => {
+    await fs.writeFile(statePath(), JSON.stringify({
+      schemaVersion: 1, lastCheckAt: null, lastEtag: null, latest: null,
+      vulnerableBelow: [],
+      email: {severeAt: null, vulnerableAt: null, vulnerableNewReleaseTag: null},
+      execution: {status: 'idle'},
+      lastResult: {
+        targetTag: 'v2.7.3', fromSha: 'abc',
+        outcome: 'totally-made-up',
+        reason: null, at: '2026-05-08T00:00:00Z',
+      },
+    }));
+    const state = await loadState(statePath());
+    expect(state).toEqual(EMPTY_STATE);
+  });
+
+  it('rejects a non-numeric bootCount by resetting to EMPTY_STATE', async () => {
+    await fs.writeFile(statePath(), JSON.stringify({
+      schemaVersion: 1, lastCheckAt: null, lastEtag: null, latest: null,
+      vulnerableBelow: [],
+      email: {severeAt: null, vulnerableAt: null, vulnerableNewReleaseTag: null},
+      execution: {status: 'idle'},
+      bootCount: 'one',
+    }));
+    const state = await loadState(statePath());
+    expect(state).toEqual(EMPTY_STATE);
+  });
+
+  it('round-trips a pending-verification execution', async () => {
+    const s = {
+      ...EMPTY_STATE,
+      execution: {
+        status: 'pending-verification' as const,
+        targetTag: 'v2.7.3',
+        fromSha: 'abc123',
+        deadlineAt: '2026-05-08T10:00:00Z',
+      },
+      bootCount: 1,
+    };
+    await saveState(statePath(), s);
+    const loaded = await loadState(statePath());
+    expect(loaded.execution.status).toBe('pending-verification');
+    expect(loaded.bootCount).toBe(1);
+  });
+
+  it('round-trips a non-null lastResult', async () => {
+    const s = {
+      ...EMPTY_STATE,
+      lastResult: {
+        targetTag: 'v2.7.3', fromSha: 'abc',
+        outcome: 'verified' as const, reason: null,
+        at: '2026-05-08T10:00:00Z',
+      },
+    };
+    await saveState(statePath(), s);
+    const loaded = await loadState(statePath());
+    expect(loaded.lastResult?.outcome).toBe('verified');
+  });
+});
diff --git a/src/tests/backend-new/specs/updater/trustedKeys.test.ts b/src/tests/backend-new/specs/updater/trustedKeys.test.ts
new file mode 100644
index 00000000000..fc92e24af7d
--- /dev/null
+++ b/src/tests/backend-new/specs/updater/trustedKeys.test.ts
@@ -0,0 +1,98 @@
+import {describe, it, expect, vi} from 'vitest';
+import {verifyReleaseTag} from '../../../../node/updater/trustedKeys';
+
+const fakeChild = (exitCode: number) => ({
+  on: (e: string, cb: any) => { if (e === 'close') setImmediate(() => cb(exitCode)); },
+});
+
+describe('verifyReleaseTag', () => {
+  it('returns ok with reason "signature-not-required" when requireSignature is false (no spawn)', async () => {
+    const spawnFn = vi.fn();
+    const r = await verifyReleaseTag({
+      tag: 'v2.7.3',
+      repoDir: '/tmp/x',
+      requireSignature: false,
+      trustedKeysPath: null,
+      spawnFn: spawnFn as any,
+    });
+    expect(r).toEqual({ok: true, reason: 'signature-not-required'});
+    expect(spawnFn).not.toHaveBeenCalled();
+  });
+
+  it('returns ok on git verify-tag exit 0', async () => {
+    const spawnFn = vi.fn(() => fakeChild(0));
+    const r = await verifyReleaseTag({
+      tag: 'v2.7.3',
+      repoDir: '/tmp/x',
+      requireSignature: true,
+      trustedKeysPath: null,
+      spawnFn: spawnFn as any,
+    });
+    expect(r).toEqual({ok: true, reason: 'signature-verified'});
+    expect(spawnFn).toHaveBeenCalledWith(
+      'git',
+      // -- terminates options so a future tag-validation regression can't
+      // smuggle a flag past git verify-tag.
+      ['verify-tag', '--', 'v2.7.3'],
+      expect.objectContaining({cwd: '/tmp/x'}),
+    );
+  });
+
+  it('returns failure on non-zero exit', async () => {
+    const spawnFn = vi.fn(() => fakeChild(1));
+    const r = await verifyReleaseTag({
+      tag: 'v2.7.3',
+      repoDir: '/tmp/x',
+      requireSignature: true,
+      trustedKeysPath: null,
+      spawnFn: spawnFn as any,
+    });
+    expect(r).toEqual({ok: false, reason: 'signature-verification-failed'});
+  });
+
+  it('passes GNUPGHOME when trustedKeysPath is set', async () => {
+    const calls: any[] = [];
+    const spawnFn = vi.fn((cmd: string, args: string[], opts: any) => {
+      calls.push({cmd, args, env: opts.env});
+      return fakeChild(0);
+    });
+    await verifyReleaseTag({
+      tag: 'v2.7.3',
+      repoDir: '/tmp/x',
+      requireSignature: true,
+      trustedKeysPath: '/srv/etherpad/keys',
+      spawnFn: spawnFn as any,
+    });
+    expect(calls[0].env.GNUPGHOME).toBe('/srv/etherpad/keys');
+  });
+
+  it('refuses unsafe tags (option-injection guard) before spawning git', async () => {
+    const spawnFn = vi.fn();
+    const r = await verifyReleaseTag({
+      tag: '-no-verify',
+      repoDir: '/tmp/x',
+      requireSignature: true,
+      trustedKeysPath: null,
+      spawnFn: spawnFn as any,
+    });
+    expect(r).toEqual({ok: false, reason: 'signature-verification-failed'});
+    expect(spawnFn).not.toHaveBeenCalled();
+  });
+
+  it('does not set GNUPGHOME when trustedKeysPath is null', async () => {
+    const calls: any[] = [];
+    const spawnFn = vi.fn((cmd: string, args: string[], opts: any) => {
+      calls.push({cmd, args, env: opts.env});
+      return fakeChild(0);
+    });
+    delete process.env.GNUPGHOME;
+    await verifyReleaseTag({
+      tag: 'v2.7.3',
+      repoDir: '/tmp/x',
+      requireSignature: true,
+      trustedKeysPath: null,
+      spawnFn: spawnFn as any,
+    });
+    expect(calls[0].env.GNUPGHOME).toBeUndefined();
+  });
+});
diff --git a/src/tests/backend-new/specs/updater/updateLog.test.ts b/src/tests/backend-new/specs/updater/updateLog.test.ts
new file mode 100644
index 00000000000..ccb17a537ab
--- /dev/null
+++ b/src/tests/backend-new/specs/updater/updateLog.test.ts
@@ -0,0 +1,112 @@
+import {describe, it, expect, beforeEach, afterEach} from 'vitest';
+import fs from 'node:fs/promises';
+import path from 'node:path';
+import os from 'node:os';
+import {tailLines} from '../../../../node/updater/updateLog';
+
+describe('tailLines', () => {
+  let dir: string;
+  let logPath: string;
+
+  beforeEach(async () => {
+    dir = await fs.mkdtemp(path.join(os.tmpdir(), 'updater-log-'));
+    logPath = path.join(dir, 'update.log');
+  });
+
+  afterEach(async () => {
+    await fs.rm(dir, {recursive: true, force: true});
+  });
+
+  it('returns [] when file is missing', async () => {
+    expect(await tailLines(logPath, 10)).toEqual([]);
+  });
+
+  it('returns [] for an empty file', async () => {
+    await fs.writeFile(logPath, '');
+    expect(await tailLines(logPath, 10)).toEqual([]);
+  });
+
+  it('returns up to N lines when file is shorter', async () => {
+    await fs.writeFile(logPath, 'a\nb\nc\n');
+    expect(await tailLines(logPath, 10)).toEqual(['a', 'b', 'c']);
+  });
+
+  it('returns the last N when file is longer', async () => {
+    const lines = Array.from({length: 500}, (_, i) => `line-${i}`);
+    await fs.writeFile(logPath, lines.join('\n') + '\n');
+    expect(await tailLines(logPath, 5)).toEqual([
+      'line-495', 'line-496', 'line-497', 'line-498', 'line-499',
+    ]);
+  });
+
+  it('handles a final-line-without-newline', async () => {
+    await fs.writeFile(logPath, 'a\nb\nc');
+    expect(await tailLines(logPath, 10)).toEqual(['a', 'b', 'c']);
+  });
+
+  it('handles n=0', async () => {
+    await fs.writeFile(logPath, 'a\nb\nc\n');
+    expect(await tailLines(logPath, 0)).toEqual([]);
+  });
+});
+
+describe('appendLine + rotation', () => {
+  let dir: string;
+  let logPath: string;
+
+  beforeEach(async () => {
+    dir = await fs.mkdtemp(path.join(os.tmpdir(), 'updater-log-'));
+    logPath = path.join(dir, 'update.log');
+  });
+  afterEach(async () => { await fs.rm(dir, {recursive: true, force: true}); });
+
+  it('appendLine creates parent dir and writes a newline-terminated line', async () => {
+    const {appendLine} = await import('../../../../node/updater/updateLog');
+    const nested = path.join(dir, 'a', 'b', 'update.log');
+    await appendLine(nested, 'hello world');
+    expect(await fs.readFile(nested, 'utf8')).toBe('hello world\n');
+  });
+
+  it('appendLine swallows errors so the caller never breaks on a read-only fs', async () => {
+    const {appendLine} = await import('../../../../node/updater/updateLog');
+    // Make the would-be parent dir a regular file — fs.mkdir then fails with ENOTDIR
+    // (or EEXIST depending on platform), which the helper must swallow.
+    const collide = path.join(dir, 'not-a-dir');
+    await fs.writeFile(collide, 'oops');
+    const target = path.join(collide, 'inner', 'update.log');
+    await appendLine(target, 'x'); // must NOT throw
+  });
+
+  it('rotateIfNeeded shifts .1 -> .2, current -> .1 once over the size threshold', async () => {
+    const {rotateIfNeeded} = await import('../../../../node/updater/updateLog');
+    // Force rotation by passing a tiny limit; write a line above the limit.
+    await fs.writeFile(logPath, 'a'.repeat(50));
+    await rotateIfNeeded(logPath, 10, 3);
+    expect(await fs.readFile(`${logPath}.1`, 'utf8')).toBe('a'.repeat(50));
+    // Original file is gone (or empty after rotation).
+    let exists = true;
+    try { await fs.access(logPath); } catch { exists = false; }
+    expect(exists).toBe(false);
+  });
+
+  it('rotateIfNeeded preserves up to BACKUPS-1 older backups', async () => {
+    const {rotateIfNeeded} = await import('../../../../node/updater/updateLog');
+    await fs.writeFile(logPath, 'newest'.repeat(20));
+    await fs.writeFile(`${logPath}.1`, 'older-1');
+    await fs.writeFile(`${logPath}.2`, 'older-2');
+    await rotateIfNeeded(logPath, 10, 3);
+    expect(await fs.readFile(`${logPath}.1`, 'utf8')).toBe('newest'.repeat(20));
+    expect(await fs.readFile(`${logPath}.2`, 'utf8')).toBe('older-1');
+    expect(await fs.readFile(`${logPath}.3`, 'utf8')).toBe('older-2');
+  });
+
+  it('rotateIfNeeded is a no-op when under the limit', async () => {
+    const {rotateIfNeeded} = await import('../../../../node/updater/updateLog');
+    await fs.writeFile(logPath, 'small');
+    await rotateIfNeeded(logPath, 10 * 1024 * 1024, 3);
+    expect(await fs.readFile(logPath, 'utf8')).toBe('small');
+    let backupExists = true;
+    try { await fs.access(`${logPath}.1`); } catch { backupExists = false; }
+    expect(backupExists).toBe(false);
+  });
+});
diff --git a/src/tests/backend/specs/updateActions.ts b/src/tests/backend/specs/updateActions.ts
new file mode 100644
index 00000000000..6e06577cb7b
--- /dev/null
+++ b/src/tests/backend/specs/updateActions.ts
@@ -0,0 +1,196 @@
+'use strict';
+
+const assert = require('assert').strict;
+const common = require('../common');
+const plugins = require('../../../static/js/pluginfw/plugin_defs');
+import settings from '../../../node/utils/Settings';
+import {saveState} from '../../../node/updater/state';
+import {EMPTY_STATE} from '../../../node/updater/types';
+import path from 'node:path';
+
+const statePath = () => path.join(settings.root, 'var', 'update-state.json');
+const lockPath = () => path.join(settings.root, 'var', 'update.lock');
+
+const authHookNames = ['preAuthorize', 'authenticate', 'authorize'];
+const failHookNames = ['preAuthzFailure', 'authnFailure', 'authzFailure', 'authFailure'];
+
+const installAdminAuth = () => {
+  for (const h of authHookNames.concat(failHookNames)) plugins.hooks[h] = [];
+  plugins.hooks.authenticate = [{
+    hook_fn: (_n: string, ctx: any, cb: Function) => {
+      ctx.req.session.user = {is_admin: true};
+      cb([true]);
+    },
+  }];
+  (settings as any).requireAuthentication = true;
+  (settings as any).requireAuthorization = false;
+  (settings as any).users = {admin: {password: 'admin-pw', is_admin: true}};
+};
+
+describe(__filename, function () {
+  let agent: any;
+  const backups: Record<string, any> = {};
+  // Bump tier to 'manual' so the action endpoints are mounted by the hook.
+  // (At default tier 'notify' they 404 — that's the gate Qodo #1 introduced.)
+  const originalTier = settings.updates.tier;
+
+  before(async () => {
+    settings.updates.tier = 'manual';
+    agent = await common.init();
+  });
+
+  after(() => {
+    settings.updates.tier = originalTier;
+  });
+
+  beforeEach(async () => {
+    backups.hooks = {};
+    for (const n of authHookNames.concat(failHookNames)) backups.hooks[n] = plugins.hooks[n];
+    backups.settings = {};
+    for (const k of ['requireAuthentication', 'requireAuthorization', 'users']) {
+      backups.settings[k] = (settings as any)[k];
+    }
+    // Seed a known "update available" state so apply has a target tag.
+    await saveState(statePath(), {
+      ...EMPTY_STATE,
+      latest: {
+        version: '99.0.0', tag: 'v99.0.0', body: 'release notes',
+        publishedAt: '2099-01-01T00:00:00Z', prerelease: false,
+        htmlUrl: 'https://example/r/v99.0.0',
+      },
+    });
+    // Ensure no stale lock from an earlier test.
+    try { require('node:fs').unlinkSync(lockPath()); } catch {/* noop */}
+  });
+
+  afterEach(() => {
+    Object.assign(plugins.hooks, backups.hooks);
+    Object.assign(settings, backups.settings);
+  });
+
+  describe('POST /admin/update/apply', function () {
+    it('rejects unauthenticated', async () => {
+      await agent.post('/admin/update/apply').expect(401);
+    });
+
+    it('returns 409 with no-known-latest when state has no latest release', async () => {
+      installAdminAuth();
+      // Replace seeded "update available" with empty state.
+      await saveState(statePath(), {...EMPTY_STATE});
+      const r = await agent.post('/admin/update/apply')
+        .auth('admin', 'admin-pw')
+        .expect(409);
+      assert.equal(r.body.error, 'no-known-latest');
+    });
+
+    it('returns 404 when tier is "notify" (action endpoints disabled)', async () => {
+      // Regression for the Tier 2 gate (Qodo #1): disabled tiers must 404 to
+      // match prior PR-1 behaviour, not 401/403/409.
+      const orig = settings.updates.tier;
+      settings.updates.tier = 'notify';
+      try {
+        await agent.post('/admin/update/apply').expect(404);
+        await agent.post('/admin/update/cancel').expect(404);
+        await agent.post('/admin/update/acknowledge').expect(404);
+        await agent.get('/admin/update/log').expect(404);
+      } finally { settings.updates.tier = orig; }
+    });
+
+    it('rejects when execution is already in flight (409)', async () => {
+      installAdminAuth();
+      await saveState(statePath(), {
+        ...EMPTY_STATE,
+        latest: {
+          version: '99.0.0', tag: 'v99.0.0', body: '', publishedAt: '',
+          prerelease: false, htmlUrl: '',
+        },
+        execution: {
+          status: 'executing', targetTag: 'v99.0.0', fromSha: 'x',
+          startedAt: '2026-05-08T00:00:00Z',
+        },
+      });
+      const r = await agent.post('/admin/update/apply')
+        .auth('admin', 'admin-pw')
+        .expect(409);
+      assert.match(r.body.error, /execution-busy/);
+    });
+  });
+
+  describe('POST /admin/update/cancel', function () {
+    it('rejects unauthenticated', async () => {
+      await agent.post('/admin/update/cancel').expect(401);
+    });
+
+    it('returns 409 when nothing is in flight', async () => {
+      installAdminAuth();
+      await agent.post('/admin/update/cancel').auth('admin', 'admin-pw').expect(409);
+    });
+  });
+
+  describe('POST /admin/update/acknowledge', function () {
+    it('rejects unauthenticated', async () => {
+      await agent.post('/admin/update/acknowledge').expect(401);
+    });
+
+    it('clears a terminal rollback-failed state to idle', async () => {
+      installAdminAuth();
+      await saveState(statePath(), {
+        ...EMPTY_STATE,
+        execution: {
+          status: 'rollback-failed',
+          reason: 'install-failed; rollback failed: pnpm exit 1',
+          targetTag: 'v99.0.0', fromSha: 'x',
+          at: '2026-05-08T00:00:00Z',
+        },
+        lastResult: {
+          targetTag: 'v99.0.0', fromSha: 'x',
+          outcome: 'rollback-failed',
+          reason: 'pnpm install failed',
+          at: '2026-05-08T00:00:00Z',
+        },
+      });
+      await agent.post('/admin/update/acknowledge')
+        .auth('admin', 'admin-pw').expect(200);
+      const status = await agent.get('/admin/update/status').expect(200);
+      assert.equal(status.body.execution.status, 'idle');
+      // lastResult is preserved on acknowledge so the admin still sees what happened.
+      assert.equal(status.body.lastResult.outcome, 'rollback-failed');
+    });
+
+    it('clears a preflight-failed state to idle', async () => {
+      installAdminAuth();
+      await saveState(statePath(), {
+        ...EMPTY_STATE,
+        execution: {
+          status: 'preflight-failed',
+          targetTag: 'v99.0.0',
+          reason: 'low-disk-space',
+          at: '2026-05-08T00:00:00Z',
+        },
+      });
+      await agent.post('/admin/update/acknowledge')
+        .auth('admin', 'admin-pw').expect(200);
+    });
+
+    it('refuses to clear a non-terminal state (409)', async () => {
+      installAdminAuth();
+      await saveState(statePath(), {...EMPTY_STATE});
+      await agent.post('/admin/update/acknowledge')
+        .auth('admin', 'admin-pw').expect(409);
+    });
+  });
+
+  describe('GET /admin/update/log', function () {
+    it('rejects unauthenticated', async () => {
+      await agent.get('/admin/update/log').expect(401);
+    });
+
+    it('returns a text body (possibly empty) for an admin', async () => {
+      installAdminAuth();
+      const res = await agent.get('/admin/update/log')
+        .auth('admin', 'admin-pw').expect(200);
+      assert.equal(typeof res.text, 'string');
+      assert.match(res.headers['content-type'], /text\/plain/);
+    });
+  });
+});
diff --git a/src/tests/backend/specs/updateStatus.ts b/src/tests/backend/specs/updateStatus.ts
index 942f5a255c6..e8fb02fa03e 100644
--- a/src/tests/backend/specs/updateStatus.ts
+++ b/src/tests/backend/specs/updateStatus.ts
@@ -88,6 +88,40 @@ describe(__filename, function () {
       assert.ok(Array.isArray(res.body.vulnerableBelow));
     });
 
+    it('redacts execution.reason / lastResult.reason for unauth callers', async function () {
+      // Seed state with diagnostic strings that would leak environment details.
+      await saveState(statePath(), {
+        ...EMPTY_STATE,
+        execution: {
+          status: 'rollback-failed',
+          reason: 'pnpm install exit 1: ENOSPC at /srv/etherpad/v2.7.3',
+          targetTag: 'v2.7.3',
+          fromSha: 'abc123def456',
+          at: '2026-05-08T00:00:00Z',
+        },
+        lastResult: {
+          targetTag: 'v2.7.3',
+          fromSha: 'abc123def456',
+          outcome: 'rollback-failed',
+          reason: 'pnpm install failed: ENOSPC at /srv/etherpad/v2.7.3',
+          at: '2026-05-08T00:00:00Z',
+        },
+      });
+      const res = await agent.get('/admin/update/status').expect(200);
+      // Status enum + outcome enum are kept (UI needs them).
+      assert.equal(res.body.execution.status, 'rollback-failed');
+      assert.equal(res.body.lastResult.outcome, 'rollback-failed');
+      // Diagnostic fields are stripped for unauth callers.
+      assert.equal(res.body.execution.reason, undefined);
+      assert.equal(res.body.execution.fromSha, undefined);
+      assert.equal(res.body.execution.targetTag, undefined);
+      assert.equal(res.body.lastResult.reason, undefined);
+      assert.equal(res.body.lastResult.fromSha, undefined);
+      assert.equal(res.body.lastResult.targetTag, undefined);
+      // Non-sensitive fields preserved on lastResult.
+      assert.equal(res.body.lastResult.at, '2026-05-08T00:00:00Z');
+    });
+
     describe('when updates.requireAdminForStatus = true', function () {
       const restore: Record<string, any> = {};
       beforeEach(function () {
@@ -140,5 +174,43 @@ describe(__filename, function () {
           .expect(200);
       });
     });
+
+    describe('admin auth (without requireAdminForStatus)', function () {
+      // requireAdminForStatus=false (default) keeps the endpoint open for the
+      // pad-side / banner usage, but admin callers should still see full
+      // diagnostic detail (execution.reason, fromSha, etc.).
+      it('returns full diagnostic payload to authed admin sessions', async function () {
+        for (const hookName of authHookNames.concat(failHookNames)) plugins.hooks[hookName] = [];
+        plugins.hooks.authenticate = [{
+          hook_fn: (_hookName: string, ctx: any, cb: Function) => {
+            ctx.req.session.user = {is_admin: true};
+            cb([true]);
+          },
+        }];
+        (settings as any).requireAuthentication = true;
+        (settings as any).requireAuthorization = false;
+        (settings as any).users = {admin: {password: 'admin-password', is_admin: true}};
+        await saveState(statePath(), {
+          ...EMPTY_STATE,
+          execution: {
+            status: 'rollback-failed',
+            reason: 'pnpm install exit 1',
+            targetTag: 'v2.7.3', fromSha: 'abc',
+            at: '2026-05-08T00:00:00Z',
+          },
+          lastResult: {
+            targetTag: 'v2.7.3', fromSha: 'abc',
+            outcome: 'rollback-failed', reason: 'pnpm install failed',
+            at: '2026-05-08T00:00:00Z',
+          },
+        });
+        const res = await agent.get('/admin/update/status')
+          .auth('admin', 'admin-password').expect(200);
+        // Admin sees the full diagnostic detail (it's their own server).
+        assert.equal(res.body.execution.reason, 'pnpm install exit 1');
+        assert.equal(res.body.execution.fromSha, 'abc');
+        assert.equal(res.body.lastResult.reason, 'pnpm install failed');
+      });
+    });
   });
 });
diff --git a/src/tests/backend/specs/updater-integration.ts b/src/tests/backend/specs/updater-integration.ts
new file mode 100644
index 00000000000..e04a551c258
--- /dev/null
+++ b/src/tests/backend/specs/updater-integration.ts
@@ -0,0 +1,265 @@
+'use strict';
+
+const assert = require('assert').strict;
+import {execSync, spawn} from 'node:child_process';
+import fs from 'node:fs/promises';
+import os from 'node:os';
+import path from 'node:path';
+import {executeUpdate} from '../../../node/updater/UpdateExecutor';
+import {performRollback, checkPendingVerification} from '../../../node/updater/RollbackHandler';
+import {EMPTY_STATE, UpdateState} from '../../../node/updater/types';
+
+const sh = (cmd: string, opts: any = {}) =>
+  execSync(cmd, {stdio: 'pipe', ...opts}).toString().trim();
+
+const buildTmpRepo = async (): Promise<{dir: string; v1Sha: string; v2Sha: string}> => {
+  const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'updater-it-'));
+  sh('git init -b main', {cwd: dir});
+  sh('git config user.email test@example.com', {cwd: dir});
+  sh('git config user.name test', {cwd: dir});
+  sh('git config commit.gpgsign false', {cwd: dir});
+  sh('git config tag.gpgSign false', {cwd: dir});
+  await fs.writeFile(path.join(dir, 'pnpm-lock.yaml'), 'lockfileVersion: x\n');
+  sh('git add . && git commit -m initial', {cwd: dir});
+  sh('git tag v0.0.1', {cwd: dir});
+  const v1Sha = sh('git rev-parse HEAD', {cwd: dir});
+  await fs.writeFile(path.join(dir, 'pnpm-lock.yaml'), 'lockfileVersion: y\n');
+  sh('git add . && git commit -m bump', {cwd: dir});
+  sh('git tag v0.0.2', {cwd: dir});
+  const v2Sha = sh('git rev-parse HEAD', {cwd: dir});
+  // Reset to v1 — that's our "currently installed" version.
+  sh('git checkout v0.0.1', {cwd: dir});
+  // Add a self-pointing origin so executor's git fetch works.
+  sh(`git remote add origin ${dir}`, {cwd: dir});
+  // Pre-prime origin's tag list (git fetch from a local origin sees both).
+  return {dir, v1Sha, v2Sha};
+};
+
+/**
+ * Spawn override: route every git ... call to the real binary, but stub pnpm
+ * to a controlled exit code. Lets tests assert "git fetch + checkout actually
+ * mutated the repo" without ever invoking pnpm install for real.
+ */
+const stubSpawn = (pnpmExits: Record<string, number>) =>
+  (cmd: string, args: string[], opts: any) => {
+    if (cmd === 'pnpm') {
+      const key = `pnpm ${args.join(' ')}`;
+      const exit = pnpmExits[key];
+      if (exit === undefined) {
+        throw new Error(`Unexpected pnpm call in integration stub: ${key}`);
+      }
+      return {
+        stdout: {on: () => {}},
+        stderr: {on: () => {}},
+        on: (e: string, cb: any) => { if (e === 'close') setImmediate(() => cb(exit)); },
+      };
+    }
+    return spawn(cmd, args, opts);
+  };
+
+describe(__filename, function () {
+  this.timeout(30_000);
+
+  it('happy path: executes against tmp repo, lands on pending-verification, exits 75', async () => {
+    const {dir, v1Sha} = await buildTmpRepo();
+    try {
+      const states: UpdateState[] = [];
+      let exitedWith: number | null = null;
+      const r = await executeUpdate({
+        repoDir: dir,
+        backupDir: path.join(dir, 'var', 'update-backup'),
+        spawnFn: stubSpawn({
+          'pnpm install --frozen-lockfile': 0,
+          'pnpm run build:ui': 0,
+        }) as any,
+        readSha: async () => sh('git rev-parse HEAD', {cwd: dir}),
+        copyFile: async (s, d) => {
+          await fs.mkdir(path.dirname(d), {recursive: true});
+          await fs.copyFile(s, d);
+        },
+        saveState: async (s) => { states.push(structuredClone(s)); },
+        initialState: structuredClone(EMPTY_STATE),
+        targetTag: 'v0.0.2',
+        now: () => new Date(),
+        exit: (code) => { exitedWith = code; },
+      });
+      assert.equal(r.outcome, 'pending-verification');
+      assert.equal(exitedWith, 75);
+      assert.equal(states.at(-1)!.execution.status, 'pending-verification');
+      // Working tree is now on v0.0.2.
+      assert.equal(sh('git rev-parse HEAD', {cwd: dir}), sh('git rev-parse v0.0.2', {cwd: dir}));
+      // Backup has the v0.0.1-era lockfile.
+      const backup = await fs.readFile(path.join(dir, 'var', 'update-backup', 'pnpm-lock.yaml'), 'utf8');
+      assert.match(backup, /lockfileVersion: x/);
+      // The fromSha recorded in state matches the v0.0.1 SHA.
+      assert.equal((states.at(-1)!.execution as {fromSha: string}).fromSha, v1Sha);
+    } finally {
+      await fs.rm(dir, {recursive: true, force: true});
+    }
+  });
+
+  it('install failure rolls back to original SHA + lockfile', async () => {
+    const {dir, v1Sha} = await buildTmpRepo();
+    try {
+      const states: UpdateState[] = [];
+      let exitedWith: number | null = null;
+
+      // Phase 1: executor with failing install.
+      await executeUpdate({
+        repoDir: dir,
+        backupDir: path.join(dir, 'var', 'update-backup'),
+        spawnFn: stubSpawn({'pnpm install --frozen-lockfile': 1}) as any,
+        readSha: async () => sh('git rev-parse HEAD', {cwd: dir}),
+        copyFile: async (s, d) => {
+          await fs.mkdir(path.dirname(d), {recursive: true});
+          await fs.copyFile(s, d);
+        },
+        saveState: async (s) => { states.push(structuredClone(s)); },
+        initialState: structuredClone(EMPTY_STATE),
+        targetTag: 'v0.0.2',
+        now: () => new Date(),
+        exit: (c) => { exitedWith = c; },
+      });
+      assert.equal(states.at(-1)!.execution.status, 'rolling-back');
+
+      // Phase 2: rollback.
+      await performRollback(states.at(-1)!, {
+        repoDir: dir,
+        backupDir: path.join(dir, 'var', 'update-backup'),
+        spawnFn: stubSpawn({'pnpm install --frozen-lockfile': 0}) as any,
+        copyFile: (s, d) => fs.copyFile(s, d),
+        saveState: async (s) => { states.push(structuredClone(s)); },
+        exit: (c) => { exitedWith = c; },
+        now: () => new Date(),
+        rollbackHealthCheckSeconds: 60,
+      });
+      assert.equal(states.at(-1)!.execution.status, 'rolled-back');
+      assert.equal(sh('git rev-parse HEAD', {cwd: dir}), v1Sha);
+      assert.equal(exitedWith, 75);
+      // Working tree's pnpm-lock.yaml was restored from backup.
+      const lock = await fs.readFile(path.join(dir, 'pnpm-lock.yaml'), 'utf8');
+      assert.match(lock, /lockfileVersion: x/);
+    } finally {
+      await fs.rm(dir, {recursive: true, force: true});
+    }
+  });
+
+  it('build failure rolls back to original SHA', async () => {
+    const {dir, v1Sha} = await buildTmpRepo();
+    try {
+      const states: UpdateState[] = [];
+
+      await executeUpdate({
+        repoDir: dir,
+        backupDir: path.join(dir, 'var', 'update-backup'),
+        spawnFn: stubSpawn({
+          'pnpm install --frozen-lockfile': 0,
+          'pnpm run build:ui': 1,
+        }) as any,
+        readSha: async () => sh('git rev-parse HEAD', {cwd: dir}),
+        copyFile: async (s, d) => {
+          await fs.mkdir(path.dirname(d), {recursive: true});
+          await fs.copyFile(s, d);
+        },
+        saveState: async (s) => { states.push(structuredClone(s)); },
+        initialState: structuredClone(EMPTY_STATE),
+        targetTag: 'v0.0.2',
+        now: () => new Date(),
+        exit: () => {},
+      });
+      assert.equal(states.at(-1)!.execution.status, 'rolling-back');
+
+      await performRollback(states.at(-1)!, {
+        repoDir: dir,
+        backupDir: path.join(dir, 'var', 'update-backup'),
+        spawnFn: stubSpawn({'pnpm install --frozen-lockfile': 0}) as any,
+        copyFile: (s, d) => fs.copyFile(s, d),
+        saveState: async (s) => { states.push(structuredClone(s)); },
+        exit: () => {},
+        now: () => new Date(),
+        rollbackHealthCheckSeconds: 60,
+      });
+      assert.equal(states.at(-1)!.execution.status, 'rolled-back');
+      assert.equal(sh('git rev-parse HEAD', {cwd: dir}), v1Sha);
+    } finally {
+      await fs.rm(dir, {recursive: true, force: true});
+    }
+  });
+
+  it('crash-loop guard: bootCount=3 forces immediate rollback', async () => {
+    const {dir, v1Sha} = await buildTmpRepo();
+    try {
+      // Simulate "post-update boot": working tree on v0.0.2, backup lockfile from v0.0.1
+      // already in place, state is pending-verification with bootCount=3.
+      sh('git checkout v0.0.2', {cwd: dir});
+      await fs.mkdir(path.join(dir, 'var', 'update-backup'), {recursive: true});
+      // Backup the v0.0.1 lockfile content (we know v0.0.1's lockfile was 'x' from buildTmpRepo).
+      await fs.writeFile(path.join(dir, 'var', 'update-backup', 'pnpm-lock.yaml'), 'lockfileVersion: x\n');
+
+      const states: UpdateState[] = [];
+      let exitedWith: number | null = null;
+      const state: UpdateState = {
+        ...structuredClone(EMPTY_STATE),
+        execution: {
+          status: 'pending-verification',
+          targetTag: 'v0.0.2',
+          fromSha: v1Sha,
+          deadlineAt: '2026-05-08T10:00:00Z',
+        },
+        bootCount: 3,
+      };
+      const r = checkPendingVerification(state, {
+        repoDir: dir,
+        backupDir: path.join(dir, 'var', 'update-backup'),
+        spawnFn: stubSpawn({'pnpm install --frozen-lockfile': 0}) as any,
+        copyFile: (s, d) => fs.copyFile(s, d),
+        saveState: async (s) => { states.push(structuredClone(s)); },
+        exit: (c) => { exitedWith = c; },
+        now: () => new Date(),
+        rollbackHealthCheckSeconds: 60,
+      });
+      assert.equal(r.armed, false);
+      // Wait for the fire-and-forget rollback to finish.
+      await new Promise((resolve) => setTimeout(resolve, 250));
+      assert.equal(states.at(-1)!.execution.status, 'rolled-back');
+      assert.equal(sh('git rev-parse HEAD', {cwd: dir}), v1Sha);
+      assert.equal(exitedWith, 75);
+    } finally {
+      await fs.rm(dir, {recursive: true, force: true});
+    }
+  });
+
+  it('rollback failure (target SHA does not exist) lands on terminal rollback-failed', async () => {
+    const {dir} = await buildTmpRepo();
+    try {
+      const states: UpdateState[] = [];
+      let exitedWith: number | null = null;
+      const state: UpdateState = {
+        ...structuredClone(EMPTY_STATE),
+        execution: {
+          status: 'rolling-back',
+          reason: 'install-failed',
+          targetTag: 'v0.0.2',
+          // 40 hex chars but no such commit — git checkout -f will reject.
+          fromSha: '0000000000000000000000000000000000000000',
+          at: '2026-05-08T10:00:00Z',
+        },
+      };
+      await performRollback(state, {
+        repoDir: dir,
+        backupDir: path.join(dir, 'var', 'update-backup'),
+        spawnFn: stubSpawn({'pnpm install --frozen-lockfile': 0}) as any,
+        copyFile: (s, d) => fs.copyFile(s, d),
+        saveState: async (s) => { states.push(structuredClone(s)); },
+        exit: (c) => { exitedWith = c; },
+        now: () => new Date(),
+        rollbackHealthCheckSeconds: 60,
+      });
+      assert.equal(states.at(-1)!.execution.status, 'rollback-failed');
+      assert.equal(states.at(-1)!.lastResult!.outcome, 'rollback-failed');
+      assert.equal(exitedWith, 75);
+    } finally {
+      await fs.rm(dir, {recursive: true, force: true});
+    }
+  });
+});
diff --git a/src/tests/frontend-new/admin-spec/update-page-actions.spec.ts b/src/tests/frontend-new/admin-spec/update-page-actions.spec.ts
new file mode 100644
index 00000000000..bdca6df7e45
--- /dev/null
+++ b/src/tests/frontend-new/admin-spec/update-page-actions.spec.ts
@@ -0,0 +1,111 @@
+import {expect, test} from '@playwright/test';
+import {loginToAdmin} from '../helper/adminhelper';
+
+const baseStatus = {
+  currentVersion: '2.7.1',
+  latest: {
+    version: '2.7.2',
+    tag: 'v2.7.2',
+    body: 'release notes',
+    publishedAt: '2026-05-01T00:00:00Z',
+    prerelease: false,
+    htmlUrl: 'https://github.com/ether/etherpad/releases/tag/v2.7.2',
+  },
+  lastCheckAt: '2026-05-08T00:00:00Z',
+  installMethod: 'git',
+  tier: 'manual',
+  policy: {canNotify: true, canManual: true, canAuto: false, canAutonomous: false, reason: 'ok'},
+  vulnerableBelow: [],
+  execution: {status: 'idle'},
+  lastResult: null,
+  lockHeld: false,
+};
+
+test.describe('admin update page actions', () => {
+  test.beforeEach(async ({page}) => {
+    await loginToAdmin(page, 'admin', 'changeme1');
+  });
+
+  test('Apply button posts /admin/update/apply and re-fetches status', async ({page}) => {
+    let postedApply = false;
+    let statusFetches = 0;
+    await page.route('**/admin/update/status', async (route) => {
+      statusFetches += 1;
+      await route.fulfill({
+        status: 200,
+        contentType: 'application/json',
+        body: JSON.stringify(baseStatus),
+      });
+    });
+    await page.route('**/admin/update/apply', async (route) => {
+      postedApply = true;
+      await route.fulfill({status: 202, contentType: 'application/json', body: JSON.stringify({accepted: true})});
+    });
+
+    await page.goto('http://localhost:9001/admin/update');
+    await expect(page.getByRole('button', {name: /apply update/i})).toBeVisible({timeout: 30000});
+
+    await page.getByRole('button', {name: /apply update/i}).click();
+    await expect.poll(() => postedApply, {timeout: 15000}).toBe(true);
+    // After Apply, the page re-fetches status. Initial load = 1 fetch + Apply re-fetch >= 2.
+    await expect.poll(() => statusFetches, {timeout: 15000}).toBeGreaterThanOrEqual(2);
+  });
+
+  test('install-method-not-writable hides Apply and shows the policy-denial copy', async ({page}) => {
+    const denied = {
+      ...baseStatus,
+      installMethod: 'docker',
+      policy: {canNotify: true, canManual: false, canAuto: false, canAutonomous: false, reason: 'install-method-not-writable'},
+    };
+    await page.route('**/admin/update/status', (route) =>
+      route.fulfill({status: 200, contentType: 'application/json', body: JSON.stringify(denied)}));
+
+    await page.goto('http://localhost:9001/admin/update');
+    // Heading rendered; no Apply button.
+    await expect(page.getByRole('heading', {name: /etherpad updates/i})).toBeVisible({timeout: 30000});
+    await expect(page.getByRole('button', {name: /apply update/i})).toHaveCount(0);
+    // Localised denial copy.
+    await expect(page.getByText(/Updates from the admin UI require a git install/i)).toBeVisible();
+  });
+
+  test('rollback-failed terminal state shows Acknowledge and lastResult copy', async ({page}) => {
+    const terminal = {
+      ...baseStatus,
+      execution: {
+        status: 'rollback-failed',
+        reason: 'pnpm install failed; rollback failed: pnpm exit 1',
+        targetTag: 'v2.7.2',
+        fromSha: 'abc',
+        at: '2026-05-08T00:00:00Z',
+      },
+      lastResult: {
+        targetTag: 'v2.7.2',
+        fromSha: 'abc',
+        outcome: 'rollback-failed',
+        reason: 'pnpm install failed',
+        at: '2026-05-08T00:00:00Z',
+      },
+      policy: {canNotify: true, canManual: true, canAuto: false, canAutonomous: false, reason: 'rollback-failed-terminal'},
+    };
+    await page.route('**/admin/update/status', (route) =>
+      route.fulfill({status: 200, contentType: 'application/json', body: JSON.stringify(terminal)}));
+
+    await page.goto('http://localhost:9001/admin/update');
+    await expect(page.getByRole('button', {name: /acknowledge/i})).toBeVisible({timeout: 30000});
+    // lastResult copy uses i18n update.page.last_result.rollback-failed.
+    // Both the banner and the lastResult paragraph contain "Manual intervention
+    // required" — scope to the lastResult <p> so we get exactly one match.
+    await expect(page.locator('p.last-result-rollback-failed')).toBeVisible();
+    await expect(page.locator('p.last-result-rollback-failed')).toContainText(/Manual intervention required/i);
+  });
+
+  test('lockHeld true hides the Apply button even when policy.canManual is on', async ({page}) => {
+    const locked = {...baseStatus, lockHeld: true};
+    await page.route('**/admin/update/status', (route) =>
+      route.fulfill({status: 200, contentType: 'application/json', body: JSON.stringify(locked)}));
+
+    await page.goto('http://localhost:9001/admin/update');
+    await expect(page.getByRole('heading', {name: /etherpad updates/i})).toBeVisible({timeout: 30000});
+    await expect(page.getByRole('button', {name: /apply update/i})).toHaveCount(0);
+  });
+});