A run could get stranded at 'running' in the UI after a crash/disconnect/restart, with no way to clear it. Root cause was a race: the SSE history replay re-asserted a stale `running` status that beat the poll's settled status, leaving the run showing "Running" + the settle error at once. Server (runs.ts / runner.ts / index.ts): - reconcile() on every read force-settles any 'running' run with no live runner, so the board self-heals on the next poll (≤3s) — no restart needed. - forceSettle() emits a persisted `status` event so an open/reconnecting SSE stream replays the terminal state last, not a stale `running`. - Startup orphan-reconciliation now also emits that event (was the gap that let the replay re-assert `running` after a server restart). - Idle watchdog (10min): a silent pi is settled as 'failed' instead of hanging forever; SIGKILL escalation (20s) reaps wedged processes. - stop() now recovers: active→abort, orphaned-but-running→force-stop (the Stop button clears wedged runs instead of 409'ing). - start() catch force-settles 'failed' so a spawn failure never orphans a half-created 'running' row. Client (useOrchestrator.ts): - patchRun refuses to un-settle a terminal run, dropping stale replayed status as a belt-and-suspenders guard against any such race. EOF && echo "" && git log --oneline -3
292 lines
9.8 KiB
TypeScript
292 lines
9.8 KiB
TypeScript
import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
|
|
import {
|
|
orchestratorApi,
|
|
type AgentRun,
|
|
type DiffResult,
|
|
type MergeResult,
|
|
type RunStatus,
|
|
} from '../../lib/orchestratorApi';
|
|
|
|
/**
|
|
* Shared run registry for the implementation board.
|
|
*
|
|
* Owns the lightweight, board-level slice of orchestrator state: the run list,
|
|
* Bevy-playtest flags, and the derived active-run index. It deliberately does
|
|
* NOT hold the streaming event log — that lives in `useRunStream`, scoped to the
|
|
* run console (`AgentRunBar`), so a burst of agent events re-renders only the
|
|
* console and never the board page or card modal. Lifecycle changes observed in
|
|
* a stream (status / bevy / done) are pushed back here via `patchRun` /
|
|
* `reflectBevy` so the board's active indicators stay correct.
|
|
*
|
|
* The returned object is memoized on its state/callbacks, so its identity is
|
|
* stable between registry lifecycle changes (not, e.g., on every render).
|
|
*/
|
|
|
|
/**
 * Public surface returned by `useOrchestrator`: the run list and its load
 * state, per-card lookups, run lifecycle actions, the hooks that stream
 * reflectors call to push changes back, and Bevy playtest controls.
 */
export interface UseOrchestrator {
  // ── Registry state ────────────────────────────────────────────────────────

  /** Every known run, newest first — suitable for a global activity view. */
  runs: AgentRun[];
  /** Starts `true`; becomes `false` once a run-list load has finished (success or failure). */
  loading: boolean;
  /** Message from a failed run-list load, or `null` when there is none. */
  error: string | null;
  /** (Re)load the run list from the server. */
  reload: () => Promise<void>;

  // ── Per-card lookups ──────────────────────────────────────────────────────

  /** The run to show for a card: its running run if any, otherwise its newest run. */
  runForCard: (cardId: string) => AgentRun | undefined;
  /** Whether some run is currently working the given card. */
  isRunning: (cardId: string) => boolean;
  /**
   * Index of running runs keyed by card id. Memoized, and meant to keep its
   * identity unless the active set really changes (not on every streamed
   * event), so memoized card components can read their flags without
   * re-rendering on noise.
   */
  activeByCard: Map<string, { running: boolean; bevy: boolean; runId: string }>;

  // ── Run lifecycle ─────────────────────────────────────────────────────────

  /** Kick off a run for a card; resolves with the run that was created. */
  start: (input: { cardId: string; prompt?: string; refineRunId?: string }) => Promise<AgentRun>;
  /** Deliver a steer or follow-up message to an active run. */
  message: (runId: string, text: string, mode: 'steer' | 'followUp') => Promise<void>;
  /** Ask the server to stop an active run. */
  stop: (runId: string) => Promise<void>;
  /** Remove a settled run from the UI (and reclaim its worktree). */
  remove: (runId: string) => Promise<void>;

  // ── Stream reflectors ─────────────────────────────────────────────────────

  /** Merge a partial update into a run record (called by stream reflectors). */
  patchRun: (runId: string, patch: Partial<AgentRun>) => void;
  /** Record that a run's Bevy playtest started or ended (called by stream reflectors). */
  reflectBevy: (runId: string, running: boolean) => void;

  // ── Diff / merge ──────────────────────────────────────────────────────────

  /** Fetch the diff of a run's branch against main. */
  getDiff: (runId: string) => Promise<DiffResult>;
  /** Merge a run's branch into the main worktree. */
  mergeRun: (runId: string) => Promise<MergeResult>;

  // ── Bevy playtest ─────────────────────────────────────────────────────────

  /** Launch a Bevy playtest inside a run's worktree. */
  startBevy: (runId: string) => Promise<void>;
  /** Shut down a run's Bevy playtest. */
  stopBevy: (runId: string) => Promise<void>;
  /** Whether a Bevy playtest is currently flagged as running for a run. */
  bevyIsRunning: (runId: string) => boolean;
  /** Re-fetch a run's Bevy status from the server (the source of truth after a reconnect). */
  refreshBevyStatus: (runId: string) => Promise<void>;
}
|
|
|
|
/**
 * Statuses that mark a run as settled. A run is never expected to leave one of
 * these once it has reached it, so `patchRun` consults this set to reject stale
 * history replays that would flip a settled run back to a non-terminal status.
 */
const TERMINAL_STATUS: ReadonlySet<RunStatus> = new Set<RunStatus>([
  'completed',
  'failed',
  'stopped',
]);
|
|
|
|
/**
 * Change-detection signature for a run list: one line per run built from the
 * fields the board renders (id, status, finishedAt, summary, commitSha).
 * Two lists with equal signatures are treated as "nothing changed" by the poll.
 */
function runsSignature(list: readonly AgentRun[]): string {
  return list
    .map((r) => `${r.id}|${r.status}|${r.finishedAt ?? ''}|${r.summary ?? ''}|${r.commitSha ?? ''}`)
    .join('\n');
}

/**
 * Pure reducer behind `patchRun`. Returns `prev` itself (same identity) when
 * the patch must be ignored or would change nothing, so React can bail out of
 * the state update and memoized consumers are not invalidated.
 */
function applyRunPatch(prev: AgentRun[], runId: string, patch: Partial<AgentRun>): AgentRun[] {
  const cur = prev.find((r) => r.id === runId);
  // Unknown run: nothing to patch, keep the list identity.
  if (!cur) return prev;

  // A settled run must not be moved back to a non-terminal status by a stale
  // replayed event (e.g. a reconnect re-emitting an old `running`).
  const unsettles =
    TERMINAL_STATUS.has(cur.status) &&
    patch.status !== undefined &&
    !TERMINAL_STATUS.has(patch.status);
  if (unsettles) return prev;

  // Every patched field already holds the patched value: a no-op.
  const keys = Object.keys(patch) as (keyof AgentRun)[];
  if (keys.every((k) => Object.is(cur[k], patch[k]))) return prev;

  return prev.map((r) => (r.id === runId ? { ...r, ...patch } : r));
}

/**
 * Returns a set in which `id` is present iff `present` is true. Hands back
 * `prev` unchanged when membership already matches, so no state update (and no
 * re-render) happens for redundant Bevy notifications.
 */
function withMembership(prev: Set<string>, id: string, present: boolean): Set<string> {
  if (prev.has(id) === present) return prev;
  const next = new Set(prev);
  if (present) next.add(id);
  else next.delete(id);
  return next;
}

/**
 * Shared run registry hook for the board.
 *
 * Holds the run list and the set of run ids with a running Bevy playtest,
 * loads the list on mount, and re-polls it in the background (every 3s while
 * any run is `running`, otherwise every 10s). Exposes actions that call
 * `orchestratorApi` plus `patchRun` / `reflectBevy` for callers that observe
 * lifecycle changes elsewhere and need to push them into the registry.
 *
 * @returns A memoized `UseOrchestrator` object; its identity changes only when
 *   one of the registry values or callbacks it contains changes.
 */
export function useOrchestrator(): UseOrchestrator {
  const [runs, setRuns] = useState<AgentRun[]>([]);
  const [bevyRunning, setBevyRunning] = useState<Set<string>>(new Set());
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  // Signature of the last run list received from the server, so the
  // background poll can skip state updates (and re-renders) when the server
  // snapshot did not change. Note this tracks the server snapshot, not local
  // edits made through upsert/patch/remove.
  const lastSig = useRef('');

  // Request sequencing: each reload gets a number, and a response is ignored
  // if a response to a LATER request has already been applied. This keeps a
  // slow, older snapshot from overwriting a newer one.
  const nextRequestSeq = useRef(0);
  const appliedRequestSeq = useRef(0);

  const reload = useCallback(async () => {
    const seq = ++nextRequestSeq.current;
    try {
      const { runs: list } = await orchestratorApi.listRuns();
      if (seq < appliedRequestSeq.current) return; // superseded by a newer response

      // Only the fields that affect what the UI renders; identities/order are
      // stable from the server (newest-first), so this is a reliable change check.
      const sig = runsSignature(list);
      appliedRequestSeq.current = seq;

      // Clear the error on success rather than before the request, so a
      // persistent outage does not make the error flicker on every poll.
      setError(null);
      if (sig !== lastSig.current) {
        lastSig.current = sig;
        setRuns(list);
      }
    } catch (e) {
      // A late failure of an already-superseded request is not worth surfacing.
      if (seq > appliedRequestSeq.current) {
        setError(e instanceof Error ? e.message : 'Failed to load runs');
      }
    } finally {
      setLoading(false);
    }
  }, []);

  // Initial load.
  useEffect(() => {
    void reload();
  }, [reload]);

  // Background poll keeps run status fresh even when no card modal is open.
  // Poll faster while any run is active so a collapsed card's running indicator
  // turns over promptly when it settles. (The live event stream, when a modal is
  // open, is the primary updater; this is a liveness backstop.)
  const anyRunning = useMemo(() => runs.some((r) => r.status === 'running'), [runs]);
  useEffect(() => {
    const ms = anyRunning ? 3_000 : 10_000;
    const id = setInterval(() => {
      void reload();
    }, ms);
    return () => clearInterval(id);
  }, [reload, anyRunning]);

  /** Insert `run` at the front of the list, replacing any entry with the same id. */
  const upsertRun = useCallback((run: AgentRun) => {
    setRuns((prev) => [run, ...prev.filter((r) => r.id !== run.id)]);
  }, []);

  const start = useCallback(
    async (input: { cardId: string; prompt?: string; refineRunId?: string }) => {
      const { run } = await orchestratorApi.startRun(input);
      upsertRun(run);
      return run;
    },
    [upsertRun],
  );

  const message = useCallback(
    async (runId: string, text: string, mode: 'steer' | 'followUp') => {
      await orchestratorApi.messageRun(runId, text, mode);
    },
    [],
  );

  // No local state change here: the settled status arrives via the poll or
  // through `patchRun`.
  const stop = useCallback(async (runId: string) => {
    await orchestratorApi.stopRun(runId);
  }, []);

  // The run is dropped locally only after the server call resolved.
  const remove = useCallback(async (runId: string) => {
    await orchestratorApi.deleteRun(runId);
    setRuns((prev) => prev.filter((r) => r.id !== runId));
  }, []);

  /**
   * Apply a partial update to a run (status/summary/etc., from stream events).
   * Guards against stale history replays un-settling a run: a terminal run can
   * never legitimately return to a non-terminal status, so such a patch (e.g. a
   * reconnect re-emitting an old `running` status) is dropped. Patches for an
   * unknown run, or that change nothing, leave the list identity untouched.
   */
  const patchRun = useCallback((runId: string, patch: Partial<AgentRun>) => {
    setRuns((prev) => applyRunPatch(prev, runId, patch));
  }, []);

  /** Reflect a Bevy playtest lifecycle change (from stream events). */
  const reflectBevy = useCallback((runId: string, running: boolean) => {
    setBevyRunning((prev) => withMembership(prev, runId, running));
  }, []);

  const runForCard = useCallback(
    (cardId: string) => {
      const forCard = runs.filter((r) => r.cardId === cardId);
      // Prefer the running run; otherwise the first (newest) entry for the card.
      return forCard.find((r) => r.status === 'running') ?? forCard[0];
    },
    [runs],
  );

  const isRunning = useCallback(
    (cardId: string) => runs.some((r) => r.cardId === cardId && r.status === 'running'),
    [runs],
  );

  const getDiff = useCallback((runId: string) => orchestratorApi.getDiff(runId), []);
  const mergeRun = useCallback((runId: string) => orchestratorApi.mergeRun(runId), []);

  const startBevy = useCallback(async (runId: string) => {
    await orchestratorApi.startBevy(runId);
    setBevyRunning((prev) => withMembership(prev, runId, true));
  }, []);

  const stopBevy = useCallback(async (runId: string) => {
    await orchestratorApi.stopBevy(runId);
    // Optimistically clear; the `end` event reconciles.
    setBevyRunning((prev) => withMembership(prev, runId, false));
  }, []);

  const bevyIsRunning = useCallback((runId: string) => bevyRunning.has(runId), [bevyRunning]);

  /**
   * Card-id index of active runs. Recomputed only when `runs` or `bevyRunning`
   * changes — NOT on every streamed event — so memoized consumers stay stable.
   * If a card has several running runs, the first one in `runs` (newest) wins,
   * matching what `runForCard` returns.
   */
  const activeByCard = useMemo(() => {
    const m = new Map<string, { running: boolean; bevy: boolean; runId: string }>();
    for (const r of runs) {
      if (r.status === 'running' && !m.has(r.cardId)) {
        m.set(r.cardId, { running: true, bevy: bevyRunning.has(r.id), runId: r.id });
      }
    }
    return m;
  }, [runs, bevyRunning]);

  const refreshBevyStatus = useCallback(async (runId: string) => {
    try {
      const { running } = await orchestratorApi.bevyStatus(runId);
      setBevyRunning((prev) => withMembership(prev, runId, running));
    } catch {
      /* server unavailable — keep current state */
    }
  }, []);

  // Stable identity: re-created only when registry state changes, so consumers
  // (and the per-run stream effect) don't churn on unrelated renders.
  return useMemo<UseOrchestrator>(
    () => ({
      runForCard,
      isRunning,
      runs,
      reload,
      loading,
      error,
      start,
      message,
      stop,
      remove,
      patchRun,
      reflectBevy,
      getDiff,
      mergeRun,
      startBevy,
      stopBevy,
      bevyIsRunning,
      refreshBevyStatus,
      activeByCard,
    }),
    [
      runForCard,
      isRunning,
      runs,
      reload,
      loading,
      error,
      start,
      message,
      stop,
      remove,
      patchRun,
      reflectBevy,
      getDiff,
      mergeRun,
      startBevy,
      stopBevy,
      bevyIsRunning,
      refreshBevyStatus,
      activeByCard,
    ],
  );
}
|