feat(kanban): resume runs' chat via Refine + isolate the event stream

Two intertwined changes that both touch the orchestrator hook + run console: Isolate the agent event stream (perf): - useRunStream owns the SSE stream + event log locally inside AgentRunBar, so a burst of streamed events re-renders only the console — never the board page or card modal (re-rendering those was causing frame drops at run start). - useOrchestrator is now a registry only; lifecycle events reflect back up via the stable patchRun/reflectBevy reflectors (the stream effect depends on those callbacks, not the whole object, avoiding a stream-teardown loop). Session resume for Refine: - Runs now persist their pi session (drop --no-session); each fresh run captures its session JSONL path into a new agent_runs.session_file column (additive, idempotent migration). - Refine resumes the prior run's actual session (--session <path> → appends) in that run's own worktree (inherited, never owned), sending the operator's feedback as the next message in the same conversation with full prior context. - owns_worktree guards remove()/cleanup so a refinement never destroys the owning run's worktree; bad refinement targets return 409. - AgentRunBar shows Refine only for settled runs with a recorded session.
2026-06-17 18:34:05 -04:00
parent 407bc4f790
commit 6531dc00df
10 changed files with 391 additions and 182 deletions
--- a/apps/docs/src/components/kanban/AgentRunBar.tsx
+++ b/apps/docs/src/components/kanban/AgentRunBar.tsx
@@ -3,6 +3,7 @@ import type { Card } from '../../lib/kanbanApi';
 import type { UseOrchestrator } from './useOrchestrator';
 import { DiffModal } from './DiffModal';
 import { RunEventList } from './RunEventList';
+import { useRunStream } from './useRunStream';

 /**
 * Inline agentic console for one kanban card.
@@ -41,6 +42,8 @@ export function AgentRunBar({ card, orch }: AgentRunBarProps) {
  const [promptDraft, setPromptDraft] = useState('');
  const [steerDraft, setSteerDraft] = useState('');
  const [showPrompt, setShowPrompt] = useState(false);
+  const [refineDraft, setRefineDraft] = useState('');
+  const [showRefine, setShowRefine] = useState(false);
  const [showDiff, setShowDiff] = useState(false);
  const [mergeResult, setMergeResult] = useState<{ ok: boolean; text: string } | null>(null);
  const [mergeBusy, setMergeBusy] = useState(false);
@@ -50,13 +53,8 @@ export function AgentRunBar({ card, orch }: AgentRunBarProps) {

  const bevyRunning = run ? orch.bevyIsRunning(run.id) : false;

-  // Stream the run's events while it exists (agent run OR a later Bevy test);
-  // the card stays expanded so playtest output keeps flowing after settle.
-  useEffect(() => {
-    if (!run) return;
-    orch.watch(run.id);
-    return () => orch.unwatch(run.id);
-  }, [run, orch]);
+  // (The live event stream is owned locally by `useRunStream` below, so a burst
+  // of agent events re-renders only this console — never the board or modal.)

  // Sync Bevy status from the server when the run changes (truth after a
  // reconnect/refresh; live lifecycle is mirrored via `bevy` events).
@@ -64,7 +62,7 @@ export function AgentRunBar({ card, orch }: AgentRunBarProps) {
    if (run) void orch.refreshBevyStatus(run.id);
  }, [run, orch]);

-  const events = run ? orch.eventsForRun(run.id) : [];
+  const events = useRunStream(run?.id ?? null, orch);
  useEffect(() => {
    if (logRef.current) logRef.current.scrollTop = logRef.current.scrollHeight;
  }, [events.length]);
@@ -93,6 +91,29 @@ export function AgentRunBar({ card, orch }: AgentRunBarProps) {
    }
  };

+  /**
+   * Start a refinement run: continues this run's branch (prior commits present)
+   * seeded with the operator's feedback. The new run replaces this one in the
+   * console since `runForCard` returns the active run.
+   */
+  const refine = async () => {
+    if (!run) return;
+    setBusy(true);
+    try {
+      await orch.start({
+        cardId: card.id,
+        prompt: refineDraft.trim() || undefined,
+        refineRunId: run.id,
+      });
+      setRefineDraft('');
+      setShowRefine(false);
+    } catch (e) {
+      alert(e instanceof Error ? e.message : 'Failed to start refinement');
+    } finally {
+      setBusy(false);
+    }
+  };
+
  const steer = async () => {
    if (!run || !steerDraft.trim()) return;
    setBusy(true);
@@ -173,9 +194,29 @@ export function AgentRunBar({ card, orch }: AgentRunBarProps) {
        )}
        {settled && (
          <>
-            <button type="button" style={ghostBtn} onClick={() => setShowPrompt((s) => !s)}>
+            <button
+              type="button"
+              style={ghostBtn}
+              onClick={() => {
+                setShowPrompt((s) => !s);
+                setShowRefine(false);
+              }}
+            >
              ↻ Re-run
            </button>
+            {hasWorktree && run.sessionFile && (
+              <button
+                type="button"
+                style={accentBtn}
+                onClick={() => {
+                  setShowRefine((s) => !s);
+                  setShowPrompt(false);
+                }}
+                title="Continue this run's conversation with feedback"
+              >
+                ✎ Refine
+              </button>
+            )}
            <button type="button" style={ghostBtn} onClick={dismiss} title="Remove this run">
              Dismiss
            </button>
@@ -219,6 +260,33 @@ export function AgentRunBar({ card, orch }: AgentRunBarProps) {
        </div>
      )}

+      {/* Refinement: resume this run's conversation with operator feedback */}
+      {showRefine && settled && hasWorktree && run.sessionFile && (
+        <div style={blockStyle}>
+          <textarea
+            placeholder="Ask for changes — this continues the same conversation (e.g. 'the jump feels too floaty', 'also handle the empty-list case', 'split the large function')…"
+            value={refineDraft}
+            onChange={(e) => setRefineDraft(e.target.value)}
+            onClick={(e) => e.stopPropagation()}
+            style={{ ...inputStyle, width: '100%', minHeight: '60px', resize: 'vertical' }}
+            spellCheck={false}
+            autoFocus
+          />
+          <div style={{ marginTop: '6px', display: 'flex', gap: '6px', alignItems: 'center' }}>
+            <button type="button" style={primaryBtn} onClick={refine} disabled={busy}>
+              {busy ? 'Starting…' : '✎ Refine'}
+            </button>
+            <button type="button" style={ghostBtn} onClick={() => setShowRefine(false)}>
+              Cancel
+            </button>
+            <span style={hintStyle}>
+              Resumes this run's chat in the same worktree — the agent keeps the
+              full prior context and appends your request as the next message.
+            </span>
+          </div>
+        </div>
+      )}
+
      {/* Steer box while running */}
      {isActive && (
        <div style={{ ...blockStyle, display: 'flex', gap: '6px' }}>
--- a/apps/docs/src/components/kanban/useOrchestrator.ts
+++ b/apps/docs/src/components/kanban/useOrchestrator.ts
@@ -4,16 +4,21 @@ import {
  type AgentRun,
  type DiffResult,
  type MergeResult,
-  type RunEvent,
 } from '../../lib/orchestratorApi';

 /**
- * Drives the agentic orchestrator from the board UI.
+ * Shared run registry for the implementation board.
 *
- * Holds the latest run per card plus a live, replayable event log for any run
- * currently being "watched" (typically the expanded card's active run). Live
- * updates arrive over Server-Sent Events; the board stays fully interactive
- * while an agent works a card — start, steer, and stop at any time.
+ * Owns the lightweight, board-level slice of orchestrator state: the run list,
+ * Bevy-playtest flags, and the derived active-run index. It deliberately does
+ * NOT hold the streaming event log — that lives in `useRunStream`, scoped to the
+ * run console (`AgentRunBar`), so a burst of agent events re-renders only the
+ * console and never the board page or card modal. Lifecycle changes observed in
+ * a stream (status / bevy / done) are pushed back here via `patchRun` /
+ * `reflectBevy` so the board's active indicators stay correct.
+ *
+ * The returned object is memoized on its state/callbacks, so its identity
+ * changes only when registry state changes — not on every render.
 */

 export interface UseOrchestrator {
@@ -23,24 +28,22 @@ export interface UseOrchestrator {
  isRunning: (cardId: string) => boolean;
  /** All known runs (newest first), for a global activity view. */
  runs: AgentRun[];
-  /** Watched events keyed by run id (ordered). */
-  eventsForRun: (runId: string) => RunEvent[];
  /** Load initial state. */
  reload: () => Promise<void>;
  loading: boolean;
  error: string | null;
  /** Begin a run for a card. Returns the created run. */
-  start: (input: { cardId: string; prompt?: string }) => Promise<AgentRun>;
+  start: (input: { cardId: string; prompt?: string; refineRunId?: string }) => Promise<AgentRun>;
  /** Send a steer/follow-up message to an active run. */
  message: (runId: string, text: string, mode: 'steer' | 'followUp') => Promise<void>;
  /** Stop an active run. */
  stop: (runId: string) => Promise<void>;
-  /** Open the live event stream for a run (ref-counted; safe to call repeatedly). */
-  watch: (runId: string) => void;
-  /** Release a watch (ref-counted; closes the stream when the last watcher leaves). */
-  unwatch: (runId: string) => void;
  /** Remove a settled run from the UI (and reclaim its worktree). */
  remove: (runId: string) => Promise<void>;
+  /** Apply a partial update to a run record (used by stream reflectors). */
+  patchRun: (runId: string, patch: Partial<AgentRun>) => void;
+  /** Reflect a Bevy playtest lifecycle change (used by stream reflectors). */
+  reflectBevy: (runId: string, running: boolean) => void;
  /** Fetch a run's branch diff vs main. */
  getDiff: (runId: string) => Promise<DiffResult>;
  /** Merge a run's branch into the main worktree. */
@@ -63,16 +66,10 @@ export interface UseOrchestrator {

 export function useOrchestrator(): UseOrchestrator {
  const [runs, setRuns] = useState<AgentRun[]>([]);
-  const [eventsByRun, setEventsByRun] = useState<Record<string, RunEvent[]>>({});
  const [bevyRunning, setBevyRunning] = useState<Set<string>>(new Set());
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

-  // Refcounted EventSource subscriptions + replay cursors, kept in refs so the
-  // SSE callbacks always see fresh state without re-subscribing.
-  const sources = useRef(new Map<string, EventSource>());
-  const refcounts = useRef(new Map<string, number>());
-  const cursors = useRef(new Map<string, number>());
  // Signature of the last loaded run list, so the background poll can skip
  // state updates (and re-renders) when nothing actually changed.
  const lastSig = useRef('');
@@ -101,9 +98,10 @@ export function useOrchestrator(): UseOrchestrator {
    void reload();
  }, [reload]);

-  // Background poll keeps run status fresh even when no SSE stream is open
-  // (i.e. when no card modal is open). Poll faster while any run is active so a
-  // collapsed card's running indicator turns over promptly when it settles.
+  // Background poll keeps run status fresh even when no card modal is open.
+  // Poll faster while any run is active so a collapsed card's running indicator
+  // turns over promptly when it settles. (The live event stream, when a modal is
+  // open, is the primary updater; this is a liveness backstop.)
  const anyRunning = useMemo(() => runs.some((r) => r.status === 'running'), [runs]);
  useEffect(() => {
    const ms = anyRunning ? 3_000 : 10_000;
@@ -121,17 +119,8 @@ export function useOrchestrator(): UseOrchestrator {
    });
  }, []);

-  const appendEvent = useCallback((runId: string, ev: RunEvent) => {
-    setEventsByRun((prev) => {
-      const cur = prev[runId] ?? [];
-      // Dedup by seq when present (history flush vs. live may overlap).
-      if (typeof ev.seq === 'number' && cur.some((e) => e.seq === ev.seq)) return prev;
-      return { ...prev, [runId]: [...cur, ev] };
-    });
-  }, []);
-
  const start = useCallback(
-    async (input: { cardId: string; prompt?: string }) => {
+    async (input: { cardId: string; prompt?: string; refineRunId?: string }) => {
      const { run } = await orchestratorApi.startRun(input);
      upsertRun(run);
      return run;
@@ -146,106 +135,30 @@ export function useOrchestrator(): UseOrchestrator {
    [],
  );

-  const stop = useCallback(
-    async (runId: string) => {
-      await orchestratorApi.stopRun(runId);
-    },
-    [],
-  );
+  const stop = useCallback(async (runId: string) => {
+    await orchestratorApi.stopRun(runId);
+  }, []);

  const remove = useCallback(async (runId: string) => {
    await orchestratorApi.deleteRun(runId);
    setRuns((prev) => prev.filter((r) => r.id !== runId));
-    setEventsByRun((prev) => {
-      const next = { ...prev };
-      delete next[runId];
+  }, []);
+
+  /** Apply a partial update to a run (status/summary/etc., from stream events). */
+  const patchRun = useCallback((runId: string, patch: Partial<AgentRun>) => {
+    setRuns((prev) => prev.map((r) => (r.id === runId ? { ...r, ...patch } : r)));
+  }, []);
+
+  /** Reflect a Bevy playtest lifecycle change (from stream events). */
+  const reflectBevy = useCallback((runId: string, running: boolean) => {
+    setBevyRunning((prev) => {
+      const next = new Set(prev);
+      if (running) next.add(runId);
+      else next.delete(runId);
      return next;
    });
  }, []);

-  /** (Re)open the SSE stream for a run, replaying persisted history first. */
-  const openStream = useCallback(
-    (runId: string) => {
-      if (sources.current.has(runId)) return;
-      const since = cursors.current.get(runId) ?? 0;
-      const es = new EventSource(`/api/orchestrator/runs/${runId}/stream?since=${since}`);
-      sources.current.set(runId, es);
-      es.addEventListener('event', (msg) => {
-        const ev = JSON.parse((msg as MessageEvent).data) as RunEvent;
-        if (typeof ev.seq === 'number') cursors.current.set(runId, ev.seq);
-        appendEvent(runId, ev);
-        // Reflect status changes onto the run record.
-        if (ev.type === 'status') {
-          const status = ev.data.status as AgentRun['status'];
-          setRuns((prev) =>
-            prev.map((r) =>
-              r.id === runId
-                ? {
-                    ...r,
-                    status,
-                    finishedAt: ['completed', 'failed', 'stopped'].includes(status)
-                      ? new Date().toISOString()
-                      : r.finishedAt,
-                  }
-                : r,
-            ),
-          );
-        }
-        if (ev.type === 'done' && typeof ev.data.summary === 'string') {
-          setRuns((prev) =>
-            prev.map((r) => (r.id === runId ? { ...r, summary: ev.data.summary as string } : r)),
-          );
-        }
-        // Track Bevy playtest lifecycle from its events.
-        if (ev.type === 'bevy') {
-          const phase = ev.data.phase;
-          setBevyRunning((prev) => {
-            const next = new Set(prev);
-            if (phase === 'start') next.add(runId);
-            else if (phase === 'end') next.delete(runId);
-            return next;
-          });
-        }
-      });
-      es.onerror = () => {
-        // EventSource auto-reconnects; nothing to do here.
-      };
-    },
-    [appendEvent],
-  );
-
-  const watch = useCallback(
-    (runId: string) => {
-      const n = (refcounts.current.get(runId) ?? 0) + 1;
-      refcounts.current.set(runId, n);
-      if (n === 1) openStream(runId);
-    },
-    [openStream],
-  );
-
-  const unwatch = useCallback((runId: string) => {
-    const n = (refcounts.current.get(runId) ?? 0) - 1;
-    if (n > 0) {
-      refcounts.current.set(runId, n);
-      return;
-    }
-    refcounts.current.delete(runId);
-    const es = sources.current.get(runId);
-    if (es) {
-      es.close();
-      sources.current.delete(runId);
-    }
-  }, []);
-
-  // Close all streams on unmount.
-  useEffect(() => {
-    return () => {
-      for (const es of sources.current.values()) es.close();
-      sources.current.clear();
-      refcounts.current.clear();
-    };
-  }, []);
-
  const runForCard = useCallback(
    (cardId: string) => {
      const forCard = runs.filter((r) => r.cardId === cardId);
@@ -260,17 +173,8 @@ export function useOrchestrator(): UseOrchestrator {
    [runs],
  );

-  const eventsForRun = useCallback((runId: string) => eventsByRun[runId] ?? [], [eventsByRun]);
-
-  const getDiff = useCallback(
-    (runId: string) => orchestratorApi.getDiff(runId),
-    [],
-  );
-
-  const mergeRun = useCallback(
-    (runId: string) => orchestratorApi.mergeRun(runId),
-    [],
-  );
+  const getDiff = useCallback((runId: string) => orchestratorApi.getDiff(runId), []);
+  const mergeRun = useCallback((runId: string) => orchestratorApi.mergeRun(runId), []);

  const startBevy = useCallback(async (runId: string) => {
    await orchestratorApi.startBevy(runId);
@@ -292,7 +196,6 @@ export function useOrchestrator(): UseOrchestrator {
  /**
   * Card-id index of active runs. Recomputed only when `runs` or `bevyRunning`
   * changes — NOT on every streamed event — so memoized consumers stay stable.
-   * Replaces the per-card `.filter().find()` scans the board used to do.
   */
  const activeByCard = useMemo(() => {
    const m = new Map<string, { running: boolean; bevy: boolean; runId: string }>();
@@ -318,26 +221,50 @@ export function useOrchestrator(): UseOrchestrator {
    }
  }, []);

-  return {
-    runForCard,
-    isRunning,
-    runs,
-    eventsForRun,
-    reload,
-    loading,
-    error,
-    start,
-    message,
-    stop,
-    watch,
-    unwatch,
-    remove,
-    getDiff,
-    mergeRun,
-    startBevy,
-    stopBevy,
-    bevyIsRunning,
-    refreshBevyStatus,
-    activeByCard,
-  };
+  // Stable identity: re-created only when registry state changes, so consumers
+  // (and the per-run stream effect) don't churn on unrelated renders.
+  return useMemo<UseOrchestrator>(
+    () => ({
+      runForCard,
+      isRunning,
+      runs,
+      reload,
+      loading,
+      error,
+      start,
+      message,
+      stop,
+      remove,
+      patchRun,
+      reflectBevy,
+      getDiff,
+      mergeRun,
+      startBevy,
+      stopBevy,
+      bevyIsRunning,
+      refreshBevyStatus,
+      activeByCard,
+    }),
+    [
+      runForCard,
+      isRunning,
+      runs,
+      reload,
+      loading,
+      error,
+      start,
+      message,
+      stop,
+      remove,
+      patchRun,
+      reflectBevy,
+      getDiff,
+      mergeRun,
+      startBevy,
+      stopBevy,
+      bevyIsRunning,
+      refreshBevyStatus,
+      activeByCard,
+    ],
+  );
 }
--- a/apps/docs/src/components/kanban/useRunStream.ts
+++ b/apps/docs/src/components/kanban/useRunStream.ts
@@ -0,0 +1,71 @@
+import { useEffect, useState } from 'react';
+import type { AgentRun, RunEvent } from '../../lib/orchestratorApi';
+import type { UseOrchestrator } from './useOrchestrator';
+
+/**
+ * Live event stream for a single agent run, owned locally by the run console
+ * (`AgentRunBar`).
+ *
+ * Keeping the stream — and the event log it accumulates — here, rather than in
+ * the board-level `useOrchestrator`, means a burst of streamed events re-renders
+ * ONLY the console. The board page and card modal are untouched by per-event
+ * updates, which is what eliminates the frame drops at run start.
+ *
+ * Lifecycle events (status / bevy / done) are reflected back into the shared
+ * run registry via `patchRun` / `reflectBevy`, so the board's active indicators
+ * and the run record stay correct. The server replays persisted history first
+ * (`since=0`), so opening a modal shows the run's prior activity; duplicates are
+ * de-duped by `seq`.
+ *
+ * Returns the ordered events for `runId` (empty while `runId` is null).
+ */
+export function useRunStream(runId: string | null, orch: UseOrchestrator): RunEvent[] {
+  // Pull the two reflectors out so the effect can depend on THESE stable
+  // callbacks (each is `useCallback(..., [])`) rather than the whole `orch`
+  // object — whose identity changes on every registry mutation. Depending on
+  // `orch` directly would tear down + reopen the EventSource (and wipe the log)
+  // on every status/bevy/done event, looping forever. See `patchRun`/
+  // `reflectBevy` in `useOrchestrator`.
+  const { patchRun, reflectBevy } = orch;
+  const [events, setEvents] = useState<RunEvent[]>([]);
+
+  useEffect(() => {
+    // Reset when switching runs (or closing the console).
+    setEvents([]);
+    if (!runId) return;
+
+    const es = new EventSource(`/api/orchestrator/runs/${runId}/stream?since=0`);
+
+    const onEvent = (msg: Event) => {
+      const ev = JSON.parse((msg as MessageEvent).data) as RunEvent;
+      setEvents((prev) => {
+        // Dedup by seq (history replay vs. live may overlap).
+        if (typeof ev.seq === 'number' && prev.some((e) => e.seq === ev.seq)) return prev;
+        return [...prev, ev];
+      });
+
+      // Reflect lifecycle events back to the shared registry.
+      if (ev.type === 'status') {
+        const status = ev.data.status as AgentRun['status'];
+        const settling = ['completed', 'failed', 'stopped'].includes(status);
+        patchRun(runId, settling ? { status, finishedAt: new Date().toISOString() } : { status });
+      } else if (ev.type === 'done' && typeof ev.data.summary === 'string') {
+        patchRun(runId, { summary: ev.data.summary });
+      } else if (ev.type === 'bevy') {
+        const phase = ev.data.phase;
+        if (phase === 'start') reflectBevy(runId, true);
+        else if (phase === 'end') reflectBevy(runId, false);
+      }
+    };
+
+    es.addEventListener('event', onEvent);
+    // EventSource auto-reconnects on error, so no `onerror` handler is needed.
+
+    return () => {
+      es.removeEventListener('event', onEvent);
+      es.close();
+    };
+  }, [runId, patchRun, reflectBevy]);
+
+  return events;
+}
--- a/apps/docs/src/lib/orchestratorApi.ts
+++ b/apps/docs/src/lib/orchestratorApi.ts
@@ -17,8 +17,12 @@ export interface AgentRun {
  cardId: string;
  status: RunStatus;
  useWorktree: boolean;
+  /** Whether this run created (and therefore owns/cleans up) its worktree. */
+  ownsWorktree: boolean;
  branch: string | null;
  worktreePath: string | null;
+  /** pi session JSONL path persisted for this run (resumable by refinements). */
+  sessionFile: string | null;
  prompt: string;
  summary: string | null;
  commitSha: string | null;
@@ -78,6 +82,11 @@ export interface StartRunInput {
  prompt?: string;
  useWorktree?: boolean;
  cleanupOnFinish?: boolean;
+  /**
+   * If set, start a refinement run that continues this prior run's branch with
+   * `prompt` as the operator's refinement feedback.
+   */
+  refineRunId?: string;
 }

 /** A commit on a run's branch that is not yet on main. */