feat(subagent): add sticky child finalization

2026-05-12 12:48:26 -04:00
parent 9956b65e17
commit 9488aab237
3 changed files with 340 additions and 29 deletions
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -0,0 +1,33 @@
 # Repo Guidelines
 ## Scope
 This repo implements the `subagent` pi extension in `index.ts`.
 ## Key Behavior
 - Parent-facing tool: `subagent`.
 - Internal child-only tool: `subagent_finalize`.
 - Never register `subagent_finalize` in the parent context; only when `PI_SUBAGENT_CHILD=1`.
 - Subagent sessions are sticky and persisted at:
  `~/.pi/subagent-sessions/<cwd-hash>/<agent>_<sessionId>.jsonl`.
 - Omitting `sessionId` creates a new UUID-backed session.
 - Passing `sessionId` resumes the same agent/cwd child session.
 ## Validation
 - Child agents must finish by calling `subagent_finalize`.
 - Valid success: `status=SUCCESS` with non-empty `result`.
 - Valid error: `status=ERROR` with non-empty `error`; `result` is optional partial findings.
 - If finalization is missing/invalid, retry by continuing the same sticky session, capped by `MAX_FINALIZE_RETRIES`.
 ## Commands
 - Typecheck: `npm run typecheck`
 - Lint: `npm run lint`
 ## Style
 - Keep the extension simple; avoid debug metadata unless needed.
 - Do not expose retry internals to the parent by default.
 - Prefer precise tool/context isolation over prompt-only enforcement.
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ System prompt for the subagent goes here.
 ```
 - `approved_tools` / `allowed_tools` is whitelist mode: the subagent receives only those tools.
- `denied_tools` is blacklist mode: the subagent inherits currently active tools except those tools and `subagent` itself.
+- `denied_tools` is blacklist mode: the subagent inherits currently active tools except those tools, `subagent` itself, and internal tools.
 - Do not define whitelist and blacklist fields together; the extension rejects ambiguous configs.
 ## Tool usage
@@ -28,4 +28,75 @@ System prompt for the subagent goes here.
 }
 ```
-The extension spawns an isolated `pi --mode json -p --no-session` subprocess with the prompt appended as system instructions and `--tools` set to the resolved tool list.
+The tool returns the subagent's finalized result as normal text with a compact status/session header for clients that hide structured tool metadata:
 ```md
 **Status:** SUCCESS  
 **Session ID:** `550e8400-e29b-41d4-a716-446655440000`
 ---
 ...
 ```
 Metadata also includes the sticky session id:
 ```json
 {
  "sessionId": "550e8400-e29b-41d4-a716-446655440000",
  "finalized": {
    "status": "SUCCESS",
    "result": "..."
  }
 }
 ```
 To continue the same child subagent context, pass the returned `sessionId`:
 ```json
 {
  "agent": "scout",
  "sessionId": "550e8400-e29b-41d4-a716-446655440000",
  "task": "Keep going and inspect the registration path"
 }
 ```
 If `sessionId` is omitted, a new child session is created.
 ## Sticky sessions
 Subagent sessions persist under:
 ```txt
 ~/.pi/subagent-sessions/<cwd-hash>/<agent>_<sessionId>.jsonl
 ```
 Session identity is scoped by working directory, agent name, and session id. Reusing the same `sessionId` with the same agent and cwd resumes that child context.
 ## Finalization
 Subagents do not return final answers as free-form assistant text. Child runs get an internal `subagent_finalize` tool and must call it as their final action:
 ```ts
 subagent_finalize({
  status: "SUCCESS" | "ERROR",
  result?: string,
  error?: string,
 })
 ```
 Rules:
 - `SUCCESS` requires a non-empty `result`.
 - `ERROR` requires a non-empty `error`.
 - `ERROR.result` may contain partial findings.
 - If the child exits without valid finalization, the extension continues the same session with corrective feedback, up to a small retry limit.
 - `subagent_finalize` is registered only in child subagent processes, never in the parent context.
 ## How it runs
 The extension spawns an isolated child pi process in JSON mode with the resolved tools and a persistent session file:
 ```txt
 pi --mode json -p --session <session-file> --extension <extension-entry> --tools <resolved-tools> --append-system-prompt <prompt-file> <task>
 ```
--- a/index.ts
+++ b/index.ts
@@ -1,6 +1,7 @@
 // Subagent Extension - Registers a tool for delegating work to prompt-defined
 // subagents with constrained tool permissions.
 import { spawn } from "node:child_process";
 import { createHash, randomUUID } from "node:crypto";
 import * as fs from "node:fs";
 import * as os from "node:os";
 import * as path from "node:path";
@@ -36,19 +37,44 @@ interface SubagentStatus {
  recentToolCalls: SubagentToolActivity[];
 }
 /**
  * Arguments a child agent passes to the `subagent_finalize` tool to end its run.
  * `validateFinalizePayload` enforces the per-status requirements noted below.
  */
 interface SubagentFinalizePayload {
  /** Outcome reported by the child. */
  status: "SUCCESS" | "ERROR";
  /** Findings text. Must be non-blank for SUCCESS; optional partial findings for ERROR. */
  result?: string;
  /** Failure description. Must be non-blank for ERROR. */
  error?: string;
 }
 /** Outcome of a single child run as tracked by the parent extension. */
 interface SubagentResult {
  /** Name of the prompt-defined agent that was run. */
  agent: string;
  /** Task text handed to the child for this run. */
  task: string;
  /** Resolved tool names given to the child. */
  tools: string[];
  /** Sticky session id this run was attached to. */
  sessionId: string;
  /** Starts at 0. NOTE(review): presumably replaced by the child process exit code — confirm in runAgent. */
  exitCode: number;
  /** NOTE(review): presumably the child's collected text output; it is populated outside the visible code. */
  output: string;
  /** NOTE(review): presumably the child's captured stderr; it is populated outside the visible code. */
  stderr: string;
  /** Progress snapshot: state, tool-call counters, and recent tool calls. */
  status: SubagentStatus;
  /** Validated `subagent_finalize` payload; stays undefined when no valid finalize call was observed. */
  finalized?: SubagentFinalizePayload;
  /** Failure description; the caller treats a truthy value as a failed run and stops retrying. */
  error?: string;
 }
-const EXTENSION_DIR = path.dirname(fileURLToPath(import.meta.url));
 // Absolute path of this extension module; also passed to child runs via --extension.
+const EXTENSION_ENTRY = fileURLToPath(import.meta.url);
 const EXTENSION_DIR = path.dirname(EXTENSION_ENTRY);
 // Directory named "prompts" next to the extension entry file.
 const PROMPTS_DIR = path.join(EXTENSION_DIR, "prompts");
 // Name of the child-only tool a subagent must call to end its run.
 const FINALIZE_TOOL_NAME = "subagent_finalize";
 // Extra attempts after the first run when finalization is missing (2 => at most 3 runs).
 const MAX_FINALIZE_RETRIES = 2;
 // Format Tool Content - Render status, session id, optional error line, and
 // optional result body as plain markdown text, because some clients hide
 // structured details from the model.
 function formatSubagentContent(
  status: "SUCCESS" | "ERROR",
  sessionId: string,
  result?: string,
  error?: string,
 ): string {
  const headerLines = [
    `**Status:** ${status}`,
    `**Session ID:** \`${sessionId}\``,
  ];

  // A blank or missing error adds no header line.
  const errorText = error?.trim();
  if (errorText) headerLines.push(`**Error:** ${errorText}`);

  // Header lines are separated by two spaces plus a newline.
  const headerBlock = headerLines.join("  \n");

  // Only trailing whitespace is removed from the body; a blank body is omitted.
  const bodyText = result?.trimEnd();
  if (!bodyText) return headerBlock;

  return [headerBlock, "---", bodyText].join("\n\n");
 }
 // Parse Tool List - Frontmatter may use YAML arrays or comma-delimited strings.
 function parseToolList(value: unknown): string[] {
@@ -108,16 +134,68 @@ function discoverPrompts(): PromptConfig[] {
 // Resolve Tools - Use exactly one permission mode. approved_tools/allowed_tools
 // is a whitelist; denied_tools is a blacklist over the currently active tools.
 // In both modes `subagent` and the finalize tool are stripped from the
 // configured list, and the finalize tool is then appended once to the result.
 function resolveTools(agent: PromptConfig, activeTools: string[]): string[] {
-  if (agent.approvedTools.length > 0) {
+  const withoutDelegationTools = (tools: string[]) =>
-    return [...new Set(agent.approvedTools)].filter(
+    tools.filter((tool) => tool !== "subagent" && tool !== FINALIZE_TOOL_NAME);
-      (tool) => tool !== "subagent",
+
  // A non-empty approved list selects whitelist mode; otherwise the active
  // tools minus the denied ones are used. Both inputs are de-duplicated first.
  // NOTE(review): the denied-tools Set below is rebuilt for every active tool;
  // building it once before the filter would avoid the repeated work.
  const resolved =
    agent.approvedTools.length > 0
      ? withoutDelegationTools([...new Set(agent.approvedTools)])
      : withoutDelegationTools(
          [...new Set(activeTools)].filter(
            (tool) => !new Set(agent.deniedTools).has(tool),
          ),
        );
  // The finalize tool is always present in the returned list, whatever the config says.
  return [...new Set([...resolved, FINALIZE_TOOL_NAME])];
 }
 // Build Finalize Prompt - Combine the agent's own system prompt (when it has
 // one) with the instructions telling the child to end via the finalize tool.
 function buildSubagentPrompt(agent: PromptConfig): string {
  const finalizeLines = [
    "You are running as a subagent.",
    `When the task is complete, call ${FINALIZE_TOOL_NAME} as your final action.`,
    "Do not provide the final answer as normal assistant text.",
    `${FINALIZE_TOOL_NAME} requires status SUCCESS with result, or status ERROR with error and optional result.`,
  ];

  // An empty or missing system prompt contributes no section.
  const sections: string[] = [];
  if (agent.systemPrompt) sections.push(agent.systemPrompt);
  sections.push(finalizeLines.join("\n"));

  return sections.join("\n\n");
 }
 // Session Path - Child sessions live at
 // ~/.pi/subagent-sessions/<cwd-hash>/<agent>_<sessionId>.jsonl, where the
 // directory name is the first 16 hex characters of the SHA-256 of cwd.
 function getSubagentSessionPath(
  cwd: string,
  agentName: string,
  sessionId: string,
 ): string {
  // Each run of characters outside [A-Za-z0-9_.-] becomes a single underscore.
  const toSafeSegment = (segment: string): string =>
    segment.replace(/[^\w.-]+/g, "_");

  const hasher = createHash("sha256");
  hasher.update(cwd);
  const cwdDirName = hasher.digest("hex").slice(0, 16);

  const fileName = `${toSafeSegment(agentName)}_${toSafeSegment(sessionId)}.jsonl`;
  const sessionsRoot = path.join(os.homedir(), ".pi", "subagent-sessions");
  return path.join(sessionsRoot, cwdDirName, fileName);
 }
-  const denied = new Set(agent.deniedTools);
+// Validate Finalize Payload - Keep the parent contract strict and small.
 // Returns a normalized payload, or null when the value is not an object, the
 // status is neither SUCCESS nor ERROR, or the field required for that status
 // is missing or blank.
-  return [...new Set(activeTools)].filter(
+function validateFinalizePayload(value: unknown): SubagentFinalizePayload | null {
-    (tool) => tool !== "subagent" && !denied.has(tool),
+  if (!value || typeof value !== "object") return null;
-  );
+  const payload = value as Record<string, unknown>;
  // SUCCESS needs a string result that is non-empty after trimming; the
  // untrimmed result is returned and any error field is dropped.
  if (payload.status === "SUCCESS") {
    return typeof payload.result === "string" && payload.result.trim()
      ? { status: "SUCCESS", result: payload.result }
      : null;
  }
  // ERROR needs a string error that is non-empty after trimming; a string
  // result is carried along as partial findings, any other result type is ignored.
  if (payload.status === "ERROR") {
    const result =
      typeof payload.result === "string" ? payload.result : undefined;
    return typeof payload.error === "string" && payload.error.trim()
      ? { status: "ERROR", error: payload.error, result }
      : null;
  }
  return null;
 }
 // Write Prompt - pi accepts appended system prompts via file path.
@@ -256,6 +334,8 @@ async function runAgent(
  agent: PromptConfig,
  task: string,
  tools: string[],
  sessionId: string,
  sessionPath: string,
  signal?: AbortSignal,
  onUpdate?: (partial: {
    content: { type: "text"; text: string }[];
@@ -269,6 +349,7 @@ async function runAgent(
    agent: agent.name,
    task,
    tools,
    sessionId,
    exitCode: 0,
    output: "",
    stderr: "",
@@ -305,23 +386,33 @@ async function runAgent(
  };
  try {
-    const args = ["--mode", "json", "-p", "--no-session"];
+    const args = [
      "--mode",
      "json",
      "-p",
      "--session",
      sessionPath,
      "--extension",
      EXTENSION_ENTRY,
    ];
    args.push("--tools", tools.join(","));
-    if (agent.systemPrompt) {
+    const prompt = buildSubagentPrompt(agent);
-      const tmp = await writePromptToTempFile(agent.name, agent.systemPrompt);
+    if (prompt) {
      const tmp = await writePromptToTempFile(agent.name, prompt);
      tmpDir = tmp.dir;
      tmpPromptPath = tmp.filePath;
      args.push("--append-system-prompt", tmpPromptPath);
    }
-    args.push(`Task: ${task}`);
+    args.push(task);
    emitUpdate();
    const exitCode = await new Promise<number>((resolve) => {
      const invocation = getPiInvocation(args);
      const proc = spawn(invocation.command, invocation.args, {
        cwd,
        env: { ...process.env, PI_SUBAGENT_CHILD: "1" },
        shell: false,
        stdio: ["ignore", "pipe", "pipe"],
      });
@@ -344,13 +435,17 @@ async function runAgent(
            const id = String(
              event.toolCallId ?? result.status.toolCallCount + 1,
            );
            const toolName = String(event.toolName ?? "tool");
            if (toolName === FINALIZE_TOOL_NAME) {
              result.finalized = validateFinalizePayload(event.args) ?? undefined;
            }
            activeToolIds.add(id);
            result.status.state = "running";
            result.status.toolCallCount += 1;
            result.status.activeToolCalls = activeToolIds.size;
            rememberToolCall({
              id,
-              toolName: String(event.toolName ?? "tool"),
+              toolName,
              summary: formatToolArgs(event.args),
              status: "running",
            });
@@ -447,6 +542,12 @@ const SubagentParams = Type.Object({
    description: "Name of the prompt-defined subagent to invoke",
  }),
  task: Type.String({ description: "Task to delegate to the subagent" }),
  sessionId: Type.Optional(
    Type.String({
      description:
        "Optional sticky subagent session id. Reuse to continue a previous subagent context.",
    }),
  ),
  cwd: Type.Optional(
    Type.String({
      description:
@@ -456,6 +557,30 @@ const SubagentParams = Type.Object({
 });
 export default function (pi: ExtensionAPI) {
  if (process.env.PI_SUBAGENT_CHILD === "1") {
    pi.registerTool({
      name: FINALIZE_TOOL_NAME,
      label: "Subagent Finalize",
      description:
        "Internal subagent-only tool. Call this as your final action when delegated work is complete.",
      promptSnippet:
        "Call subagent_finalize as your final action when subagent work is complete.",
      parameters: Type.Object({
        status: Type.Union([Type.Literal("SUCCESS"), Type.Literal("ERROR")]),
        result: Type.Optional(Type.String()),
        error: Type.Optional(Type.String()),
      }),
      async execute(_toolCallId, params) {
        return {
          content: [{ type: "text", text: "Subagent finalized." }],
          details: params,
          terminate: true,
        };
      },
    });
  }
  pi.registerTool({
    name: "subagent",
    label: "Subagent",
@@ -517,21 +642,103 @@ export default function (pi: ExtensionAPI) {
      }
      const cwd = path.resolve(ctx.cwd, params.cwd ?? ".");
-      const result = await runAgent(
+      const sessionId = params.sessionId ?? randomUUID();
      const sessionPath = getSubagentSessionPath(cwd, agent.name, sessionId);
      await fs.promises.mkdir(path.dirname(sessionPath), { recursive: true });
      let result: SubagentResult | null = null;
      for (let retryCount = 0; retryCount <= MAX_FINALIZE_RETRIES; retryCount += 1) {
        const task =
          retryCount === 0
            ? `Task: ${params.task}`
            : [
                "Your previous response did not finalize correctly.",
                `If you are finished, call ${FINALIZE_TOOL_NAME}.`,
                "If you are not finished, continue the original task using available tools as needed.",
                `Original task: ${params.task}`,
              ].join("\n\n");
        result = await runAgent(
          cwd,
          agent,
-        params.task,
+          task,
          tools,
          sessionId,
          sessionPath,
          signal,
          _onUpdate,
        );
-      const failed = result.exitCode !== 0 || Boolean(result.error);
+
-      const fallback = result.error || result.stderr || "(no output)";
+        if (result.finalized) break;
        if (result.exitCode !== 0 || result.error) break;
      }
      if (!result) {
        return {
          content: [
            {
              type: "text",
              text: formatSubagentContent(
                "ERROR",
                sessionId,
                undefined,
                "Subagent did not run.",
              ),
            },
          ],
          details: { sessionId },
          isError: true,
        };
      }
      if (!result.finalized) {
        const fallback =
          result.error ||
          result.stderr ||
          `Subagent did not call ${FINALIZE_TOOL_NAME}.`;
        return {
          content: [
            {
              type: "text",
              text: formatSubagentContent("ERROR", sessionId, undefined, fallback),
            },
          ],
          details: { sessionId },
          isError: true,
        };
      }
      if (result.finalized.status === "ERROR") {
        return {
          content: [
            {
              type: "text",
              text: formatSubagentContent(
                "ERROR",
                sessionId,
                result.finalized.result,
                result.finalized.error ?? "Subagent failed.",
              ),
            },
          ],
          details: { sessionId, finalized: result.finalized },
          isError: true,
        };
      }
      return {
-        content: [{ type: "text", text: result.output || fallback }],
+        content: [
-        details: result,
+          {
-        isError: failed,
+            type: "text",
            text: formatSubagentContent(
              "SUCCESS",
              sessionId,
              result.finalized.result,
            ),
          },
        ],
        details: { sessionId, finalized: result.finalized },
        isError: false,
      };
    },