feat: add web_fetch tool, rename to pi-web

- Rename package to @evan/pi-web (repo rename handled separately).
- Rename existing 'search' tool to 'web_search' for consistency.
- Add 'web_fetch' tool: navigates via the shared headless Firefox,
  extracts the article with Mozilla Readability (falling back to <body>
  when no article is detected), and converts it to markdown with Turndown.
  Output is capped at 50KB and navigation times out after 15s. The tool
  description steers the LLM to curl for raw/non-text content.
- Reuses the shared driver, so search + fetch share one warm browser.
This commit is contained in:
2026-05-25 11:51:46 -04:00
parent ebd7218b95
commit 67ce141b1b
5 changed files with 754 additions and 24 deletions

View File

@@ -1,8 +1,10 @@
// Pi-Search Extension - Registers a single `search` tool. Provider (kagi or
// searxng) is chosen via ~/.pi/pi-search/config.json or PI_SEARCH_PROVIDER.
// Pi-Web Extension - Registers `web_search` and `web_fetch` tools backed by
// a shared headless Firefox session. Provider config lives in
// ~/.pi/pi-search/config.json (env overrides supported).
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { Type } from "typebox";
import { ConfigError, loadConfig, resolveSettings } from "./src/config.ts";
import { fetchPage, type FetchResult } from "./src/fetch.ts";
import { searchKagi } from "./src/providers/kagi.ts";
import { searchSearxng } from "./src/providers/searxng.ts";
import { SearchError, type SearchResult } from "./src/types.ts";
@@ -14,6 +16,14 @@ function formatResults(results: SearchResult[]): string {
.join("\n\n");
}
/**
 * Renders a fetch result as the markdown text returned to the caller.
 *
 * The output is an optional `# title` heading, the final URL wrapped in
 * angle brackets, and then the page markdown. The heading is omitted when
 * the title is missing or contains only whitespace.
 *
 * @param r - The fetch result to render; reads `title`, `finalUrl`, and `markdown`.
 * @returns The header followed by `r.markdown`.
 */
function formatFetch(r: FetchResult): string {
  // Collapse whitespace runs (including newlines) and trim, so a padded or
  // multi-line title still renders as a single well-formed heading line.
  const title = r.title?.replace(/\s+/g, " ").trim() ?? "";
  const link = `<${r.finalUrl}>\n\n`;
  const header = title ? `# ${title}\n\n${link}` : link;
  return `${header}${r.markdown}`;
}
async function runSearch(query: string): Promise<SearchResult[]> {
const config = loadConfig();
const settings = resolveSettings(config);
@@ -27,7 +37,7 @@ async function runSearch(query: string): Promise<SearchResult[]> {
export default function (pi: ExtensionAPI) {
pi.registerTool({
name: "search",
name: "web_search",
label: "Web Search",
description:
"Search the web. Returns a markdown list of titles, URLs, and snippets.",
@@ -54,4 +64,40 @@ export default function (pi: ExtensionAPI) {
}
},
});
// Registers the `web_fetch` tool: fetches a single URL through `fetchPage`
// and returns the rendered markdown produced by `formatFetch`.
pi.registerTool({
  name: "web_fetch",
  label: "Web Fetch",
  description:
    "Fetch a URL and return the page content as readable markdown (Readability + Turndown over a headless Firefox session, so JS-rendered pages work). For raw HTML, non-text content (PDFs, images), or simple HTTP requests, use bash with curl instead.",
  promptSnippet: "Fetch a URL and convert the page to readable markdown",
  parameters: Type.Object({
    url: Type.String({ description: "Absolute URL to fetch" }),
  }),
  async execute(_toolCallId, params) {
    const requestedUrl = params.url;
    try {
      const page = await fetchPage({ url: requestedUrl });
      const text = formatFetch(page);
      return {
        content: [{ type: "text", text }],
        details: { raw: page },
      };
    } catch (err) {
      // Only SearchError is reported as a tool-level error result; anything
      // else is rethrown unchanged.
      if (!(err instanceof SearchError)) {
        throw err;
      }
      // Placeholder payload with empty markdown, so `details.raw` is still a
      // FetchResult on the error path.
      const emptyResult = {
        url: requestedUrl,
        finalUrl: requestedUrl,
        markdown: "",
        truncated: false,
      } satisfies FetchResult;
      return {
        content: [{ type: "text", text: `Fetch error: ${err.message}` }],
        isError: true,
        details: { raw: emptyResult },
      };
    }
  },
});
}