feat: add web_fetch tool, rename to pi-web

- Rename package to @evan/pi-web (repo rename handled separately).
- Rename existing 'search' tool to 'web_search' for consistency.
- Add 'web_fetch' tool: navigates via the shared headless Firefox,
  extracts via Mozilla Readability, falls back to <body> when no
  article is detected, converts with Turndown. 50KB cap, 15s nav
  timeout. Description steers LLM to curl for raw/non-text content.
- Reuses the shared driver, so search + fetch share one warm browser.
This commit is contained in:
2026-05-25 11:51:46 -04:00
parent ebd7218b95
commit 67ce141b1b
5 changed files with 754 additions and 24 deletions

View File

@@ -1,16 +1,21 @@
# evan/pi-search
# evan/pi-web
Web search extension for [pi coding agent](https://github.com/mariozechner/pi-coding-agent). Registers a single `search` tool. Choose your provider via config.
Web tools for [pi coding agent](https://github.com/mariozechner/pi-coding-agent). Registers two tools backed by a shared headless Firefox session that's kept warm for the lifetime of the pi process.
## Providers
| Tool | Purpose |
| ------------ | ----------------------------------------------------------------------- |
| `web_search` | Search the web via Kagi (session token) or SearXNG (JSON API). |
| `web_fetch` | Fetch a URL and return readable markdown (Readability + Turndown). |
For raw HTTP responses, non-text content, or simple API calls, the LLM is steered toward `bash` + `curl` rather than `web_fetch`.
## Search Providers
| Provider | How it works | Requires |
| --------- | --------------------------------------------------------------------------------------------- | -------------------------------------------- |
| `kagi` | Drives a headless Firefox session against `kagi.com/search?token=…&q=…` and scrapes results. | Kagi session token, `firefox`, `geckodriver` |
| `searxng` | Calls a SearXNG instance's `/search?format=json` endpoint. | A SearXNG base URL with JSON format enabled |
The Kagi driver is shared across calls for the lifetime of the pi process, so you only pay browser startup once per session.
## Config
Drop a JSON file at `~/.pi/pi-search/config.json`:
@@ -30,7 +35,7 @@ Drop a JSON file at `~/.pi/pi-search/config.json`:
### Env Var Overrides
| Variable | Overrides |
| ------------------------- | -------------------- |
| ----------------------- | ----------------- |
| `PI_SEARCH_PROVIDER` | `provider` |
| `KAGI_TOKEN` | `kagi.token` |
| `PI_SEARCH_SEARXNG_URL` | `searxng.baseUrl` |
@@ -39,16 +44,34 @@ Drop a JSON file at `~/.pi/pi-search/config.json`:
Open `kagi.com`, sign in, then go to **Settings → Session Link**. Copy the `token=` value from the link. Treat it like a password — it grants full account access.
## Tool
## Tools
| Name | Args | Returns |
| -------- | ------------------- | ------------------------------------------------------ |
| `search` | `query: string` | Markdown list of `## [title](url)\n> description` items |
### `web_search`
| Arg | Type | Description |
| ------- | ------ | ----------------- |
| `query` | string | Search query text |
Returns a markdown list of `## [title](url)\n> description` items.
### `web_fetch`
| Arg | Type | Description |
| ----- | ------ | --------------------- |
| `url` | string | Absolute URL to fetch |
Returns markdown of the page. Pipeline:
1. Navigate the shared Firefox session to the URL (15s timeout).
2. Run [Readability](https://github.com/mozilla/readability) to extract the article subtree.
3. If Readability finds nothing, fall back to the full `<body>`.
4. Convert with [Turndown](https://github.com/mixmark-io/turndown).
5. Truncate at 50KB with a clear marker.
## Install
```bash
cd ~/.pi/agent/extensions/pi-search
cd ~/.pi/agent/extensions/pi-web
npm install
```

View File

@@ -1,8 +1,10 @@
// Pi-Search Extension - Registers a single `search` tool. Provider (kagi or
// searxng) is chosen via ~/.pi/pi-search/config.json or PI_SEARCH_PROVIDER.
// Pi-Web Extension - Registers `web_search` and `web_fetch` tools backed by
// a shared headless Firefox session. Provider config lives in
// ~/.pi/pi-search/config.json (env overrides supported).
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { Type } from "typebox";
import { ConfigError, loadConfig, resolveSettings } from "./src/config.ts";
import { fetchPage, type FetchResult } from "./src/fetch.ts";
import { searchKagi } from "./src/providers/kagi.ts";
import { searchSearxng } from "./src/providers/searxng.ts";
import { SearchError, type SearchResult } from "./src/types.ts";
@@ -14,6 +16,14 @@ function formatResults(results: SearchResult[]): string {
.join("\n\n");
}
function formatFetch(r: FetchResult): string {
const header =
r.title && r.title.trim()
? `# ${r.title}\n\n<${r.finalUrl}>\n\n`
: `<${r.finalUrl}>\n\n`;
return `${header}${r.markdown}`;
}
async function runSearch(query: string): Promise<SearchResult[]> {
const config = loadConfig();
const settings = resolveSettings(config);
@@ -27,7 +37,7 @@ async function runSearch(query: string): Promise<SearchResult[]> {
export default function (pi: ExtensionAPI) {
pi.registerTool({
name: "search",
name: "web_search",
label: "Web Search",
description:
"Search the web. Returns a markdown list of titles, URLs, and snippets.",
@@ -54,4 +64,40 @@ export default function (pi: ExtensionAPI) {
}
},
});
pi.registerTool({
name: "web_fetch",
label: "Web Fetch",
description:
"Fetch a URL and return the page content as readable markdown (Readability + Turndown over a headless Firefox session, so JS-rendered pages work). For raw HTML, non-text content (PDFs, images), or simple HTTP requests, use bash with curl instead.",
promptSnippet: "Fetch a URL and convert the page to readable markdown",
parameters: Type.Object({
url: Type.String({ description: "Absolute URL to fetch" }),
}),
async execute(_toolCallId, params) {
try {
const result = await fetchPage({ url: params.url });
return {
content: [{ type: "text", text: formatFetch(result) }],
details: { raw: result },
};
} catch (err) {
if (err instanceof SearchError) {
return {
content: [{ type: "text", text: `Fetch error: ${err.message}` }],
isError: true,
details: {
raw: {
url: params.url,
finalUrl: params.url,
markdown: "",
truncated: false,
} satisfies FetchResult,
},
};
}
throw err;
}
},
});
}

569
package-lock.json generated
View File

@@ -8,12 +8,17 @@
"name": "@evan/pi-search",
"version": "0.1.0",
"dependencies": {
"selenium-webdriver": "^4.43.0"
"@mozilla/readability": "^0.6.0",
"jsdom": "^29.1.1",
"selenium-webdriver": "^4.43.0",
"turndown": "^7.2.4"
},
"devDependencies": {
"@mariozechner/pi-coding-agent": "^0.72.0",
"@types/jsdom": "^28.0.3",
"@types/node": "^22.10.0",
"@types/selenium-webdriver": "^4.35.5",
"@types/turndown": "^5.0.6",
"oxlint": "^1.62.0",
"tsx": "^4.19.2",
"typebox": "^1.1.37",
@@ -41,6 +46,53 @@
}
}
},
"node_modules/@asamuzakjp/css-color": {
"version": "5.1.11",
"resolved": "https://registry.npmjs.org/@asamuzakjp/css-color/-/css-color-5.1.11.tgz",
"integrity": "sha512-KVw6qIiCTUQhByfTd78h2yD1/00waTmm9uy/R7Ck/ctUyAPj+AEDLkQIdJW0T8+qGgj3j5bpNKK7Q3G+LedJWg==",
"license": "MIT",
"dependencies": {
"@asamuzakjp/generational-cache": "^1.0.1",
"@csstools/css-calc": "^3.2.0",
"@csstools/css-color-parser": "^4.1.0",
"@csstools/css-parser-algorithms": "^4.0.0",
"@csstools/css-tokenizer": "^4.0.0"
},
"engines": {
"node": "^20.19.0 || ^22.12.0 || >=24.0.0"
}
},
"node_modules/@asamuzakjp/dom-selector": {
"version": "7.1.1",
"resolved": "https://registry.npmjs.org/@asamuzakjp/dom-selector/-/dom-selector-7.1.1.tgz",
"integrity": "sha512-67RZDnYRc8H/8MLDgQCDE//zoqVFwajkepHZgmXrbwybzXOEwOWGPYGmALYl9J2DOLfFPPs6kKCqmbzV895hTQ==",
"license": "MIT",
"dependencies": {
"@asamuzakjp/generational-cache": "^1.0.1",
"@asamuzakjp/nwsapi": "^2.3.9",
"bidi-js": "^1.0.3",
"css-tree": "^3.2.1",
"is-potential-custom-element-name": "^1.0.1"
},
"engines": {
"node": "^20.19.0 || ^22.12.0 || >=24.0.0"
}
},
"node_modules/@asamuzakjp/generational-cache": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/@asamuzakjp/generational-cache/-/generational-cache-1.0.1.tgz",
"integrity": "sha512-wajfB8KqzMCN2KGNFdLkReeHncd0AslUSrvHVvvYWuU8ghncRJoA50kT3zP9MVL0+9g4/67H+cdvBskj9THPzg==",
"license": "MIT",
"engines": {
"node": "^20.19.0 || ^22.12.0 || >=24.0.0"
}
},
"node_modules/@asamuzakjp/nwsapi": {
"version": "2.3.9",
"resolved": "https://registry.npmjs.org/@asamuzakjp/nwsapi/-/nwsapi-2.3.9.tgz",
"integrity": "sha512-n8GuYSrI9bF7FFZ/SjhwevlHc8xaVlb/7HmHelnc/PZXBD2ZR49NnN9sMMuDdEGPeeRQ5d0hqlSlEpgCX3Wl0Q==",
"license": "MIT"
},
"node_modules/@aws-crypto/crc32": {
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/@aws-crypto/crc32/-/crc32-5.2.0.tgz",
@@ -517,6 +569,152 @@
"url": "https://github.com/sponsors/Borewit"
}
},
"node_modules/@bramus/specificity": {
"version": "2.4.2",
"resolved": "https://registry.npmjs.org/@bramus/specificity/-/specificity-2.4.2.tgz",
"integrity": "sha512-ctxtJ/eA+t+6q2++vj5j7FYX3nRu311q1wfYH3xjlLOsczhlhxAg2FWNUXhpGvAw3BWo1xBcvOV6/YLc2r5FJw==",
"license": "MIT",
"dependencies": {
"css-tree": "^3.0.0"
},
"bin": {
"specificity": "bin/cli.js"
}
},
"node_modules/@csstools/color-helpers": {
"version": "6.0.2",
"resolved": "https://registry.npmjs.org/@csstools/color-helpers/-/color-helpers-6.0.2.tgz",
"integrity": "sha512-LMGQLS9EuADloEFkcTBR3BwV/CGHV7zyDxVRtVDTwdI2Ca4it0CCVTT9wCkxSgokjE5Ho41hEPgb8OEUwoXr6Q==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/csstools"
},
{
"type": "opencollective",
"url": "https://opencollective.com/csstools"
}
],
"license": "MIT-0",
"engines": {
"node": ">=20.19.0"
}
},
"node_modules/@csstools/css-calc": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/@csstools/css-calc/-/css-calc-3.2.1.tgz",
"integrity": "sha512-DtdHlgXh5ZkA43cwBcAm+huzgJiwx3ZTWVjBs94kwz2xKqSimDA3lBgCjphYgwgVUMWatSM0pDd8TILB1yrVVg==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/csstools"
},
{
"type": "opencollective",
"url": "https://opencollective.com/csstools"
}
],
"license": "MIT",
"engines": {
"node": ">=20.19.0"
},
"peerDependencies": {
"@csstools/css-parser-algorithms": "^4.0.0",
"@csstools/css-tokenizer": "^4.0.0"
}
},
"node_modules/@csstools/css-color-parser": {
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/@csstools/css-color-parser/-/css-color-parser-4.1.1.tgz",
"integrity": "sha512-eZ5XOtyhK+mggRafYUWzA0tvaYOFgdY8AkgQiCJF9qNAePnUo/zmsqqYubBBb3sQ8uNUaSKTY9s9klfRaAXL0g==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/csstools"
},
{
"type": "opencollective",
"url": "https://opencollective.com/csstools"
}
],
"license": "MIT",
"dependencies": {
"@csstools/color-helpers": "^6.0.2",
"@csstools/css-calc": "^3.2.1"
},
"engines": {
"node": ">=20.19.0"
},
"peerDependencies": {
"@csstools/css-parser-algorithms": "^4.0.0",
"@csstools/css-tokenizer": "^4.0.0"
}
},
"node_modules/@csstools/css-parser-algorithms": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/@csstools/css-parser-algorithms/-/css-parser-algorithms-4.0.0.tgz",
"integrity": "sha512-+B87qS7fIG3L5h3qwJ/IFbjoVoOe/bpOdh9hAjXbvx0o8ImEmUsGXN0inFOnk2ChCFgqkkGFQ+TpM5rbhkKe4w==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/csstools"
},
{
"type": "opencollective",
"url": "https://opencollective.com/csstools"
}
],
"license": "MIT",
"engines": {
"node": ">=20.19.0"
},
"peerDependencies": {
"@csstools/css-tokenizer": "^4.0.0"
}
},
"node_modules/@csstools/css-syntax-patches-for-csstree": {
"version": "1.1.4",
"resolved": "https://registry.npmjs.org/@csstools/css-syntax-patches-for-csstree/-/css-syntax-patches-for-csstree-1.1.4.tgz",
"integrity": "sha512-wgsqt92b7C7tQhIdPNxj0n9zuUbQlvAuI1exyzeNrOKOi62SD7ren8zqszmpVREjAOqg8cD2FqYhQfAuKjk4sw==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/csstools"
},
{
"type": "opencollective",
"url": "https://opencollective.com/csstools"
}
],
"license": "MIT-0",
"peerDependencies": {
"css-tree": "^3.2.1"
},
"peerDependenciesMeta": {
"css-tree": {
"optional": true
}
}
},
"node_modules/@csstools/css-tokenizer": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/@csstools/css-tokenizer/-/css-tokenizer-4.0.0.tgz",
"integrity": "sha512-QxULHAm7cNu72w97JUNCBFODFaXpbDg+dP8b/oWFAZ2MTRppA3U00Y2L1HqaS4J6yBqxwa/Y3nMBaxVKbB/NsA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/csstools"
},
{
"type": "opencollective",
"url": "https://opencollective.com/csstools"
}
],
"license": "MIT",
"engines": {
"node": ">=20.19.0"
}
},
"node_modules/@esbuild/aix-ppc64": {
"version": "0.28.0",
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.28.0.tgz",
@@ -959,6 +1157,23 @@
"node": ">=18"
}
},
"node_modules/@exodus/bytes": {
"version": "1.15.1",
"resolved": "https://registry.npmjs.org/@exodus/bytes/-/bytes-1.15.1.tgz",
"integrity": "sha512-S6mL0yNB/Abt9Ei4tq8gDhcczc4S3+vQ4ra7vxnAf+YHC02srtqxKKZghx2Dq6p0e66THKwR6r8N6P95wEty7Q==",
"license": "MIT",
"engines": {
"node": "^20.19.0 || ^22.12.0 || >=24.0.0"
},
"peerDependencies": {
"@noble/hashes": "^1.8.0 || ^2.0.0"
},
"peerDependenciesMeta": {
"@noble/hashes": {
"optional": true
}
}
},
"node_modules/@google/genai": {
"version": "1.52.0",
"resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.52.0.tgz",
@@ -1303,6 +1518,21 @@
"zod-to-json-schema": "^3.25.0"
}
},
"node_modules/@mixmark-io/domino": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/@mixmark-io/domino/-/domino-2.2.0.tgz",
"integrity": "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==",
"license": "BSD-2-Clause"
},
"node_modules/@mozilla/readability": {
"version": "0.6.0",
"resolved": "https://registry.npmjs.org/@mozilla/readability/-/readability-0.6.0.tgz",
"integrity": "sha512-juG5VWh4qAivzTAeMzvY9xs9HY5rAcr2E4I7tiSSCokRFi7XIZCAu92ZkSTsIj1OPceCifL3cpfteP3pDT9/QQ==",
"license": "Apache-2.0",
"engines": {
"node": ">=14.0.0"
}
},
"node_modules/@nodable/entities": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/@nodable/entities/-/entities-2.1.0.tgz",
@@ -1880,6 +2110,39 @@
"dev": true,
"license": "MIT"
},
"node_modules/@types/jsdom": {
"version": "28.0.3",
"resolved": "https://registry.npmjs.org/@types/jsdom/-/jsdom-28.0.3.tgz",
"integrity": "sha512-/HQ2uFoetFTXuye8vzIcHw2z6Fwi7Hi/qcgC+RoS9NCyewiqxhVGqlG+ViGB6lkax481R6dmhf1I7lIGlzJStQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"@types/node": "*",
"@types/tough-cookie": "*",
"parse5": "^8.0.0",
"undici-types": "^7.21.0"
}
},
"node_modules/@types/jsdom/node_modules/parse5": {
"version": "8.0.1",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-8.0.1.tgz",
"integrity": "sha512-z1e/HMG90obSGeidlli3hj7cbocou0/wa5HacvI3ASx34PecNjNQeaHNo5WIZpWofN9kgkqV1q5YvXe3F0FoPw==",
"dev": true,
"license": "MIT",
"dependencies": {
"entities": "^8.0.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/@types/jsdom/node_modules/undici-types": {
"version": "7.25.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.25.0.tgz",
"integrity": "sha512-AXNgS1Byr27fTI+2bsPEkV9CxkT8H6xNyRI68b3TatlZo3RkzlqQBLL+w7SmGPVpokjHbcuNVQUWE7FRTg+LRA==",
"dev": true,
"license": "MIT"
},
"node_modules/@types/mime-types": {
"version": "2.1.4",
"resolved": "https://registry.npmjs.org/@types/mime-types/-/mime-types-2.1.4.tgz",
@@ -1915,6 +2178,20 @@
"@types/ws": "*"
}
},
"node_modules/@types/tough-cookie": {
"version": "4.0.5",
"resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-4.0.5.tgz",
"integrity": "sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA==",
"dev": true,
"license": "MIT"
},
"node_modules/@types/turndown": {
"version": "5.0.6",
"resolved": "https://registry.npmjs.org/@types/turndown/-/turndown-5.0.6.tgz",
"integrity": "sha512-ru00MoyeeouE5BX4gRL+6m/BsDfbRayOskWqUvh7CLGW+UXxHQItqALa38kKnOiZPqJrtzJUgAC2+F0rL1S4Pg==",
"dev": true,
"license": "MIT"
},
"node_modules/@types/ws": {
"version": "8.18.1",
"resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.18.1.tgz",
@@ -2036,6 +2313,15 @@
"node": ">=10.0.0"
}
},
"node_modules/bidi-js": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/bidi-js/-/bidi-js-1.0.3.tgz",
"integrity": "sha512-RKshQI1R3YQ+n9YJz2QQ147P66ELpa1FQEg20Dk8oW9t2KgLbpDLLp9aGZ7y8WHSshDknG0bknqGw5/tyCs5tw==",
"license": "MIT",
"dependencies": {
"require-from-string": "^2.0.2"
}
},
"node_modules/bignumber.js": {
"version": "9.3.1",
"resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-9.3.1.tgz",
@@ -2196,6 +2482,19 @@
"integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==",
"license": "MIT"
},
"node_modules/css-tree": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/css-tree/-/css-tree-3.2.1.tgz",
"integrity": "sha512-X7sjQzceUhu1u7Y/ylrRZFU2FS6LRiFVp6rKLPg23y3x3c3DOKAwuXGDp+PAGjh6CSnCjYeAul8pcT8bAl+lSA==",
"license": "MIT",
"dependencies": {
"mdn-data": "2.27.1",
"source-map-js": "^1.2.1"
},
"engines": {
"node": "^10 || ^12.20.0 || ^14.13.0 || >=15.0.0"
}
},
"node_modules/data-uri-to-buffer": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz",
@@ -2206,6 +2505,19 @@
"node": ">= 12"
}
},
"node_modules/data-urls": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/data-urls/-/data-urls-7.0.0.tgz",
"integrity": "sha512-23XHcCF+coGYevirZceTVD7NdJOqVn+49IHyxgszm+JIiHLoB2TkmPtsYkNWT1pvRSGkc35L6NHs0yHkN2SumA==",
"license": "MIT",
"dependencies": {
"whatwg-mimetype": "^5.0.0",
"whatwg-url": "^16.0.0"
},
"engines": {
"node": "^20.19.0 || ^22.12.0 || >=24.0.0"
}
},
"node_modules/debug": {
"version": "4.4.3",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
@@ -2224,6 +2536,12 @@
}
}
},
"node_modules/decimal.js": {
"version": "10.6.0",
"resolved": "https://registry.npmjs.org/decimal.js/-/decimal.js-10.6.0.tgz",
"integrity": "sha512-YpgQiITW3JXGntzdUmyUR1V812Hn8T1YVXhCu+wO3OpS4eU9l4YdD3qjyiKdV6mvV29zapkMeD390UVEf2lkUg==",
"license": "MIT"
},
"node_modules/degenerator": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/degenerator/-/degenerator-5.0.1.tgz",
@@ -2276,6 +2594,18 @@
"once": "^1.4.0"
}
},
"node_modules/entities": {
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/entities/-/entities-8.0.0.tgz",
"integrity": "sha512-zwfzJecQ/Uej6tusMqwAqU/6KL2XaB2VZ2Jg54Je6ahNBGNH6Ek6g3jjNCF0fG9EWQKGZNddNjU5F1ZQn/sBnA==",
"license": "BSD-2-Clause",
"engines": {
"node": ">=20.19.0"
},
"funding": {
"url": "https://github.com/fb55/entities?sponsor=1"
}
},
"node_modules/esbuild": {
"version": "0.28.0",
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.28.0.tgz",
@@ -2712,6 +3042,18 @@
"node": "^20.17.0 || >=22.9.0"
}
},
"node_modules/html-encoding-sniffer": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-6.0.0.tgz",
"integrity": "sha512-CV9TW3Y3f8/wT0BRFc1/KAVQ3TUHiXmaAb6VW9vtiMFf7SLoMd1PdAc4W3KFOFETBJUb90KatHqlsZMWV+R9Gg==",
"license": "MIT",
"dependencies": {
"@exodus/bytes": "^1.6.0"
},
"engines": {
"node": "^20.19.0 || ^22.12.0 || >=24.0.0"
}
},
"node_modules/http-proxy-agent": {
"version": "7.0.2",
"resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz",
@@ -2803,12 +3145,70 @@
"node": ">=8"
}
},
"node_modules/is-potential-custom-element-name": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz",
"integrity": "sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==",
"license": "MIT"
},
"node_modules/isarray": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
"integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==",
"license": "MIT"
},
"node_modules/jsdom": {
"version": "29.1.1",
"resolved": "https://registry.npmjs.org/jsdom/-/jsdom-29.1.1.tgz",
"integrity": "sha512-ECi4Fi2f7BdJtUKTflYRTiaMxIB0O6zfR1fX0GXpUrf6flp8QIYn1UT20YQqdSOfk2dfkCwS8LAFoJDEppNK5Q==",
"license": "MIT",
"dependencies": {
"@asamuzakjp/css-color": "^5.1.11",
"@asamuzakjp/dom-selector": "^7.1.1",
"@bramus/specificity": "^2.4.2",
"@csstools/css-syntax-patches-for-csstree": "^1.1.3",
"@exodus/bytes": "^1.15.0",
"css-tree": "^3.2.1",
"data-urls": "^7.0.0",
"decimal.js": "^10.6.0",
"html-encoding-sniffer": "^6.0.0",
"is-potential-custom-element-name": "^1.0.1",
"lru-cache": "^11.3.5",
"parse5": "^8.0.1",
"saxes": "^6.0.0",
"symbol-tree": "^3.2.4",
"tough-cookie": "^6.0.1",
"undici": "^7.25.0",
"w3c-xmlserializer": "^5.0.0",
"webidl-conversions": "^8.0.1",
"whatwg-mimetype": "^5.0.0",
"whatwg-url": "^16.0.1",
"xml-name-validator": "^5.0.0"
},
"engines": {
"node": "^20.19.0 || ^22.13.0 || >=24.0.0"
},
"peerDependencies": {
"canvas": "^3.0.0"
},
"peerDependenciesMeta": {
"canvas": {
"optional": true
}
}
},
"node_modules/jsdom/node_modules/parse5": {
"version": "8.0.1",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-8.0.1.tgz",
"integrity": "sha512-z1e/HMG90obSGeidlli3hj7cbocou0/wa5HacvI3ASx34PecNjNQeaHNo5WIZpWofN9kgkqV1q5YvXe3F0FoPw==",
"license": "MIT",
"dependencies": {
"entities": "^8.0.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/json-bigint": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/json-bigint/-/json-bigint-1.0.0.tgz",
@@ -2900,7 +3300,6 @@
"version": "11.5.0",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.5.0.tgz",
"integrity": "sha512-5YgH9UJd7wVb9hIouI2adWpgqrrICkt070Dnj8EUY1+B4B2P9eRLPAkAAo6NICA7CEhOIeBHl46u9zSNpNu7zA==",
"dev": true,
"license": "BlueOak-1.0.0",
"engines": {
"node": "20 || >=22"
@@ -2919,6 +3318,12 @@
"node": ">= 18"
}
},
"node_modules/mdn-data": {
"version": "2.27.1",
"resolved": "https://registry.npmjs.org/mdn-data/-/mdn-data-2.27.1.tgz",
"integrity": "sha512-9Yubnt3e8A0OKwxYSXyhLymGW4sCufcLG6VdiDdUGVkPhpqLxlvP5vl1983gQjJl3tqbrM731mjaZaP68AgosQ==",
"license": "CC0-1.0"
},
"node_modules/mime-db": {
"version": "1.54.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz",
@@ -3354,6 +3759,15 @@
"once": "^1.3.1"
}
},
"node_modules/punycode": {
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
"integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==",
"license": "MIT",
"engines": {
"node": ">=6"
}
},
"node_modules/readable-stream": {
"version": "2.3.8",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz",
@@ -3385,6 +3799,15 @@
"node": ">=0.10.0"
}
},
"node_modules/require-from-string": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz",
"integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==",
"license": "MIT",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/retry": {
"version": "0.13.1",
"resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz",
@@ -3416,6 +3839,18 @@
],
"license": "MIT"
},
"node_modules/saxes": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/saxes/-/saxes-6.0.0.tgz",
"integrity": "sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==",
"license": "ISC",
"dependencies": {
"xmlchars": "^2.2.0"
},
"engines": {
"node": ">=v12.22.7"
}
},
"node_modules/selenium-webdriver": {
"version": "4.44.0",
"resolved": "https://registry.npmjs.org/selenium-webdriver/-/selenium-webdriver-4.44.0.tgz",
@@ -3506,6 +3941,15 @@
"node": ">=0.10.0"
}
},
"node_modules/source-map-js": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
"integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==",
"license": "BSD-3-Clause",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/std-env": {
"version": "3.10.0",
"resolved": "https://registry.npmjs.org/std-env/-/std-env-3.10.0.tgz",
@@ -3625,6 +4069,12 @@
"node": ">=8"
}
},
"node_modules/symbol-tree": {
"version": "3.2.4",
"resolved": "https://registry.npmjs.org/symbol-tree/-/symbol-tree-3.2.4.tgz",
"integrity": "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==",
"license": "MIT"
},
"node_modules/thenify": {
"version": "3.3.1",
"resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz",
@@ -3648,6 +4098,24 @@
"node": ">=0.8"
}
},
"node_modules/tldts": {
"version": "7.4.0",
"resolved": "https://registry.npmjs.org/tldts/-/tldts-7.4.0.tgz",
"integrity": "sha512-yHBe+zVfzNZ3QfTPW/Z6KK1G2t340gFjMHqI/4KKSt/abzYydzuCnpqdaF5gCCABby+9Yfbj59oR5F2Fd5CBzg==",
"license": "MIT",
"dependencies": {
"tldts-core": "^7.4.0"
},
"bin": {
"tldts": "bin/cli.js"
}
},
"node_modules/tldts-core": {
"version": "7.4.0",
"resolved": "https://registry.npmjs.org/tldts-core/-/tldts-core-7.4.0.tgz",
"integrity": "sha512-/mb9kRld+x1sIMXxWNOAp5m6C+D4GrAORWlJkOJ5dElvxdN1eutz/o7qHLp9gFvDF4Y3/L2xeScoxz6AbEo8rQ==",
"license": "MIT"
},
"node_modules/tmp": {
"version": "0.2.5",
"resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.5.tgz",
@@ -3676,6 +4144,30 @@
"url": "https://github.com/sponsors/Borewit"
}
},
"node_modules/tough-cookie": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-6.0.1.tgz",
"integrity": "sha512-LktZQb3IeoUWB9lqR5EWTHgW/VTITCXg4D21M+lvybRVdylLrRMnqaIONLVb5mav8vM19m44HIcGq4qASeu2Qw==",
"license": "BSD-3-Clause",
"dependencies": {
"tldts": "^7.0.5"
},
"engines": {
"node": ">=16"
}
},
"node_modules/tr46": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/tr46/-/tr46-6.0.0.tgz",
"integrity": "sha512-bLVMLPtstlZ4iMQHpFHTR7GAGj2jxi8Dg0s2h2MafAE4uSWF98FC/3MomU51iQAMf8/qDUbKWf5GxuvvVcXEhw==",
"license": "MIT",
"dependencies": {
"punycode": "^2.3.1"
},
"engines": {
"node": ">=20"
}
},
"node_modules/ts-algebra": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ts-algebra/-/ts-algebra-2.0.0.tgz",
@@ -3709,6 +4201,19 @@
"fsevents": "~2.3.3"
}
},
"node_modules/turndown": {
"version": "7.2.4",
"resolved": "https://registry.npmjs.org/turndown/-/turndown-7.2.4.tgz",
"integrity": "sha512-I8yFsfRzmzK0WV1pNNOA4A7y4RDfFxPRxb3t+e3ui14qSGOxGtiSP6GjeX+Y6CHb7HYaFj7ECUD7VE5kQMZWGQ==",
"license": "MIT",
"dependencies": {
"@mixmark-io/domino": "^2.2.0"
},
"engines": {
"node": ">=18",
"npm": ">=9"
}
},
"node_modules/typebox": {
"version": "1.1.38",
"resolved": "https://registry.npmjs.org/typebox/-/typebox-1.1.38.tgz",
@@ -3747,7 +4252,6 @@
"version": "7.25.0",
"resolved": "https://registry.npmjs.org/undici/-/undici-7.25.0.tgz",
"integrity": "sha512-xXnp4kTyor2Zq+J1FfPI6Eq3ew5h6Vl0F/8d9XU5zZQf1tX9s2Su1/3PiMmUANFULpmksxkClamIZcaUqryHsQ==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=20.18.1"
@@ -3780,6 +4284,18 @@
"uuid": "dist-node/bin/uuid"
}
},
"node_modules/w3c-xmlserializer": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-5.0.0.tgz",
"integrity": "sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==",
"license": "MIT",
"dependencies": {
"xml-name-validator": "^5.0.0"
},
"engines": {
"node": ">=18"
}
},
"node_modules/web-streams-polyfill": {
"version": "3.3.3",
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz",
@@ -3790,6 +4306,38 @@
"node": ">= 8"
}
},
"node_modules/webidl-conversions": {
"version": "8.0.1",
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-8.0.1.tgz",
"integrity": "sha512-BMhLD/Sw+GbJC21C/UgyaZX41nPt8bUTg+jWyDeg7e7YN4xOM05YPSIXceACnXVtqyEw/LMClUQMtMZ+PGGpqQ==",
"license": "BSD-2-Clause",
"engines": {
"node": ">=20"
}
},
"node_modules/whatwg-mimetype": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-5.0.0.tgz",
"integrity": "sha512-sXcNcHOC51uPGF0P/D4NVtrkjSU2fNsm9iog4ZvZJsL3rjoDAzXZhkm2MWt1y+PUdggKAYVoMAIYcs78wJ51Cw==",
"license": "MIT",
"engines": {
"node": ">=20"
}
},
"node_modules/whatwg-url": {
"version": "16.0.1",
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-16.0.1.tgz",
"integrity": "sha512-1to4zXBxmXHV3IiSSEInrreIlu02vUOvrhxJJH5vcxYTBDAx51cqZiKdyTxlecdKNSjj8EcxGBxNf6Vg+945gw==",
"license": "MIT",
"dependencies": {
"@exodus/bytes": "^1.11.0",
"tr46": "^6.0.0",
"webidl-conversions": "^8.0.1"
},
"engines": {
"node": "^20.19.0 || ^22.12.0 || >=24.0.0"
}
},
"node_modules/wrap-ansi": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
@@ -3859,6 +4407,15 @@
}
}
},
"node_modules/xml-name-validator": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-5.0.0.tgz",
"integrity": "sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==",
"license": "Apache-2.0",
"engines": {
"node": ">=18"
}
},
"node_modules/xml-naming": {
"version": "0.1.0",
"resolved": "https://registry.npmjs.org/xml-naming/-/xml-naming-0.1.0.tgz",
@@ -3875,6 +4432,12 @@
"node": ">=16.0.0"
}
},
"node_modules/xmlchars": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz",
"integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==",
"license": "MIT"
},
"node_modules/y18n": {
"version": "5.0.8",
"resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",

View File

@@ -1,8 +1,8 @@
{
"name": "@evan/pi-search",
"name": "@evan/pi-web",
"version": "0.1.0",
"private": true,
"description": "Web search tool for pi: Kagi (session token via headless Firefox) or SearXNG (JSON API).",
"description": "Web tools for pi: web_search (Kagi session token / SearXNG) and web_fetch (Readability + Turndown over headless Firefox).",
"pi": {
"extensions": [
"./index.ts"
@@ -14,12 +14,17 @@
"lint": "oxlint . --ignore-pattern=.direnv/** --ignore-pattern=node_modules/**"
},
"dependencies": {
"selenium-webdriver": "^4.43.0"
"@mozilla/readability": "^0.6.0",
"jsdom": "^29.1.1",
"selenium-webdriver": "^4.43.0",
"turndown": "^7.2.4"
},
"devDependencies": {
"@mariozechner/pi-coding-agent": "^0.72.0",
"@types/jsdom": "^28.0.3",
"@types/node": "^22.10.0",
"@types/selenium-webdriver": "^4.35.5",
"@types/turndown": "^5.0.6",
"oxlint": "^1.62.0",
"tsx": "^4.19.2",
"typebox": "^1.1.37",

93
src/fetch.ts Normal file
View File

@@ -0,0 +1,93 @@
import { Readability } from "@mozilla/readability";
import { JSDOM } from "jsdom";
import TurndownService from "turndown";
import { getSharedDriver, resetSharedDriver } from "./driver.ts";
import { SearchError } from "./types.ts";
export interface FetchOptions {
url: string;
navTimeoutMs?: number;
maxBytes?: number;
}
export interface FetchResult {
url: string;
finalUrl: string;
title?: string;
markdown: string;
truncated: boolean;
}
// Convert HTML To Markdown - Readability first for article-shaped pages,
// then turndown on whatever survives (article subtree or full body). We
// don't fall back to plain text: turndown is robust enough that "noisy
// markdown" beats "lossy text" for the LLM, and the description tells the
// LLM to reach for curl when it needs the raw response.
function htmlToMarkdown(
html: string,
url: string,
): { title?: string; markdown: string } {
const dom = new JSDOM(html, { url });
const doc = dom.window.document;
let title: string | undefined;
let contentHtml: string;
try {
const article = new Readability(doc).parse();
if (article?.content) {
title = article.title ?? undefined;
contentHtml = article.content;
} else {
contentHtml = doc.body?.innerHTML ?? html;
}
} catch {
contentHtml = doc.body?.innerHTML ?? html;
}
const td = new TurndownService({ headingStyle: "atx", codeBlockStyle: "fenced" });
return { title, markdown: td.turndown(contentHtml).trim() };
}
function truncate(s: string, maxBytes: number): { text: string; truncated: boolean } {
const buf = Buffer.from(s, "utf-8");
if (buf.byteLength <= maxBytes) return { text: s, truncated: false };
const sliced = buf.subarray(0, maxBytes).toString("utf-8");
return {
text: `${sliced}\n\n... [truncated, original was ${buf.byteLength} bytes]`,
truncated: true,
};
}
export async function fetchPage({
url,
navTimeoutMs = 15000,
maxBytes = 50 * 1024,
}: FetchOptions): Promise<FetchResult> {
// Retry Once On Session Failure - Same rationale as searchKagi: cached
// selenium sessions can die between calls.
for (let attempt = 0; attempt < 2; attempt++) {
try {
const driver = await getSharedDriver();
await driver.manage().setTimeouts({ pageLoad: navTimeoutMs });
await driver.get(url);
const [html, finalUrl] = await Promise.all([
driver.getPageSource(),
driver.getCurrentUrl(),
]);
const { title, markdown } = htmlToMarkdown(html, finalUrl);
const { text, truncated } = truncate(markdown, maxBytes);
return { url, finalUrl, title, markdown: text, truncated };
} catch (err) {
if (attempt === 0) {
await resetSharedDriver();
continue;
}
const msg = err instanceof Error ? err.message : String(err);
throw new SearchError("FETCH_FAILED", `Fetch failed for ${url}: ${msg}`);
}
}
throw new SearchError("FETCH_FAILED", `Fetch failed for ${url}`);
}