diff --git a/README.md b/README.md
index 14e18e6..c728afb 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,21 @@
-# evan/pi-search
+# evan/pi-web
-Web search extension for [pi coding agent](https://github.com/mariozechner/pi-coding-agent). Registers a single `search` tool. Choose your provider via config.
+Web tools for [pi coding agent](https://github.com/mariozechner/pi-coding-agent). Registers two tools backed by a shared headless Firefox session that's kept warm for the lifetime of the pi process.
-## Providers
+| Tool | Purpose |
+| ------------ | ----------------------------------------------------------------------- |
+| `web_search` | Search the web via Kagi (session token) or SearXNG (JSON API). |
+| `web_fetch` | Fetch a URL and return readable markdown (Readability + Turndown). |
+
+For raw HTTP responses, non-text content, or simple API calls, the LLM is steered toward `bash` + `curl` rather than `web_fetch`.
+
+## Search Providers
| Provider | How it works | Requires |
| --------- | --------------------------------------------------------------------------------------------- | -------------------------------------------- |
| `kagi` | Drives a headless Firefox session against `kagi.com/search?token=…&q=…` and scrapes results. | Kagi session token, `firefox`, `geckodriver` |
| `searxng` | Calls a SearXNG instance's `/search?format=json` endpoint. | A SearXNG base URL with JSON format enabled |
-The Kagi driver is shared across calls for the lifetime of the pi process, so you only pay browser startup once per session.
-
## Config
Drop a JSON file at `~/.pi/pi-search/config.json`:
@@ -29,26 +34,44 @@ Drop a JSON file at `~/.pi/pi-search/config.json`:
### Env Var Overrides
-| Variable | Overrides |
-| ------------------------- | -------------------- |
-| `PI_SEARCH_PROVIDER` | `provider` |
-| `KAGI_TOKEN` | `kagi.token` |
-| `PI_SEARCH_SEARXNG_URL` | `searxng.baseUrl` |
+| Variable | Overrides |
+| ----------------------- | ----------------- |
+| `PI_SEARCH_PROVIDER` | `provider` |
+| `KAGI_TOKEN` | `kagi.token` |
+| `PI_SEARCH_SEARXNG_URL` | `searxng.baseUrl` |
### Getting A Kagi Session Token
Open `kagi.com`, sign in, then go to **Settings → Session Link**. Copy the `token=` value from the link. Treat it like a password — it grants full account access.
-## Tool
+## Tools
-| Name | Args | Returns |
-| -------- | ------------------- | ------------------------------------------------------ |
-| `search` | `query: string` | Markdown list of `## [title](url)\n> description` items |
+### `web_search`
+
+| Arg | Type | Description |
+| ------- | ------ | ----------------- |
+| `query` | string | Search query text |
+
+Returns a markdown list of `## [title](url)\n> description` items.
+
+### `web_fetch`
+
+| Arg | Type | Description |
+| ----- | ------ | --------------------- |
+| `url` | string | Absolute URL to fetch |
+
+Returns markdown of the page. Pipeline:
+
+1. Navigate the shared Firefox session to the URL (15s timeout).
+2. Run [Readability](https://github.com/mozilla/readability) to extract the article subtree.
+3. If Readability finds nothing, fall back to the full `<body>`.
+4. Convert with [Turndown](https://github.com/mixmark-io/turndown).
+5. Truncate at 50KB with a clear marker.
## Install
```bash
-cd ~/.pi/agent/extensions/pi-search
+cd ~/.pi/agent/extensions/pi-web
npm install
```
diff --git a/index.ts b/index.ts
index cd21614..a71f8b0 100644
--- a/index.ts
+++ b/index.ts
@@ -1,8 +1,10 @@
-// Pi-Search Extension - Registers a single `search` tool. Provider (kagi or
-// searxng) is chosen via ~/.pi/pi-search/config.json or PI_SEARCH_PROVIDER.
+// Pi-Web Extension - Registers `web_search` and `web_fetch` tools backed by
+// a shared headless Firefox session. Provider config lives in
+// ~/.pi/pi-search/config.json (env overrides supported).
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { Type } from "typebox";
import { ConfigError, loadConfig, resolveSettings } from "./src/config.ts";
+import { fetchPage, type FetchResult } from "./src/fetch.ts";
import { searchKagi } from "./src/providers/kagi.ts";
import { searchSearxng } from "./src/providers/searxng.ts";
import { SearchError, type SearchResult } from "./src/types.ts";
@@ -14,6 +16,14 @@ function formatResults(results: SearchResult[]): string {
.join("\n\n");
}
+// Format Fetch Result - Renders an optional `# title` heading, then the final
+// URL as an autolink, then the page markdown.
+function formatFetch(r: FetchResult): string {
+  // Scraped titles can carry padding or embedded newlines; collapse all
+  // whitespace runs so the heading always stays on a single line.
+  const title = r.title?.replace(/\s+/g, " ").trim();
+  const heading = title ? `# ${title}\n\n` : "";
+  return `${heading}<${r.finalUrl}>\n\n${r.markdown}`;
+}
+
async function runSearch(query: string): Promise {
const config = loadConfig();
const settings = resolveSettings(config);
@@ -27,7 +37,7 @@ async function runSearch(query: string): Promise {
export default function (pi: ExtensionAPI) {
pi.registerTool({
- name: "search",
+ name: "web_search",
label: "Web Search",
description:
"Search the web. Returns a markdown list of titles, URLs, and snippets.",
@@ -54,4 +64,40 @@ export default function (pi: ExtensionAPI) {
}
},
});
+
+  // Web Fetch Tool - Thin wrapper over fetchPage. A SearchError is reported
+  // back as an error result carrying an empty FetchResult; any other
+  // exception is rethrown unchanged.
+  pi.registerTool({
+    name: "web_fetch",
+    label: "Web Fetch",
+    description:
+      "Fetch a URL and return the page content as readable markdown (Readability + Turndown over a headless Firefox session, so JS-rendered pages work). For raw HTML, non-text content (PDFs, images), or simple HTTP requests, use bash with curl instead.",
+    promptSnippet: "Fetch a URL and convert the page to readable markdown",
+    parameters: Type.Object({
+      url: Type.String({ description: "Absolute URL to fetch" }),
+    }),
+    async execute(_toolCallId, params) {
+      const requested = params.url;
+      let page: FetchResult;
+      try {
+        page = await fetchPage({ url: requested });
+      } catch (err) {
+        if (!(err instanceof SearchError)) throw err;
+        // Placeholder result so `details.raw` keeps the FetchResult shape.
+        const empty = {
+          url: requested,
+          finalUrl: requested,
+          markdown: "",
+          truncated: false,
+        } satisfies FetchResult;
+        return {
+          content: [{ type: "text", text: `Fetch error: ${err.message}` }],
+          isError: true,
+          details: { raw: empty },
+        };
+      }
+      return {
+        content: [{ type: "text", text: formatFetch(page) }],
+        details: { raw: page },
+      };
+    },
+  });
}
diff --git a/package-lock.json b/package-lock.json
index 5e0a5c9..fca98b7 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -8,12 +8,17 @@
"name": "@evan/pi-search",
"version": "0.1.0",
"dependencies": {
- "selenium-webdriver": "^4.43.0"
+ "@mozilla/readability": "^0.6.0",
+ "jsdom": "^29.1.1",
+ "selenium-webdriver": "^4.43.0",
+ "turndown": "^7.2.4"
},
"devDependencies": {
"@mariozechner/pi-coding-agent": "^0.72.0",
+ "@types/jsdom": "^28.0.3",
"@types/node": "^22.10.0",
"@types/selenium-webdriver": "^4.35.5",
+ "@types/turndown": "^5.0.6",
"oxlint": "^1.62.0",
"tsx": "^4.19.2",
"typebox": "^1.1.37",
@@ -41,6 +46,53 @@
}
}
},
+ "node_modules/@asamuzakjp/css-color": {
+ "version": "5.1.11",
+ "resolved": "https://registry.npmjs.org/@asamuzakjp/css-color/-/css-color-5.1.11.tgz",
+ "integrity": "sha512-KVw6qIiCTUQhByfTd78h2yD1/00waTmm9uy/R7Ck/ctUyAPj+AEDLkQIdJW0T8+qGgj3j5bpNKK7Q3G+LedJWg==",
+ "license": "MIT",
+ "dependencies": {
+ "@asamuzakjp/generational-cache": "^1.0.1",
+ "@csstools/css-calc": "^3.2.0",
+ "@csstools/css-color-parser": "^4.1.0",
+ "@csstools/css-parser-algorithms": "^4.0.0",
+ "@csstools/css-tokenizer": "^4.0.0"
+ },
+ "engines": {
+ "node": "^20.19.0 || ^22.12.0 || >=24.0.0"
+ }
+ },
+ "node_modules/@asamuzakjp/dom-selector": {
+ "version": "7.1.1",
+ "resolved": "https://registry.npmjs.org/@asamuzakjp/dom-selector/-/dom-selector-7.1.1.tgz",
+ "integrity": "sha512-67RZDnYRc8H/8MLDgQCDE//zoqVFwajkepHZgmXrbwybzXOEwOWGPYGmALYl9J2DOLfFPPs6kKCqmbzV895hTQ==",
+ "license": "MIT",
+ "dependencies": {
+ "@asamuzakjp/generational-cache": "^1.0.1",
+ "@asamuzakjp/nwsapi": "^2.3.9",
+ "bidi-js": "^1.0.3",
+ "css-tree": "^3.2.1",
+ "is-potential-custom-element-name": "^1.0.1"
+ },
+ "engines": {
+ "node": "^20.19.0 || ^22.12.0 || >=24.0.0"
+ }
+ },
+ "node_modules/@asamuzakjp/generational-cache": {
+ "version": "1.0.1",
+ "resolved": "https://registry.npmjs.org/@asamuzakjp/generational-cache/-/generational-cache-1.0.1.tgz",
+ "integrity": "sha512-wajfB8KqzMCN2KGNFdLkReeHncd0AslUSrvHVvvYWuU8ghncRJoA50kT3zP9MVL0+9g4/67H+cdvBskj9THPzg==",
+ "license": "MIT",
+ "engines": {
+ "node": "^20.19.0 || ^22.12.0 || >=24.0.0"
+ }
+ },
+ "node_modules/@asamuzakjp/nwsapi": {
+ "version": "2.3.9",
+ "resolved": "https://registry.npmjs.org/@asamuzakjp/nwsapi/-/nwsapi-2.3.9.tgz",
+ "integrity": "sha512-n8GuYSrI9bF7FFZ/SjhwevlHc8xaVlb/7HmHelnc/PZXBD2ZR49NnN9sMMuDdEGPeeRQ5d0hqlSlEpgCX3Wl0Q==",
+ "license": "MIT"
+ },
"node_modules/@aws-crypto/crc32": {
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/@aws-crypto/crc32/-/crc32-5.2.0.tgz",
@@ -517,6 +569,152 @@
"url": "https://github.com/sponsors/Borewit"
}
},
+ "node_modules/@bramus/specificity": {
+ "version": "2.4.2",
+ "resolved": "https://registry.npmjs.org/@bramus/specificity/-/specificity-2.4.2.tgz",
+ "integrity": "sha512-ctxtJ/eA+t+6q2++vj5j7FYX3nRu311q1wfYH3xjlLOsczhlhxAg2FWNUXhpGvAw3BWo1xBcvOV6/YLc2r5FJw==",
+ "license": "MIT",
+ "dependencies": {
+ "css-tree": "^3.0.0"
+ },
+ "bin": {
+ "specificity": "bin/cli.js"
+ }
+ },
+ "node_modules/@csstools/color-helpers": {
+ "version": "6.0.2",
+ "resolved": "https://registry.npmjs.org/@csstools/color-helpers/-/color-helpers-6.0.2.tgz",
+ "integrity": "sha512-LMGQLS9EuADloEFkcTBR3BwV/CGHV7zyDxVRtVDTwdI2Ca4it0CCVTT9wCkxSgokjE5Ho41hEPgb8OEUwoXr6Q==",
+ "funding": [
+ {
+ "type": "github",
+ "url": "https://github.com/sponsors/csstools"
+ },
+ {
+ "type": "opencollective",
+ "url": "https://opencollective.com/csstools"
+ }
+ ],
+ "license": "MIT-0",
+ "engines": {
+ "node": ">=20.19.0"
+ }
+ },
+ "node_modules/@csstools/css-calc": {
+ "version": "3.2.1",
+ "resolved": "https://registry.npmjs.org/@csstools/css-calc/-/css-calc-3.2.1.tgz",
+ "integrity": "sha512-DtdHlgXh5ZkA43cwBcAm+huzgJiwx3ZTWVjBs94kwz2xKqSimDA3lBgCjphYgwgVUMWatSM0pDd8TILB1yrVVg==",
+ "funding": [
+ {
+ "type": "github",
+ "url": "https://github.com/sponsors/csstools"
+ },
+ {
+ "type": "opencollective",
+ "url": "https://opencollective.com/csstools"
+ }
+ ],
+ "license": "MIT",
+ "engines": {
+ "node": ">=20.19.0"
+ },
+ "peerDependencies": {
+ "@csstools/css-parser-algorithms": "^4.0.0",
+ "@csstools/css-tokenizer": "^4.0.0"
+ }
+ },
+ "node_modules/@csstools/css-color-parser": {
+ "version": "4.1.1",
+ "resolved": "https://registry.npmjs.org/@csstools/css-color-parser/-/css-color-parser-4.1.1.tgz",
+ "integrity": "sha512-eZ5XOtyhK+mggRafYUWzA0tvaYOFgdY8AkgQiCJF9qNAePnUo/zmsqqYubBBb3sQ8uNUaSKTY9s9klfRaAXL0g==",
+ "funding": [
+ {
+ "type": "github",
+ "url": "https://github.com/sponsors/csstools"
+ },
+ {
+ "type": "opencollective",
+ "url": "https://opencollective.com/csstools"
+ }
+ ],
+ "license": "MIT",
+ "dependencies": {
+ "@csstools/color-helpers": "^6.0.2",
+ "@csstools/css-calc": "^3.2.1"
+ },
+ "engines": {
+ "node": ">=20.19.0"
+ },
+ "peerDependencies": {
+ "@csstools/css-parser-algorithms": "^4.0.0",
+ "@csstools/css-tokenizer": "^4.0.0"
+ }
+ },
+ "node_modules/@csstools/css-parser-algorithms": {
+ "version": "4.0.0",
+ "resolved": "https://registry.npmjs.org/@csstools/css-parser-algorithms/-/css-parser-algorithms-4.0.0.tgz",
+ "integrity": "sha512-+B87qS7fIG3L5h3qwJ/IFbjoVoOe/bpOdh9hAjXbvx0o8ImEmUsGXN0inFOnk2ChCFgqkkGFQ+TpM5rbhkKe4w==",
+ "funding": [
+ {
+ "type": "github",
+ "url": "https://github.com/sponsors/csstools"
+ },
+ {
+ "type": "opencollective",
+ "url": "https://opencollective.com/csstools"
+ }
+ ],
+ "license": "MIT",
+ "engines": {
+ "node": ">=20.19.0"
+ },
+ "peerDependencies": {
+ "@csstools/css-tokenizer": "^4.0.0"
+ }
+ },
+ "node_modules/@csstools/css-syntax-patches-for-csstree": {
+ "version": "1.1.4",
+ "resolved": "https://registry.npmjs.org/@csstools/css-syntax-patches-for-csstree/-/css-syntax-patches-for-csstree-1.1.4.tgz",
+ "integrity": "sha512-wgsqt92b7C7tQhIdPNxj0n9zuUbQlvAuI1exyzeNrOKOi62SD7ren8zqszmpVREjAOqg8cD2FqYhQfAuKjk4sw==",
+ "funding": [
+ {
+ "type": "github",
+ "url": "https://github.com/sponsors/csstools"
+ },
+ {
+ "type": "opencollective",
+ "url": "https://opencollective.com/csstools"
+ }
+ ],
+ "license": "MIT-0",
+ "peerDependencies": {
+ "css-tree": "^3.2.1"
+ },
+ "peerDependenciesMeta": {
+ "css-tree": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/@csstools/css-tokenizer": {
+ "version": "4.0.0",
+ "resolved": "https://registry.npmjs.org/@csstools/css-tokenizer/-/css-tokenizer-4.0.0.tgz",
+ "integrity": "sha512-QxULHAm7cNu72w97JUNCBFODFaXpbDg+dP8b/oWFAZ2MTRppA3U00Y2L1HqaS4J6yBqxwa/Y3nMBaxVKbB/NsA==",
+ "funding": [
+ {
+ "type": "github",
+ "url": "https://github.com/sponsors/csstools"
+ },
+ {
+ "type": "opencollective",
+ "url": "https://opencollective.com/csstools"
+ }
+ ],
+ "license": "MIT",
+ "engines": {
+ "node": ">=20.19.0"
+ }
+ },
"node_modules/@esbuild/aix-ppc64": {
"version": "0.28.0",
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.28.0.tgz",
@@ -959,6 +1157,23 @@
"node": ">=18"
}
},
+ "node_modules/@exodus/bytes": {
+ "version": "1.15.1",
+ "resolved": "https://registry.npmjs.org/@exodus/bytes/-/bytes-1.15.1.tgz",
+ "integrity": "sha512-S6mL0yNB/Abt9Ei4tq8gDhcczc4S3+vQ4ra7vxnAf+YHC02srtqxKKZghx2Dq6p0e66THKwR6r8N6P95wEty7Q==",
+ "license": "MIT",
+ "engines": {
+ "node": "^20.19.0 || ^22.12.0 || >=24.0.0"
+ },
+ "peerDependencies": {
+ "@noble/hashes": "^1.8.0 || ^2.0.0"
+ },
+ "peerDependenciesMeta": {
+ "@noble/hashes": {
+ "optional": true
+ }
+ }
+ },
"node_modules/@google/genai": {
"version": "1.52.0",
"resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.52.0.tgz",
@@ -1303,6 +1518,21 @@
"zod-to-json-schema": "^3.25.0"
}
},
+ "node_modules/@mixmark-io/domino": {
+ "version": "2.2.0",
+ "resolved": "https://registry.npmjs.org/@mixmark-io/domino/-/domino-2.2.0.tgz",
+ "integrity": "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==",
+ "license": "BSD-2-Clause"
+ },
+ "node_modules/@mozilla/readability": {
+ "version": "0.6.0",
+ "resolved": "https://registry.npmjs.org/@mozilla/readability/-/readability-0.6.0.tgz",
+ "integrity": "sha512-juG5VWh4qAivzTAeMzvY9xs9HY5rAcr2E4I7tiSSCokRFi7XIZCAu92ZkSTsIj1OPceCifL3cpfteP3pDT9/QQ==",
+ "license": "Apache-2.0",
+ "engines": {
+ "node": ">=14.0.0"
+ }
+ },
"node_modules/@nodable/entities": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/@nodable/entities/-/entities-2.1.0.tgz",
@@ -1880,6 +2110,39 @@
"dev": true,
"license": "MIT"
},
+ "node_modules/@types/jsdom": {
+ "version": "28.0.3",
+ "resolved": "https://registry.npmjs.org/@types/jsdom/-/jsdom-28.0.3.tgz",
+ "integrity": "sha512-/HQ2uFoetFTXuye8vzIcHw2z6Fwi7Hi/qcgC+RoS9NCyewiqxhVGqlG+ViGB6lkax481R6dmhf1I7lIGlzJStQ==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "@types/node": "*",
+ "@types/tough-cookie": "*",
+ "parse5": "^8.0.0",
+ "undici-types": "^7.21.0"
+ }
+ },
+ "node_modules/@types/jsdom/node_modules/parse5": {
+ "version": "8.0.1",
+ "resolved": "https://registry.npmjs.org/parse5/-/parse5-8.0.1.tgz",
+ "integrity": "sha512-z1e/HMG90obSGeidlli3hj7cbocou0/wa5HacvI3ASx34PecNjNQeaHNo5WIZpWofN9kgkqV1q5YvXe3F0FoPw==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "entities": "^8.0.0"
+ },
+ "funding": {
+ "url": "https://github.com/inikulin/parse5?sponsor=1"
+ }
+ },
+ "node_modules/@types/jsdom/node_modules/undici-types": {
+ "version": "7.25.0",
+ "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.25.0.tgz",
+ "integrity": "sha512-AXNgS1Byr27fTI+2bsPEkV9CxkT8H6xNyRI68b3TatlZo3RkzlqQBLL+w7SmGPVpokjHbcuNVQUWE7FRTg+LRA==",
+ "dev": true,
+ "license": "MIT"
+ },
"node_modules/@types/mime-types": {
"version": "2.1.4",
"resolved": "https://registry.npmjs.org/@types/mime-types/-/mime-types-2.1.4.tgz",
@@ -1915,6 +2178,20 @@
"@types/ws": "*"
}
},
+ "node_modules/@types/tough-cookie": {
+ "version": "4.0.5",
+ "resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-4.0.5.tgz",
+ "integrity": "sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/turndown": {
+ "version": "5.0.6",
+ "resolved": "https://registry.npmjs.org/@types/turndown/-/turndown-5.0.6.tgz",
+ "integrity": "sha512-ru00MoyeeouE5BX4gRL+6m/BsDfbRayOskWqUvh7CLGW+UXxHQItqALa38kKnOiZPqJrtzJUgAC2+F0rL1S4Pg==",
+ "dev": true,
+ "license": "MIT"
+ },
"node_modules/@types/ws": {
"version": "8.18.1",
"resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.18.1.tgz",
@@ -2036,6 +2313,15 @@
"node": ">=10.0.0"
}
},
+ "node_modules/bidi-js": {
+ "version": "1.0.3",
+ "resolved": "https://registry.npmjs.org/bidi-js/-/bidi-js-1.0.3.tgz",
+ "integrity": "sha512-RKshQI1R3YQ+n9YJz2QQ147P66ELpa1FQEg20Dk8oW9t2KgLbpDLLp9aGZ7y8WHSshDknG0bknqGw5/tyCs5tw==",
+ "license": "MIT",
+ "dependencies": {
+ "require-from-string": "^2.0.2"
+ }
+ },
"node_modules/bignumber.js": {
"version": "9.3.1",
"resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-9.3.1.tgz",
@@ -2196,6 +2482,19 @@
"integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==",
"license": "MIT"
},
+ "node_modules/css-tree": {
+ "version": "3.2.1",
+ "resolved": "https://registry.npmjs.org/css-tree/-/css-tree-3.2.1.tgz",
+ "integrity": "sha512-X7sjQzceUhu1u7Y/ylrRZFU2FS6LRiFVp6rKLPg23y3x3c3DOKAwuXGDp+PAGjh6CSnCjYeAul8pcT8bAl+lSA==",
+ "license": "MIT",
+ "dependencies": {
+ "mdn-data": "2.27.1",
+ "source-map-js": "^1.2.1"
+ },
+ "engines": {
+ "node": "^10 || ^12.20.0 || ^14.13.0 || >=15.0.0"
+ }
+ },
"node_modules/data-uri-to-buffer": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz",
@@ -2206,6 +2505,19 @@
"node": ">= 12"
}
},
+ "node_modules/data-urls": {
+ "version": "7.0.0",
+ "resolved": "https://registry.npmjs.org/data-urls/-/data-urls-7.0.0.tgz",
+ "integrity": "sha512-23XHcCF+coGYevirZceTVD7NdJOqVn+49IHyxgszm+JIiHLoB2TkmPtsYkNWT1pvRSGkc35L6NHs0yHkN2SumA==",
+ "license": "MIT",
+ "dependencies": {
+ "whatwg-mimetype": "^5.0.0",
+ "whatwg-url": "^16.0.0"
+ },
+ "engines": {
+ "node": "^20.19.0 || ^22.12.0 || >=24.0.0"
+ }
+ },
"node_modules/debug": {
"version": "4.4.3",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
@@ -2224,6 +2536,12 @@
}
}
},
+ "node_modules/decimal.js": {
+ "version": "10.6.0",
+ "resolved": "https://registry.npmjs.org/decimal.js/-/decimal.js-10.6.0.tgz",
+ "integrity": "sha512-YpgQiITW3JXGntzdUmyUR1V812Hn8T1YVXhCu+wO3OpS4eU9l4YdD3qjyiKdV6mvV29zapkMeD390UVEf2lkUg==",
+ "license": "MIT"
+ },
"node_modules/degenerator": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/degenerator/-/degenerator-5.0.1.tgz",
@@ -2276,6 +2594,18 @@
"once": "^1.4.0"
}
},
+ "node_modules/entities": {
+ "version": "8.0.0",
+ "resolved": "https://registry.npmjs.org/entities/-/entities-8.0.0.tgz",
+ "integrity": "sha512-zwfzJecQ/Uej6tusMqwAqU/6KL2XaB2VZ2Jg54Je6ahNBGNH6Ek6g3jjNCF0fG9EWQKGZNddNjU5F1ZQn/sBnA==",
+ "license": "BSD-2-Clause",
+ "engines": {
+ "node": ">=20.19.0"
+ },
+ "funding": {
+ "url": "https://github.com/fb55/entities?sponsor=1"
+ }
+ },
"node_modules/esbuild": {
"version": "0.28.0",
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.28.0.tgz",
@@ -2712,6 +3042,18 @@
"node": "^20.17.0 || >=22.9.0"
}
},
+ "node_modules/html-encoding-sniffer": {
+ "version": "6.0.0",
+ "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-6.0.0.tgz",
+ "integrity": "sha512-CV9TW3Y3f8/wT0BRFc1/KAVQ3TUHiXmaAb6VW9vtiMFf7SLoMd1PdAc4W3KFOFETBJUb90KatHqlsZMWV+R9Gg==",
+ "license": "MIT",
+ "dependencies": {
+ "@exodus/bytes": "^1.6.0"
+ },
+ "engines": {
+ "node": "^20.19.0 || ^22.12.0 || >=24.0.0"
+ }
+ },
"node_modules/http-proxy-agent": {
"version": "7.0.2",
"resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz",
@@ -2803,12 +3145,70 @@
"node": ">=8"
}
},
+ "node_modules/is-potential-custom-element-name": {
+ "version": "1.0.1",
+ "resolved": "https://registry.npmjs.org/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz",
+ "integrity": "sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==",
+ "license": "MIT"
+ },
"node_modules/isarray": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
"integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==",
"license": "MIT"
},
+ "node_modules/jsdom": {
+ "version": "29.1.1",
+ "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-29.1.1.tgz",
+ "integrity": "sha512-ECi4Fi2f7BdJtUKTflYRTiaMxIB0O6zfR1fX0GXpUrf6flp8QIYn1UT20YQqdSOfk2dfkCwS8LAFoJDEppNK5Q==",
+ "license": "MIT",
+ "dependencies": {
+ "@asamuzakjp/css-color": "^5.1.11",
+ "@asamuzakjp/dom-selector": "^7.1.1",
+ "@bramus/specificity": "^2.4.2",
+ "@csstools/css-syntax-patches-for-csstree": "^1.1.3",
+ "@exodus/bytes": "^1.15.0",
+ "css-tree": "^3.2.1",
+ "data-urls": "^7.0.0",
+ "decimal.js": "^10.6.0",
+ "html-encoding-sniffer": "^6.0.0",
+ "is-potential-custom-element-name": "^1.0.1",
+ "lru-cache": "^11.3.5",
+ "parse5": "^8.0.1",
+ "saxes": "^6.0.0",
+ "symbol-tree": "^3.2.4",
+ "tough-cookie": "^6.0.1",
+ "undici": "^7.25.0",
+ "w3c-xmlserializer": "^5.0.0",
+ "webidl-conversions": "^8.0.1",
+ "whatwg-mimetype": "^5.0.0",
+ "whatwg-url": "^16.0.1",
+ "xml-name-validator": "^5.0.0"
+ },
+ "engines": {
+ "node": "^20.19.0 || ^22.13.0 || >=24.0.0"
+ },
+ "peerDependencies": {
+ "canvas": "^3.0.0"
+ },
+ "peerDependenciesMeta": {
+ "canvas": {
+ "optional": true
+ }
+ }
+ },
+ "node_modules/jsdom/node_modules/parse5": {
+ "version": "8.0.1",
+ "resolved": "https://registry.npmjs.org/parse5/-/parse5-8.0.1.tgz",
+ "integrity": "sha512-z1e/HMG90obSGeidlli3hj7cbocou0/wa5HacvI3ASx34PecNjNQeaHNo5WIZpWofN9kgkqV1q5YvXe3F0FoPw==",
+ "license": "MIT",
+ "dependencies": {
+ "entities": "^8.0.0"
+ },
+ "funding": {
+ "url": "https://github.com/inikulin/parse5?sponsor=1"
+ }
+ },
"node_modules/json-bigint": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/json-bigint/-/json-bigint-1.0.0.tgz",
@@ -2900,7 +3300,6 @@
"version": "11.5.0",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.5.0.tgz",
"integrity": "sha512-5YgH9UJd7wVb9hIouI2adWpgqrrICkt070Dnj8EUY1+B4B2P9eRLPAkAAo6NICA7CEhOIeBHl46u9zSNpNu7zA==",
- "dev": true,
"license": "BlueOak-1.0.0",
"engines": {
"node": "20 || >=22"
@@ -2919,6 +3318,12 @@
"node": ">= 18"
}
},
+ "node_modules/mdn-data": {
+ "version": "2.27.1",
+ "resolved": "https://registry.npmjs.org/mdn-data/-/mdn-data-2.27.1.tgz",
+ "integrity": "sha512-9Yubnt3e8A0OKwxYSXyhLymGW4sCufcLG6VdiDdUGVkPhpqLxlvP5vl1983gQjJl3tqbrM731mjaZaP68AgosQ==",
+ "license": "CC0-1.0"
+ },
"node_modules/mime-db": {
"version": "1.54.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz",
@@ -3354,6 +3759,15 @@
"once": "^1.3.1"
}
},
+ "node_modules/punycode": {
+ "version": "2.3.1",
+ "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
+ "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=6"
+ }
+ },
"node_modules/readable-stream": {
"version": "2.3.8",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz",
@@ -3385,6 +3799,15 @@
"node": ">=0.10.0"
}
},
+ "node_modules/require-from-string": {
+ "version": "2.0.2",
+ "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz",
+ "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=0.10.0"
+ }
+ },
"node_modules/retry": {
"version": "0.13.1",
"resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz",
@@ -3416,6 +3839,18 @@
],
"license": "MIT"
},
+ "node_modules/saxes": {
+ "version": "6.0.0",
+ "resolved": "https://registry.npmjs.org/saxes/-/saxes-6.0.0.tgz",
+ "integrity": "sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==",
+ "license": "ISC",
+ "dependencies": {
+ "xmlchars": "^2.2.0"
+ },
+ "engines": {
+ "node": ">=v12.22.7"
+ }
+ },
"node_modules/selenium-webdriver": {
"version": "4.44.0",
"resolved": "https://registry.npmjs.org/selenium-webdriver/-/selenium-webdriver-4.44.0.tgz",
@@ -3506,6 +3941,15 @@
"node": ">=0.10.0"
}
},
+ "node_modules/source-map-js": {
+ "version": "1.2.1",
+ "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
+ "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==",
+ "license": "BSD-3-Clause",
+ "engines": {
+ "node": ">=0.10.0"
+ }
+ },
"node_modules/std-env": {
"version": "3.10.0",
"resolved": "https://registry.npmjs.org/std-env/-/std-env-3.10.0.tgz",
@@ -3625,6 +4069,12 @@
"node": ">=8"
}
},
+ "node_modules/symbol-tree": {
+ "version": "3.2.4",
+ "resolved": "https://registry.npmjs.org/symbol-tree/-/symbol-tree-3.2.4.tgz",
+ "integrity": "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==",
+ "license": "MIT"
+ },
"node_modules/thenify": {
"version": "3.3.1",
"resolved": "https://registry.npmjs.org/thenify/-/thenify-3.3.1.tgz",
@@ -3648,6 +4098,24 @@
"node": ">=0.8"
}
},
+ "node_modules/tldts": {
+ "version": "7.4.0",
+ "resolved": "https://registry.npmjs.org/tldts/-/tldts-7.4.0.tgz",
+ "integrity": "sha512-yHBe+zVfzNZ3QfTPW/Z6KK1G2t340gFjMHqI/4KKSt/abzYydzuCnpqdaF5gCCABby+9Yfbj59oR5F2Fd5CBzg==",
+ "license": "MIT",
+ "dependencies": {
+ "tldts-core": "^7.4.0"
+ },
+ "bin": {
+ "tldts": "bin/cli.js"
+ }
+ },
+ "node_modules/tldts-core": {
+ "version": "7.4.0",
+ "resolved": "https://registry.npmjs.org/tldts-core/-/tldts-core-7.4.0.tgz",
+ "integrity": "sha512-/mb9kRld+x1sIMXxWNOAp5m6C+D4GrAORWlJkOJ5dElvxdN1eutz/o7qHLp9gFvDF4Y3/L2xeScoxz6AbEo8rQ==",
+ "license": "MIT"
+ },
"node_modules/tmp": {
"version": "0.2.5",
"resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.5.tgz",
@@ -3676,6 +4144,30 @@
"url": "https://github.com/sponsors/Borewit"
}
},
+ "node_modules/tough-cookie": {
+ "version": "6.0.1",
+ "resolved": "https://registry.npmjs.org/tough-cookie/-/tough-cookie-6.0.1.tgz",
+ "integrity": "sha512-LktZQb3IeoUWB9lqR5EWTHgW/VTITCXg4D21M+lvybRVdylLrRMnqaIONLVb5mav8vM19m44HIcGq4qASeu2Qw==",
+ "license": "BSD-3-Clause",
+ "dependencies": {
+ "tldts": "^7.0.5"
+ },
+ "engines": {
+ "node": ">=16"
+ }
+ },
+ "node_modules/tr46": {
+ "version": "6.0.0",
+ "resolved": "https://registry.npmjs.org/tr46/-/tr46-6.0.0.tgz",
+ "integrity": "sha512-bLVMLPtstlZ4iMQHpFHTR7GAGj2jxi8Dg0s2h2MafAE4uSWF98FC/3MomU51iQAMf8/qDUbKWf5GxuvvVcXEhw==",
+ "license": "MIT",
+ "dependencies": {
+ "punycode": "^2.3.1"
+ },
+ "engines": {
+ "node": ">=20"
+ }
+ },
"node_modules/ts-algebra": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ts-algebra/-/ts-algebra-2.0.0.tgz",
@@ -3709,6 +4201,19 @@
"fsevents": "~2.3.3"
}
},
+ "node_modules/turndown": {
+ "version": "7.2.4",
+ "resolved": "https://registry.npmjs.org/turndown/-/turndown-7.2.4.tgz",
+ "integrity": "sha512-I8yFsfRzmzK0WV1pNNOA4A7y4RDfFxPRxb3t+e3ui14qSGOxGtiSP6GjeX+Y6CHb7HYaFj7ECUD7VE5kQMZWGQ==",
+ "license": "MIT",
+ "dependencies": {
+ "@mixmark-io/domino": "^2.2.0"
+ },
+ "engines": {
+ "node": ">=18",
+ "npm": ">=9"
+ }
+ },
"node_modules/typebox": {
"version": "1.1.38",
"resolved": "https://registry.npmjs.org/typebox/-/typebox-1.1.38.tgz",
@@ -3747,7 +4252,6 @@
"version": "7.25.0",
"resolved": "https://registry.npmjs.org/undici/-/undici-7.25.0.tgz",
"integrity": "sha512-xXnp4kTyor2Zq+J1FfPI6Eq3ew5h6Vl0F/8d9XU5zZQf1tX9s2Su1/3PiMmUANFULpmksxkClamIZcaUqryHsQ==",
- "dev": true,
"license": "MIT",
"engines": {
"node": ">=20.18.1"
@@ -3780,6 +4284,18 @@
"uuid": "dist-node/bin/uuid"
}
},
+ "node_modules/w3c-xmlserializer": {
+ "version": "5.0.0",
+ "resolved": "https://registry.npmjs.org/w3c-xmlserializer/-/w3c-xmlserializer-5.0.0.tgz",
+ "integrity": "sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==",
+ "license": "MIT",
+ "dependencies": {
+ "xml-name-validator": "^5.0.0"
+ },
+ "engines": {
+ "node": ">=18"
+ }
+ },
"node_modules/web-streams-polyfill": {
"version": "3.3.3",
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz",
@@ -3790,6 +4306,38 @@
"node": ">= 8"
}
},
+ "node_modules/webidl-conversions": {
+ "version": "8.0.1",
+ "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-8.0.1.tgz",
+ "integrity": "sha512-BMhLD/Sw+GbJC21C/UgyaZX41nPt8bUTg+jWyDeg7e7YN4xOM05YPSIXceACnXVtqyEw/LMClUQMtMZ+PGGpqQ==",
+ "license": "BSD-2-Clause",
+ "engines": {
+ "node": ">=20"
+ }
+ },
+ "node_modules/whatwg-mimetype": {
+ "version": "5.0.0",
+ "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-5.0.0.tgz",
+ "integrity": "sha512-sXcNcHOC51uPGF0P/D4NVtrkjSU2fNsm9iog4ZvZJsL3rjoDAzXZhkm2MWt1y+PUdggKAYVoMAIYcs78wJ51Cw==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=20"
+ }
+ },
+ "node_modules/whatwg-url": {
+ "version": "16.0.1",
+ "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-16.0.1.tgz",
+ "integrity": "sha512-1to4zXBxmXHV3IiSSEInrreIlu02vUOvrhxJJH5vcxYTBDAx51cqZiKdyTxlecdKNSjj8EcxGBxNf6Vg+945gw==",
+ "license": "MIT",
+ "dependencies": {
+ "@exodus/bytes": "^1.11.0",
+ "tr46": "^6.0.0",
+ "webidl-conversions": "^8.0.1"
+ },
+ "engines": {
+ "node": "^20.19.0 || ^22.12.0 || >=24.0.0"
+ }
+ },
"node_modules/wrap-ansi": {
"version": "7.0.0",
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
@@ -3859,6 +4407,15 @@
}
}
},
+ "node_modules/xml-name-validator": {
+ "version": "5.0.0",
+ "resolved": "https://registry.npmjs.org/xml-name-validator/-/xml-name-validator-5.0.0.tgz",
+ "integrity": "sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==",
+ "license": "Apache-2.0",
+ "engines": {
+ "node": ">=18"
+ }
+ },
"node_modules/xml-naming": {
"version": "0.1.0",
"resolved": "https://registry.npmjs.org/xml-naming/-/xml-naming-0.1.0.tgz",
@@ -3875,6 +4432,12 @@
"node": ">=16.0.0"
}
},
+ "node_modules/xmlchars": {
+ "version": "2.2.0",
+ "resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz",
+ "integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==",
+ "license": "MIT"
+ },
"node_modules/y18n": {
"version": "5.0.8",
"resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
diff --git a/package.json b/package.json
index 837cf27..a9b5e6e 100644
--- a/package.json
+++ b/package.json
@@ -1,8 +1,8 @@
{
- "name": "@evan/pi-search",
+ "name": "@evan/pi-web",
"version": "0.1.0",
"private": true,
- "description": "Web search tool for pi: Kagi (session token via headless Firefox) or SearXNG (JSON API).",
+ "description": "Web tools for pi: web_search (Kagi session token / SearXNG) and web_fetch (Readability + Turndown over headless Firefox).",
"pi": {
"extensions": [
"./index.ts"
@@ -14,12 +14,17 @@
"lint": "oxlint . --ignore-pattern=.direnv/** --ignore-pattern=node_modules/**"
},
"dependencies": {
- "selenium-webdriver": "^4.43.0"
+ "@mozilla/readability": "^0.6.0",
+ "jsdom": "^29.1.1",
+ "selenium-webdriver": "^4.43.0",
+ "turndown": "^7.2.4"
},
"devDependencies": {
"@mariozechner/pi-coding-agent": "^0.72.0",
+ "@types/jsdom": "^28.0.3",
"@types/node": "^22.10.0",
"@types/selenium-webdriver": "^4.35.5",
+ "@types/turndown": "^5.0.6",
"oxlint": "^1.62.0",
"tsx": "^4.19.2",
"typebox": "^1.1.37",
diff --git a/src/fetch.ts b/src/fetch.ts
new file mode 100644
index 0000000..004e56f
--- /dev/null
+++ b/src/fetch.ts
@@ -0,0 +1,93 @@
+import { Readability } from "@mozilla/readability";
+import { JSDOM } from "jsdom";
+import TurndownService from "turndown";
+import { getSharedDriver, resetSharedDriver } from "./driver.ts";
+import { SearchError } from "./types.ts";
+
+export interface FetchOptions { // Arguments accepted by fetchPage.
+ url: string; // URL handed to the driver's navigation call.
+ navTimeoutMs?: number; // Page-load timeout in ms set on the driver; fetchPage defaults it to 15000.
+ maxBytes?: number; // UTF-8 byte budget for the markdown; fetchPage defaults it to 50 * 1024.
+}
+
+export interface FetchResult { // Value fetchPage resolves with.
+ url: string; // URL exactly as the caller passed it in.
+ finalUrl: string; // URL the driver reports once navigation has finished.
+ title?: string; // Article title from Readability; absent when no article was extracted.
+ markdown: string; // Converted page content, after truncation to the byte budget.
+ truncated: boolean; // True when `markdown` was cut short and ends with the truncation marker.
+}
+
+// Convert HTML To Markdown - Readability first for article-shaped pages,
+// then turndown on whatever survives (article subtree or full body). We
+// don't fall back to plain text: turndown is robust enough that "noisy
+// markdown" beats "lossy text" for the LLM, and the description tells the
+// LLM to reach for curl when it needs the raw response.
+//
+// Readability.parse() rewrites the document it is handed, so it runs on a
+// deep clone. That keeps `doc` intact for the full-body fallback, which would
+// otherwise be missing whatever nodes Readability had already moved or
+// removed before giving up.
+//
+// Returns the article title (or the document's own <title> when no article
+// was extracted, or undefined when neither exists) plus the trimmed markdown.
+function htmlToMarkdown(
+  html: string,
+  url: string,
+): { title?: string; markdown: string } {
+  const dom = new JSDOM(html, { url });
+  const doc = dom.window.document;
+
+  // Fallback title, used when Readability yields no article or no title.
+  let title: string | undefined = doc.title.trim() || undefined;
+  let articleHtml: string | undefined;
+
+  try {
+    const article = new Readability(doc.cloneNode(true) as Document).parse();
+    if (article?.content) {
+      title = article.title?.trim() || title;
+      articleHtml = article.content;
+    }
+  } catch {
+    // Best effort: a Readability failure just means we convert the full body.
+  }
+
+  const contentHtml = articleHtml ?? doc.body?.innerHTML ?? html;
+
+  const td = new TurndownService({ headingStyle: "atx", codeBlockStyle: "fenced" });
+  // The untouched body still contains non-content elements; drop them so
+  // their source text does not leak into the markdown.
+  td.remove(["script", "style", "noscript"]);
+  return { title, markdown: td.turndown(contentHtml).trim() };
+}
+
+// Truncate To Byte Budget - Caps `s` at `maxBytes` of UTF-8 and appends a
+// marker stating the original size. The cut backs up to the nearest
+// character boundary so a multi-byte character is never split (a split one
+// would decode as U+FFFD). The marker is added after the cut, so truncated
+// output is slightly longer than `maxBytes`.
+function truncate(s: string, maxBytes: number): { text: string; truncated: boolean } {
+  const buf = Buffer.from(s, "utf-8");
+  if (buf.byteLength <= maxBytes) return { text: s, truncated: false };
+
+  // UTF-8 continuation bytes look like 10xxxxxx. If the first excluded byte
+  // is one, the cut lands inside a character: step back to its lead byte.
+  let end = Math.max(0, Math.floor(maxBytes));
+  while (end > 0 && ((buf[end] ?? 0) & 0xc0) === 0x80) end--;
+
+  const sliced = buf.subarray(0, end).toString("utf-8");
+  return {
+    text: `${sliced}\n\n... [truncated, original was ${buf.byteLength} bytes]`,
+    truncated: true,
+  };
+}
+
+// Fetch Page - Loads `url` in the shared driver session and resolves with
+// the page converted to markdown, capped at `maxBytes`. Failures surface as
+// a SearchError with code FETCH_FAILED.
+export async function fetchPage({
+  url,
+  navTimeoutMs = 15000,
+  maxBytes = 50 * 1024,
+}: FetchOptions): Promise<FetchResult> {
+  // Reject Malformed URLs Early - WebDriver navigation only accepts absolute
+  // URLs, so a bad one can never succeed. Failing here avoids two doomed
+  // attempts and a reset of the warm shared session over an input error.
+  if (!URL.canParse(url)) {
+    throw new SearchError(
+      "FETCH_FAILED",
+      `Fetch failed for ${url}: not a valid absolute URL`,
+    );
+  }
+
+  // Retry Once On Session Failure - Same rationale as searchKagi: cached
+  // selenium sessions can die between calls.
+  for (let attempt = 0; attempt < 2; attempt++) {
+    try {
+      const driver = await getSharedDriver();
+      await driver.manage().setTimeouts({ pageLoad: navTimeoutMs });
+      await driver.get(url);
+
+      const [html, finalUrl] = await Promise.all([
+        driver.getPageSource(),
+        driver.getCurrentUrl(),
+      ]);
+
+      // The post-navigation URL, not the requested one, becomes the JSDOM
+      // document URL used during conversion.
+      const { title, markdown } = htmlToMarkdown(html, finalUrl);
+      const { text, truncated } = truncate(markdown, maxBytes);
+      return { url, finalUrl, title, markdown: text, truncated };
+    } catch (err) {
+      if (attempt === 0) {
+        // First failure: discard the cached session and try once more.
+        await resetSharedDriver();
+        continue;
+      }
+      const msg = err instanceof Error ? err.message : String(err);
+      throw new SearchError("FETCH_FAILED", `Fetch failed for ${url}: ${msg}`);
+    }
+  }
+  // Unreachable in practice; keeps the return type total for the compiler.
+  throw new SearchError("FETCH_FAILED", `Fetch failed for ${url}`);
+}