refactor!: replace snapshot with reader fallback, collapse commands

Remove the snapshot command and enhance reader to try Firefox Reader
View first, falling back to raw Turndown conversion of document.body
when Reader View fails or is skipped via --no-reader.

- reader returns Markdown by default (use --format=json for structured output)
- JSON output includes method: 'reader' | 'raw' to signal extraction path
- --no-reader skips Reader View (stays on loaded page, preserving JS mutations)
- Add @ts-nocheck to test/smoke.js and exclude test/ from tsconfig
- Update all tests from snapshot to reader with --no-reader for data URIs
- Update AGENTS.md and help text

BREAKING CHANGE: snapshot subcommand removed; use reader instead.
This commit is contained in:
2026-05-02 20:05:27 -04:00
parent eb1de23f4e
commit 6adb5111de
4 changed files with 136 additions and 170 deletions

View File

@@ -4,7 +4,7 @@
This project provides small Firefox/Selenium browser utilities packaged by Nix:
- `glimpse` - generic page utilities with subcommands, including provider-backed search
- `glimpse` - headless browser CLI with subcommands for page extraction, JS execution, screenshots, and search
Keep the tools simple, scriptable, and JSON-friendly.
@@ -18,7 +18,7 @@ npm run test:list
node test/smoke.js <tag-or-name>
```
For smoke testing without external network dependencies, use focused tags or scripts such as `npm run test:snapshot`, `npm run test:wait`, `npm run test:errors`, or `node test/smoke.js snapshot js`. Run `npm test` and `nix build .#default --no-link` when the change is broad, touches packaging, or needs full validation. Smoke tests require Firefox and geckodriver on `PATH` and use local `data:` HTML pages.
For smoke testing without external network dependencies, use focused tags or scripts such as `npm run test:wait`, `npm run test:errors`, or `node test/smoke.js reader js`. Run `npm test` and `nix build .#default --no-link` when the change is broad, touches packaging, or needs full validation. Smoke tests require Firefox and geckodriver on `PATH` and use local `data:` HTML pages.
Do not attempt a live Kagi test unless `KAGI_TOKEN` is available.
@@ -38,10 +38,9 @@ Do not attempt a live Kagi test unless `KAGI_TOKEN` is available.
Current `glimpse` subcommands:
- `snapshot <url>` - return an agent-friendly page snapshot as JSON
- `reader <url>` - extract page content as Markdown (tries Firefox Reader View, falls back to raw Turndown conversion); supports `--no-reader` to skip Reader View, `--format=json` for structured output
- `exec <url> --js=<code>` or `--script=<file>` - execute JavaScript and return the result
- `screenshot <url> --output=<file>` - save a PNG screenshot
- `reader <url>` - open Firefox Reader View and output readable content as Markdown
- `search <query>` - search with a supported provider and output JSON results
## Runtime Requirements

View File

@@ -21,6 +21,7 @@ interface ReaderArticle {
sourceUrl?: string;
finalUrl?: string;
markdown?: string;
method?: "reader" | "raw";
}
// Parse CLI Args
@@ -87,10 +88,9 @@ function helpText() {
return `Usage: glimpse <command> <url> [options]
Commands:
snapshot <url> [options] Return an agent-friendly page snapshot as JSON
reader <url> [options] Extract page content as Markdown (Reader View with raw fallback)
exec <url> [options] Execute JavaScript on a page and return the result
screenshot <url> [options] Save a PNG screenshot of a page
reader <url> [options] Extract Firefox Reader View content as Markdown
search <query> [options] Search using a supported provider and return JSON results
Common Options:
@@ -114,6 +114,7 @@ Screenshot Options:
Reader Options:
--format=<format> Output format: markdown, html, text, json (default: markdown)
--output=<file> Write output to a file
--no-reader Skip Reader View and use raw page extraction
Search Options:
--provider=<provider> Search provider: kagi (default: config or kagi)
@@ -121,11 +122,12 @@ Search Options:
--format=<format> Output format: markdown, json (default: markdown)
Examples:
glimpse snapshot https://example.com
glimpse reader https://example.com
glimpse reader https://example.com --no-reader
glimpse reader https://example.com/article --output=article.md
glimpse exec https://example.com --js="return document.title"
glimpse exec https://example.com --script=extract.js
glimpse screenshot https://example.com --js="document.body.style.zoom = '80%'" --output=example.png
glimpse reader https://example.com/article --script=prepare.js --output=article.md
KAGI_TOKEN=... glimpse search --provider=kagi "node.js browser automation"`;
}
@@ -284,95 +286,6 @@ async function withPage(
});
}
// Browser-side script source, kept as a string so it can be handed to
// driver.executeScript and evaluated inside the loaded page.
//
// The script returns one object with these keys:
//   text     - whitespace-normalized document.body.innerText
//   headings - { level, text } for h1-h6 and [role='heading'] elements; level
//              comes from the tag name or aria-level and is null when neither
//              yields a number; headings with no text are dropped, and any
//              error while collecting yields an empty array
//   links    - { text, href } for every a[href] that has text or an href
//   buttons  - { text, type, name, id } for button-like elements that have
//              text, a name, or an id
//   inputs   - summary of every input/textarea/select (type, name, id,
//              placeholder, value, label)
//   forms    - { action, method, text, inputs } for every form
//
// Values of password and hidden inputs are replaced with "" so they are not
// included in the result.
// Backslashes are doubled (\\s) because this is a template literal: the
// evaluated script must receive the regex as \s.
const snapshotScript = `
const normalize = (value) => String(value || "").replace(/\\s+/g, " ").trim();
const visibleText = (element) => normalize(element?.innerText || element?.textContent || "");
const safeValue = (input) => ["password", "hidden"].includes(input.type) ? "" : input.value || "";
const labelText = (input) => {
const labels = Array.from(input.labels || []).map((label) => visibleText(label)).filter(Boolean);
if (labels.length > 0) return labels.join(" ");
if (input.id) {
const label = Array.from(document.querySelectorAll("label[for]"))
.find((candidate) => candidate.getAttribute("for") === input.id);
if (label) return visibleText(label);
}
return "";
};
const inputSummary = (input) => ({
type: input.type || input.tagName.toLowerCase(),
name: input.name || "",
id: input.id || "",
placeholder: input.placeholder || "",
value: safeValue(input),
label: labelText(input),
});
const collectHeadings = () => {
try {
return Array.from(document.querySelectorAll("h1,h2,h3,h4,h5,h6,[role='heading']"))
.map((heading) => {
const tagLevel = heading.tagName.match(/^H([1-6])$/i)?.[1];
const ariaLevel = heading.getAttribute("aria-level");
const level = Number.parseInt(tagLevel || ariaLevel || "0", 10);
const text = visibleText(heading);
return text ? { level: level || null, text } : null;
})
.filter(Boolean);
} catch {
return [];
}
};
return {
text: normalize(document.body?.innerText || ""),
headings: collectHeadings(),
links: Array.from(document.querySelectorAll("a[href]"))
.map((link) => ({ text: visibleText(link), href: link.href }))
.filter((link) => link.text || link.href),
buttons: Array.from(document.querySelectorAll("button,input[type='button'],input[type='submit'],input[type='reset'],[role='button']"))
.map((button) => ({
text: visibleText(button) || button.value || button.getAttribute("aria-label") || "",
type: button.type || button.getAttribute("role") || "button",
name: button.name || "",
id: button.id || "",
}))
.filter((button) => button.text || button.name || button.id),
inputs: Array.from(document.querySelectorAll("input,textarea,select"))
.map(inputSummary),
forms: Array.from(document.querySelectorAll("form"))
.map((form) => ({
action: form.action || "",
method: (form.method || "get").toLowerCase(),
text: visibleText(form),
inputs: Array.from(form.querySelectorAll("input,textarea,select")).map(inputSummary),
})),
};
`;
/**
 * Handle the `snapshot` subcommand.
 *
 * Takes the first positional argument as the target URL (calling `usage()`
 * when it is missing), opens it through `withPage`, and resolves to
 * `{ ok: true, url, title, result }`, where `result` is whatever the
 * in-page `snapshotScript` returns.
 */
async function snapshotCommand() {
  const positionals = getPositionalArgs();
  const targetUrl = positionals[0];
  if (!targetUrl) {
    usage();
  }

  const capture = async (driver: WebDriver) => {
    // Issue All Three Requests Together
    const [currentUrl, pageTitle, snapshot] = await Promise.all([
      driver.getCurrentUrl(),
      driver.getTitle(),
      driver.executeScript(snapshotScript),
    ]);

    return { ok: true, url: currentUrl, title: pageTitle, result: snapshot };
  };

  return withPage(targetUrl, capture);
}
async function execCommand() {
const [targetUrl] = getPositionalArgs();
@@ -498,27 +411,19 @@ async function searchCommand() {
}
}
async function readerCommand() {
const [targetUrl] = getPositionalArgs();
const outputPath = getOption("--output");
const format = getOption("--format") ?? "markdown";
// Try Reader View Extraction
async function tryReaderView(
driver: WebDriver,
finalUrl: string,
targetUrl: string,
): Promise<ReaderArticle | null> {
const readerUrl = `about:reader?url=${encodeURIComponent(finalUrl)}`;
await driver.get(readerUrl);
if (!targetUrl) usage();
return withPage(targetUrl, async (driver: WebDriver) => {
// Capture Final Url
const finalUrl = await driver.getCurrentUrl();
// Open Firefox Reader View
const readerUrl = `about:reader?url=${encodeURIComponent(finalUrl)}`;
await driver.get(readerUrl);
// Wait For Reader Content
let article: ReaderArticle;
try {
article = await driver.wait(
async () => {
return driver.executeScript(`
try {
return await driver.wait(
async () => {
return driver.executeScript(`
const content = document.querySelector("#moz-reader-content, .moz-reader-content");
const error = document.querySelector(".reader-error");
const text = content?.innerText?.trim() || "";
@@ -540,15 +445,67 @@ async function readerCommand() {
return null;
`);
},
timeoutMs,
`No readable article content found for URL: ${targetUrl}`,
);
} catch (err) {
cliError("TIMEOUT", err.message);
},
timeoutMs,
`No readable article content found for URL: ${targetUrl}`,
);
} catch {
return null;
}
}
// Raw Page Extraction Fallback
/**
 * Read the page's title, body HTML, and body text directly, without Reader View.
 *
 * @param driver - WebDriver session to read the page from.
 * @param originalUrl - When provided, the driver navigates to this URL before
 *   extracting (e.g. to get back to the page after a failed Reader View attempt).
 * @returns Article with `title`, `html`, and `text`; each is an empty string
 *   when the page has no corresponding value.
 */
async function extractRawPage(
  driver: WebDriver,
  originalUrl?: string,
): Promise<ReaderArticle> {
  // Return To The Original Page When Asked
  if (originalUrl) await driver.get(originalUrl);

  // Read Title And Body In One Script Call
  const pageScript = `
return {
title: document.title || "",
html: document.body?.innerHTML || "",
text: document.body?.innerText?.trim() || "",
};
`;
  const page = (await driver.executeScript(pageScript)) as {
    title: string;
    html: string;
    text: string;
  };

  const { title, html, text } = page;
  return { title, html, text };
}
async function readerCommand() {
const [targetUrl] = getPositionalArgs();
const outputPath = getOption("--output");
const format = getOption("--format") ?? "markdown";
const skipReader = args.includes("--no-reader");
if (!targetUrl) usage();
return withPage(targetUrl, async (driver: WebDriver) => {
const finalUrl = await driver.getCurrentUrl();
// Extract Page Content
let article: ReaderArticle;
if (!skipReader) {
article = await tryReaderView(driver, finalUrl, targetUrl);
}
// Render Output
// Fallback To Raw Extraction
if (!article) {
// Navigate back only if Reader View was attempted
article = await extractRawPage(driver, skipReader ? undefined : finalUrl);
article.method = "raw";
} else {
article.method = "reader";
}
// Build Output
article.sourceUrl = targetUrl;
article.finalUrl = finalUrl;
article.markdown = articleToMarkdown(article);
@@ -583,8 +540,6 @@ async function main() {
appConfig = loadConfig({ path: configPath });
switch (command) {
case "snapshot":
return snapshotCommand();
case "exec":
return execCommand();
case "screenshot":

View File

@@ -1,4 +1,5 @@
#!/usr/bin/env node
// @ts-nocheck
import {
mkdtempSync,
@@ -73,7 +74,7 @@ test("no args prints help", ["help", "cli"], () => {
assert.equal(result.status, 0, result.stderr || result.stdout);
assert.match(result.stdout, /Usage: glimpse <command> <url> \[options\]/);
assert.match(result.stdout, /snapshot <url>/);
assert.match(result.stdout, /reader <url>/);
assert.equal(result.stderr, "");
});
@@ -86,48 +87,50 @@ test("help flag prints help", ["help", "cli"], () => {
assert.equal(result.stderr, "");
});
test("snapshot returns page metadata and content", ["snapshot"], () => {
const output = expectSuccess([
"snapshot",
test("reader extracts page content as markdown", ["reader"], () => {
const result = runCli([
"reader",
dataHtml(
'<title>Hello</title><h1>Main</h1><a href="/x">X</a><button>Go</button>',
'<title>Hello</title><h1>Main</h1><p>Some text</p><a href="https://example.com">Link</a>',
),
"--no-reader",
]);
assert.equal(output.ok, true);
assert.equal(output.title, "Hello");
assert.equal(typeof output.elapsedMs, "number");
assert.deepEqual(output.result.headings, [{ level: 1, text: "Main" }]);
assert.deepEqual(output.result.links, [{ href: "/x", text: "X" }]);
assert.equal(output.result.buttons[0].text, "Go");
assert.match(output.result.text, /Main/);
assert.equal(result.status, 0, result.stderr || result.stdout);
const output = result.stdout.trim();
assert.match(output, /# Main/);
assert.match(output, /Some text/);
assert.match(output, /\[Link\]\(https:\/\/example\.com\/??\)/);
});
test("snapshot extracts aria headings", ["snapshot"], () => {
test("reader returns json format with method field", ["reader"], () => {
const output = expectSuccess([
"snapshot",
dataHtml(
'<title>Hello</title><div role="heading" aria-level="2">ARIA</div>',
),
"reader",
dataHtml("<title>Hello</title><h1>Main</h1><p>World</p>"),
"--no-reader",
"--format=json",
]);
assert.equal(output.ok, true);
assert.deepEqual(output.result.headings, [{ level: 2, text: "ARIA" }]);
assert.equal(output.title, "Hello");
assert.equal(output.method, "raw");
assert.equal(typeof output.markdown, "string");
assert.match(output.markdown, /# Main/);
assert.match(output.text, /Main/);
});
test(
"snapshot runs top-level javascript before extraction",
["snapshot", "js"],
"reader runs top-level javascript before extraction",
["reader", "js"],
() => {
const output = expectSuccess([
"snapshot",
const result = runCli([
"reader",
dataHtml("<title>Hello</title><h1>Old</h1>"),
"--no-reader",
"--js=document.querySelector('h1').textContent = 'New'",
]);
assert.equal(output.ok, true);
assert.deepEqual(output.result.headings, [{ level: 1, text: "New" }]);
assert.equal(output.result.text, "New");
assert.equal(result.status, 0, result.stderr || result.stdout);
assert.match(result.stdout, /# New/);
},
);
@@ -182,8 +185,9 @@ test(
writeFileSync(configPath, "not json");
const output = expectFailure([
"snapshot",
"reader",
dataHtml("<title>Hello</title>"),
"--no-reader",
`--config=${configPath}`,
]);
@@ -201,8 +205,9 @@ test(
writeFileSync(configPath, JSON.stringify({ search: { provider: 42 } }));
const output = expectFailure([
"snapshot",
"reader",
dataHtml("<title>Hello</title>"),
"--no-reader",
`--config=${configPath}`,
]);
@@ -218,17 +223,17 @@ test("empty home config is accepted", ["config"], () => {
mkdirSync(configDir, { recursive: true });
writeFileSync(join(configDir, "config.json"), "{}");
const output = expectSuccess(
["snapshot", dataHtml("<title>Hello</title><h1>Main</h1>")],
const result = runCli(
["reader", dataHtml("<title>Hello</title><h1>Main</h1>"), "--no-reader"],
{ env: { ...process.env, XDG_CONFIG_HOME: configHome } },
);
assert.equal(output.ok, true);
assert.equal(output.title, "Hello");
assert.equal(result.status, 0, result.stderr || result.stdout);
assert.match(result.stdout, /# Main/);
});
test("unknown command returns structured error", ["errors", "cli"], () => {
const output = expectFailure(["nope", dataHtml("<title>Hello</title>")]);
const output = expectFailure(["nope", dataHtml("<title>Hello</title>"), "--no-reader"]);
assert.equal(output.ok, false);
assert.equal(output.error.code, "UNKNOWN_COMMAND");
@@ -241,8 +246,9 @@ test(
["errors", "timeout"],
() => {
const output = expectFailure([
"snapshot",
"reader",
dataHtml("<title>Hello</title>"),
"--no-reader",
"--timeout=abc",
]);
@@ -255,8 +261,9 @@ test(
test("invalid wait-until returns invalid option", ["errors", "wait"], () => {
const output = expectFailure([
"snapshot",
"reader",
dataHtml("<title>Hello</title>"),
"--no-reader",
"--wait-until=loaded",
]);
@@ -266,20 +273,22 @@ test("invalid wait-until returns invalid option", ["errors", "wait"], () => {
});
test("wait-js succeeds when condition is true", ["wait"], () => {
const output = expectSuccess([
"snapshot",
dataHtml("<title>Hello</title>"),
const result = runCli([
"reader",
dataHtml("<title>Hello</title><h1>Main</h1>"),
"--no-reader",
'--wait-js=return document.title === "Hello"',
]);
assert.equal(output.ok, true);
assert.equal(output.title, "Hello");
assert.equal(result.status, 0, result.stderr || result.stdout);
assert.match(result.stdout, /# Main/);
});
test("wait-js timeout returns wait timeout", ["wait", "errors"], () => {
const output = expectFailure([
"snapshot",
"reader",
dataHtml("<title>Hello</title>"),
"--no-reader",
"--wait-js=return false",
"--timeout=1",
]);
@@ -296,8 +305,9 @@ test(
["wait", "errors", "js"],
() => {
const output = expectFailure([
"snapshot",
"reader",
dataHtml("<title>Hello</title>"),
"--no-reader",
'--wait-js=throw new Error("boom")',
]);
@@ -313,8 +323,9 @@ test(
["errors", "js"],
() => {
const output = expectFailure([
"snapshot",
"reader",
dataHtml("<title>Hello</title>"),
"--no-reader",
'--js=throw new Error("boom")',
]);

View File

@@ -11,5 +11,6 @@
"forceConsistentCasingInFileNames": true,
"skipLibCheck": true
},
"include": ["src/**/*.ts"]
"include": ["src/**/*.ts"],
"exclude": ["test", "node_modules", "dist"]
}