refactor!: replace snapshot with reader fallback, collapse commands
Remove the snapshot command and enhance reader to try Firefox Reader View first, falling back to raw Turndown conversion of document.body when Reader View fails or is skipped via --no-reader.

- reader returns markdown by default (use --format=json for structured output)
- JSON output includes method: 'reader' | 'raw' to signal the extraction path
- --no-reader skips Reader View (stays on the loaded page, preserving JS mutations)
- Add @ts-nocheck to test/smoke.js and exclude test/ from tsconfig
- Update all tests from snapshot to reader, using --no-reader for data URIs
- Update AGENTS.md and help text

BREAKING CHANGE: snapshot subcommand removed; use reader instead.
This commit is contained in:
199
src/index.ts
199
src/index.ts
@@ -21,6 +21,7 @@ interface ReaderArticle {
|
||||
sourceUrl?: string;
|
||||
finalUrl?: string;
|
||||
markdown?: string;
|
||||
method?: "reader" | "raw";
|
||||
}
|
||||
|
||||
// Parse CLI Args
|
||||
@@ -87,10 +88,9 @@ function helpText() {
|
||||
return `Usage: glimpse <command> <url> [options]
|
||||
|
||||
Commands:
|
||||
snapshot <url> [options] Return an agent-friendly page snapshot as JSON
|
||||
reader <url> [options] Extract page content as Markdown (Reader View with raw fallback)
|
||||
exec <url> [options] Execute JavaScript on a page and return the result
|
||||
screenshot <url> [options] Save a PNG screenshot of a page
|
||||
reader <url> [options] Extract Firefox Reader View content as Markdown
|
||||
search <query> [options] Search using a supported provider and return JSON results
|
||||
|
||||
Common Options:
|
||||
@@ -114,6 +114,7 @@ Screenshot Options:
|
||||
Reader Options:
|
||||
--format=<format> Output format: markdown, html, text, json (default: markdown)
|
||||
--output=<file> Write output to a file
|
||||
--no-reader Skip Reader View and use raw page extraction
|
||||
|
||||
Search Options:
|
||||
--provider=<provider> Search provider: kagi (default: config or kagi)
|
||||
@@ -121,11 +122,12 @@ Search Options:
|
||||
--format=<format> Output format: markdown, json (default: markdown)
|
||||
|
||||
Examples:
|
||||
glimpse snapshot https://example.com
|
||||
glimpse reader https://example.com
|
||||
glimpse reader https://example.com --no-reader
|
||||
glimpse reader https://example.com/article --output=article.md
|
||||
glimpse exec https://example.com --js="return document.title"
|
||||
glimpse exec https://example.com --script=extract.js
|
||||
glimpse screenshot https://example.com --js="document.body.style.zoom = '80%'" --output=example.png
|
||||
glimpse reader https://example.com/article --script=prepare.js --output=article.md
|
||||
KAGI_TOKEN=... glimpse search --provider=kagi "node.js browser automation"`;
|
||||
}
|
||||
|
||||
@@ -284,95 +286,6 @@ async function withPage(
|
||||
});
|
||||
}
|
||||
|
||||
// Snapshot Script
// Browser-side script source that snapshotCommand hands to driver.executeScript().
// It ends in a top-level `return`, so it is presumably run as a function body by
// WebDriver (NOTE(review): confirm against the executeScript contract).
// The returned object has these keys: text, headings, links, buttons, inputs, forms.
// - normalize() collapses whitespace runs to a single space and trims; the regex is
//   written `\\s` because this is a template literal, so the script itself sees `\s`.
// - safeValue() blanks the value of password and hidden inputs.
// - labelText() prefers input.labels and otherwise looks for a label[for] whose
//   `for` attribute equals the input id.
// - collectHeadings() takes the level from the H1-H6 tag name or aria-level (null
//   when neither parses), drops headings with no text, and returns [] if it throws.
const snapshotScript = `
|
||||
const normalize = (value) => String(value || "").replace(/\\s+/g, " ").trim();
|
||||
const visibleText = (element) => normalize(element?.innerText || element?.textContent || "");
|
||||
const safeValue = (input) => ["password", "hidden"].includes(input.type) ? "" : input.value || "";
|
||||
const labelText = (input) => {
|
||||
const labels = Array.from(input.labels || []).map((label) => visibleText(label)).filter(Boolean);
|
||||
if (labels.length > 0) return labels.join(" ");
|
||||
|
||||
if (input.id) {
|
||||
const label = Array.from(document.querySelectorAll("label[for]"))
|
||||
.find((candidate) => candidate.getAttribute("for") === input.id);
|
||||
if (label) return visibleText(label);
|
||||
}
|
||||
|
||||
return "";
|
||||
};
|
||||
const inputSummary = (input) => ({
|
||||
type: input.type || input.tagName.toLowerCase(),
|
||||
name: input.name || "",
|
||||
id: input.id || "",
|
||||
placeholder: input.placeholder || "",
|
||||
value: safeValue(input),
|
||||
label: labelText(input),
|
||||
});
|
||||
const collectHeadings = () => {
|
||||
try {
|
||||
return Array.from(document.querySelectorAll("h1,h2,h3,h4,h5,h6,[role='heading']"))
|
||||
.map((heading) => {
|
||||
const tagLevel = heading.tagName.match(/^H([1-6])$/i)?.[1];
|
||||
const ariaLevel = heading.getAttribute("aria-level");
|
||||
const level = Number.parseInt(tagLevel || ariaLevel || "0", 10);
|
||||
const text = visibleText(heading);
|
||||
|
||||
return text ? { level: level || null, text } : null;
|
||||
})
|
||||
.filter(Boolean);
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
};
|
||||
|
||||
return {
|
||||
text: normalize(document.body?.innerText || ""),
|
||||
headings: collectHeadings(),
|
||||
links: Array.from(document.querySelectorAll("a[href]"))
|
||||
.map((link) => ({ text: visibleText(link), href: link.href }))
|
||||
.filter((link) => link.text || link.href),
|
||||
buttons: Array.from(document.querySelectorAll("button,input[type='button'],input[type='submit'],input[type='reset'],[role='button']"))
|
||||
.map((button) => ({
|
||||
text: visibleText(button) || button.value || button.getAttribute("aria-label") || "",
|
||||
type: button.type || button.getAttribute("role") || "button",
|
||||
name: button.name || "",
|
||||
id: button.id || "",
|
||||
}))
|
||||
.filter((button) => button.text || button.name || button.id),
|
||||
inputs: Array.from(document.querySelectorAll("input,textarea,select"))
|
||||
.map(inputSummary),
|
||||
forms: Array.from(document.querySelectorAll("form"))
|
||||
.map((form) => ({
|
||||
action: form.action || "",
|
||||
method: (form.method || "get").toLowerCase(),
|
||||
text: visibleText(form),
|
||||
inputs: Array.from(form.querySelectorAll("input,textarea,select")).map(inputSummary),
|
||||
})),
|
||||
};
|
||||
`;
|
||||
|
||||
/**
 * Handle the `snapshot` subcommand.
 *
 * Takes the target URL from the first positional argument (calling usage()
 * when it is missing), opens it through withPage(), and collects the current
 * URL, the page title and the snapshotScript result concurrently.
 *
 * @returns Whatever withPage() yields for the callback's
 *   `{ ok: true, url, title, result }` payload.
 */
async function snapshotCommand() {
  const [pageUrl] = getPositionalArgs();

  if (!pageUrl) {
    usage();
  }

  // Capture Page Metadata
  const capture = async (browser: WebDriver) => {
    const [url, title, result] = await Promise.all([
      browser.getCurrentUrl(),
      browser.getTitle(),
      browser.executeScript(snapshotScript),
    ]);

    return { ok: true, url, title, result };
  };

  return withPage(pageUrl, capture);
}
|
||||
|
||||
async function execCommand() {
|
||||
const [targetUrl] = getPositionalArgs();
|
||||
|
||||
@@ -498,27 +411,19 @@ async function searchCommand() {
|
||||
}
|
||||
}
|
||||
|
||||
async function readerCommand() {
|
||||
const [targetUrl] = getPositionalArgs();
|
||||
const outputPath = getOption("--output");
|
||||
const format = getOption("--format") ?? "markdown";
|
||||
// Try Reader View Extraction
|
||||
async function tryReaderView(
|
||||
driver: WebDriver,
|
||||
finalUrl: string,
|
||||
targetUrl: string,
|
||||
): Promise<ReaderArticle | null> {
|
||||
const readerUrl = `about:reader?url=${encodeURIComponent(finalUrl)}`;
|
||||
await driver.get(readerUrl);
|
||||
|
||||
if (!targetUrl) usage();
|
||||
|
||||
return withPage(targetUrl, async (driver: WebDriver) => {
|
||||
// Capture Final Url
|
||||
const finalUrl = await driver.getCurrentUrl();
|
||||
|
||||
// Open Firefox Reader View
|
||||
const readerUrl = `about:reader?url=${encodeURIComponent(finalUrl)}`;
|
||||
await driver.get(readerUrl);
|
||||
|
||||
// Wait For Reader Content
|
||||
let article: ReaderArticle;
|
||||
try {
|
||||
article = await driver.wait(
|
||||
async () => {
|
||||
return driver.executeScript(`
|
||||
try {
|
||||
return await driver.wait(
|
||||
async () => {
|
||||
return driver.executeScript(`
|
||||
const content = document.querySelector("#moz-reader-content, .moz-reader-content");
|
||||
const error = document.querySelector(".reader-error");
|
||||
const text = content?.innerText?.trim() || "";
|
||||
@@ -540,15 +445,67 @@ async function readerCommand() {
|
||||
|
||||
return null;
|
||||
`);
|
||||
},
|
||||
timeoutMs,
|
||||
`No readable article content found for URL: ${targetUrl}`,
|
||||
);
|
||||
} catch (err) {
|
||||
cliError("TIMEOUT", err.message);
|
||||
},
|
||||
timeoutMs,
|
||||
`No readable article content found for URL: ${targetUrl}`,
|
||||
);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// Raw Page Extraction Fallback
/**
 * Read the title, body HTML and body text straight from the loaded document.
 *
 * @param driver - WebDriver session to query.
 * @param originalUrl - When provided (and non-empty), the driver is navigated
 *   to this URL first, e.g. to get back to the page after a failed Reader View
 *   attempt. When omitted, the currently loaded page is used as-is.
 * @returns An article carrying only `title`, `html` and `text`; each falls back
 *   to an empty string when the document has no such value.
 */
async function extractRawPage(
  driver: WebDriver,
  originalUrl?: string,
): Promise<ReaderArticle> {
  // Navigate Back If Needed (e.g. after failed Reader View)
  if (originalUrl) {
    await driver.get(originalUrl);
  }

  const pageScript = `
    return {
      title: document.title || "",
      html: document.body?.innerHTML || "",
      text: document.body?.innerText?.trim() || "",
    };
  `;
  const extracted = (await driver.executeScript(pageScript)) as {
    title: string;
    html: string;
    text: string;
  };

  // Copy just the three extracted fields into the article shape.
  const { title, html, text } = extracted;
  return { title, html, text };
}
|
||||
|
||||
async function readerCommand() {
|
||||
const [targetUrl] = getPositionalArgs();
|
||||
const outputPath = getOption("--output");
|
||||
const format = getOption("--format") ?? "markdown";
|
||||
const skipReader = args.includes("--no-reader");
|
||||
|
||||
if (!targetUrl) usage();
|
||||
|
||||
return withPage(targetUrl, async (driver: WebDriver) => {
|
||||
const finalUrl = await driver.getCurrentUrl();
|
||||
|
||||
// Extract Page Content
|
||||
let article: ReaderArticle;
|
||||
if (!skipReader) {
|
||||
article = await tryReaderView(driver, finalUrl, targetUrl);
|
||||
}
|
||||
|
||||
// Render Output
|
||||
// Fallback To Raw Extraction
|
||||
if (!article) {
|
||||
// Navigate back only if Reader View was attempted
|
||||
article = await extractRawPage(driver, skipReader ? undefined : finalUrl);
|
||||
article.method = "raw";
|
||||
} else {
|
||||
article.method = "reader";
|
||||
}
|
||||
|
||||
// Build Output
|
||||
article.sourceUrl = targetUrl;
|
||||
article.finalUrl = finalUrl;
|
||||
article.markdown = articleToMarkdown(article);
|
||||
@@ -583,8 +540,6 @@ async function main() {
|
||||
appConfig = loadConfig({ path: configPath });
|
||||
|
||||
switch (command) {
|
||||
case "snapshot":
|
||||
return snapshotCommand();
|
||||
case "exec":
|
||||
return execCommand();
|
||||
case "screenshot":
|
||||
|
||||
Reference in New Issue
Block a user