556 lines
15 KiB
JavaScript
Executable File
556 lines
15 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
|
|
|
import { createDriver } from "./driver.js";
|
|
import { searchKagi } from "./kagi.js";
|
|
import { readFileSync, writeFileSync } from "node:fs";
|
|
import TurndownService from "turndown";
|
|
|
|
// Timing Defaults
const DEFAULT_TIMEOUT_MS = 10000; // Wait budget used when --timeout is not given
const POLL_INTERVAL_MS = 200; // Pause between two --wait-js polls
const startTime = Date.now(); // Baseline for the elapsedMs value in the output

// Shared Run State (withPage stores targetUrl/currentUrl here for error reports)
const runContext = {};

// Parse CLI Args
const cliTokens = process.argv.slice(2);
const command = cliTokens[0];
const args = cliTokens.slice(1);

const headless = !args.includes("--no-headless");
const existingUrl = getOption("--url");
const inlineJs = getOption("--js");
const scriptPath = getOption("--script");
const waitJs = getOption("--wait-js");
const waitUntil = getOption("--wait-until") ?? "none";

// Reassigned by validateCommonOptions once --timeout has been parsed
let timeoutMs = DEFAULT_TIMEOUT_MS;
|
|
|
|
/**
 * Look up the value of a `--name=value` style option in the CLI arguments.
 *
 * @param {string} name - Option name including the leading dashes, e.g. "--url".
 * @returns {string | undefined} Text after the first matching `name=` prefix,
 *   or undefined when no argument starts with that prefix.
 */
function getOption(name) {
  const marker = `${name}=`;

  // First Match Wins
  for (const arg of args) {
    if (arg.startsWith(marker)) {
      return arg.slice(marker.length);
    }
  }

  return undefined;
}
|
|
|
|
/**
 * Collect the CLI arguments that are not `--` options, in their original order.
 *
 * @returns {string[]} Every argument that does not start with "--".
 */
function getPositionalArgs() {
  const positional = [];

  for (const arg of args) {
    if (!arg.startsWith("--")) {
      positional.push(arg);
    }
  }

  return positional;
}
|
|
|
|
/**
 * Milliseconds that have passed since the module-level startTime was recorded.
 *
 * @returns {number} Difference between the current time and startTime.
 */
function elapsedMs() {
  const now = Date.now();
  return now - startTime;
}
|
|
|
|
/**
 * Write a command result to stdout, always terminated by a newline.
 *
 * Non-array objects get an `elapsedMs` field (kept as-is when the result
 * already carries one). Objects, arrays and null are printed as indented JSON;
 * everything else is printed via String(). Undefined prints nothing.
 *
 * @param {unknown} result - Value returned by the executed command.
 * @returns {void}
 */
function printResult(result) {
  if (result === undefined) {
    return;
  }

  // Attach Timing To Record-Like Results
  const isRecord = result !== null && typeof result === "object" && !Array.isArray(result);
  let payload = result;
  if (isRecord) {
    payload = { ...result, elapsedMs: result.elapsedMs ?? elapsedMs() };
  }

  // Serialize
  let text;
  if (typeof payload === "object") {
    text = JSON.stringify(payload, null, 2);
  } else {
    text = String(payload);
  }

  if (!text.endsWith("\n")) {
    text = `${text}\n`;
  }

  process.stdout.write(text);
}
|
|
|
|
/**
 * Error carrying a machine-readable code for the CLI's JSON error output.
 */
class CliError extends Error {
  /**
   * @param {string} code - Stable identifier such as "USAGE_ERROR".
   * @param {string} message - Human-readable description.
   * @param {object} [details={}] - Optional extra context stored on the error.
   */
  constructor(code, message, details = {}) {
    super(message);
    // Without an explicit name the instance presents itself as a plain "Error"
    // in stack traces and String(err).
    this.name = "CliError";
    this.code = code;
    this.details = details;
  }
}
|
|
|
|
/**
 * Abort the current command by throwing a CliError.
 *
 * @param {string} code - Error code forwarded to CliError.
 * @param {string} message - Error message forwarded to CliError.
 * @param {object} [details={}] - Extra context forwarded to CliError.
 * @throws {CliError} Always.
 */
function cliError(code, message, details = {}) {
  const failure = new CliError(code, message, details);
  throw failure;
}
|
|
|
|
/**
 * Report a command name that the CLI does not implement.
 *
 * @param {string} name - The unrecognized command.
 * @throws UNKNOWN_COMMAND error raised through cliError.
 */
function unknownCommand(name) {
  const message = `Unknown command: ${name}`;
  cliError("UNKNOWN_COMMAND", message);
}
|
|
|
|
/**
 * Build the full `--help` text.
 *
 * Sections are separated by blank lines. Every command/option row is indented
 * and padded so that the descriptions line up in one column.
 *
 * @returns {string} Help text without a trailing newline.
 */
function helpText() {
  // Wide enough for the longest left-hand entry plus a two-space gutter.
  const LEFT_COLUMN_WIDTH = 28;

  const section = (title, rows) =>
    [
      title,
      ...rows.map(([left, description]) => `  ${left.padEnd(LEFT_COLUMN_WIDTH)}${description}`),
    ].join("\n");

  const examples = [
    "Examples:",
    `  glimpse snapshot https://example.com`,
    `  glimpse exec https://example.com --js="return document.title"`,
    `  glimpse exec https://example.com --script=extract.js`,
    `  glimpse screenshot https://example.com --js="document.body.style.zoom = '80%'" --output=example.png`,
    `  glimpse reader https://example.com/article --script=prepare.js --output=article.md`,
    `  KAGI_TOKEN=... glimpse search --provider=kagi "node.js browser automation"`,
  ].join("\n");

  return [
    "Usage: glimpse <command> <url> [options]",
    section("Commands:", [
      ["snapshot <url> [options]", "Return an agent-friendly page snapshot as JSON"],
      ["exec <url> [options]", "Execute JavaScript on a page and return the result"],
      ["screenshot <url> [options]", "Save a PNG screenshot of a page"],
      ["reader <url> [options]", "Extract Firefox Reader View content as Markdown"],
      ["search <query> [options]", "Search using a supported provider and return JSON results"],
    ]),
    section("Common Options:", [
      ["--help", "Show this help"],
      ["--no-headless", "Show Firefox instead of running headless"],
      ["--url=<server>", "Connect to an existing WebDriver server"],
      ["--timeout=<ms>", "Maximum wait time in milliseconds (default: 10000)"],
      ["--wait-js=<code>", "Poll JS until it returns a truthy value"],
      ["--wait-until=<state>", "Wait for readiness: none, interactive, complete (default: none)"],
      ["--js=<code>", "Execute inline JS before command logic"],
      ["--script=<file>", "Execute JS from a file before command logic"],
    ]),
    section("Exec Options:", [
      ["--js=<code>", "Return the top-level JS result"],
      ["--script=<file>", "Return the top-level script result"],
    ]),
    section("Screenshot Options:", [
      ["--output=<file>", "Output PNG path (default: screenshot.png)"],
    ]),
    section("Reader Options:", [
      ["--format=<format>", "Output format: markdown, html, text, json (default: markdown)"],
      ["--output=<file>", "Write output to a file"],
    ]),
    section("Search Options:", [
      ["--provider=<provider>", "Search provider: kagi (default: kagi)"],
      ["--token=<token>", "Kagi token (default: KAGI_TOKEN)"],
    ]),
    examples,
  ].join("\n\n");
}
|
|
|
|
/**
 * Print the help text to stdout followed by a newline.
 *
 * @returns {void}
 */
function printHelp() {
  const banner = helpText();
  process.stdout.write(`${banner}\n`);
}
|
|
|
|
/**
 * Abort with a USAGE_ERROR that points the user at `--help`.
 *
 * @throws USAGE_ERROR error raised through cliError.
 */
function usage() {
  const hint = "Usage: glimpse <command> <url> [options]. Run glimpse --help for details.";
  cliError("USAGE_ERROR", hint);
}
|
|
|
|
/**
 * Read and validate the `--timeout` option.
 *
 * Only the canonical decimal form of a positive integer is accepted; inputs
 * such as "010", "1.5", "+5" or "" are rejected because they do not round-trip
 * through parseInt/String unchanged.
 *
 * @returns {number} Timeout in milliseconds; DEFAULT_TIMEOUT_MS when the
 *   option is absent.
 * @throws INVALID_OPTION error raised through cliError for invalid values.
 */
function parseTimeout() {
  const raw = getOption("--timeout");
  if (raw === undefined) {
    return DEFAULT_TIMEOUT_MS;
  }

  const timeout = Number.parseInt(raw, 10);
  const isCanonicalPositiveInteger =
    Number.isInteger(timeout) && timeout > 0 && String(timeout) === raw;

  if (!isCanonicalPositiveInteger) {
    cliError("INVALID_OPTION", "--timeout must be a positive integer.");
  }

  return timeout;
}
|
|
|
|
/**
 * Validate the options shared by all commands and store the parsed timeout
 * in the module-level `timeoutMs`.
 *
 * Checks run in this order: --js/--script exclusivity, --timeout, --wait-until.
 *
 * @throws INVALID_OPTION error raised through cliError on the first failed check.
 */
function validateCommonOptions() {
  // Validate Script Source
  const hasBothScriptSources = Boolean(inlineJs && scriptPath);
  if (hasBothScriptSources) {
    cliError("INVALID_OPTION", "Use either --js or --script, not both.");
  }

  // Validate Timeout
  timeoutMs = parseTimeout();

  // Validate Wait State
  const supportedWaitStates = ["none", "interactive", "complete"];
  if (supportedWaitStates.includes(waitUntil)) {
    return;
  }

  cliError(
    "INVALID_OPTION",
    `Unsupported --wait-until value: ${waitUntil}. Expected none, interactive, or complete.`,
  );
}
|
|
|
|
/**
 * Resolve the JavaScript source that should run before the command logic.
 *
 * @returns {string | undefined} Contents of the --script file (read as UTF-8)
 *   when a script path was given, otherwise the --js value (possibly undefined).
 */
function getPreludeScriptSource() {
  return scriptPath ? readFileSync(scriptPath, "utf-8") : inlineJs;
}
|
|
|
|
/**
 * Create a driver, run `action` with it, and always quit the driver afterwards.
 *
 * @param {(driver: object) => Promise<unknown>} action - Work to perform with
 *   the driver returned by createDriver({ headless, existingUrl }).
 * @returns {Promise<unknown>} Whatever `action` resolves to.
 * @throws BROWSER_START_FAILED error (via cliError) when the driver cannot be
 *   created; otherwise the error thrown by `action`, or the error from
 *   `driver.quit()` when `action` itself succeeded.
 */
async function withDriver(action) {
  let driver;

  // Start Browser
  try {
    driver = await createDriver({ headless, existingUrl });
  } catch (err) {
    cliError("BROWSER_START_FAILED", err.message);
  }

  // Run Action
  let result;
  try {
    result = await action(driver);
  } catch (actionError) {
    // The action's failure is the one worth reporting. A secondary failure
    // while shutting down must not replace it, so it is deliberately dropped.
    try {
      await driver.quit();
    } catch {
      // Intentionally ignored: actionError is rethrown below.
    }
    throw actionError;
  }

  // Shut Down (failures here are still reported when the action succeeded)
  await driver.quit();
  return result;
}
|
|
|
|
/**
 * Block until the page's document.readyState satisfies `--wait-until`.
 *
 * "none" returns immediately. "interactive" accepts both "interactive" and
 * "complete"; any other value accepts only "complete". Every rejection from
 * driver.wait is reported as WAIT_TIMEOUT.
 *
 * @param {object} driver - Driver exposing `wait` and `executeScript`.
 * @returns {Promise<void>}
 * @throws WAIT_TIMEOUT error raised through cliError.
 */
async function waitForReadyState(driver) {
  if (waitUntil === "none") {
    return;
  }

  const acceptedStates =
    waitUntil === "interactive" ? ["interactive", "complete"] : ["complete"];

  const isReady = async () => {
    const currentState = await driver.executeScript("return document.readyState");
    return acceptedStates.includes(currentState);
  };

  try {
    await driver.wait(isReady, timeoutMs);
  } catch {
    cliError(
      "WAIT_TIMEOUT",
      `Timed out after ${timeoutMs}ms waiting for --wait-until=${waitUntil}`,
    );
  }
}
|
|
|
|
/**
 * Poll the `--wait-js` script until it yields a truthy value.
 *
 * Does nothing when no --wait-js was given. The script is re-run every
 * POLL_INTERVAL_MS milliseconds until timeoutMs has elapsed.
 *
 * @param {object} driver - Driver exposing `executeScript`.
 * @returns {Promise<void>}
 * @throws SCRIPT_FAILED error (via cliError) when the script itself fails,
 *   WAIT_TIMEOUT error when it never becomes truthy in time.
 */
async function waitForJs(driver) {
  if (!waitJs) {
    return;
  }

  const pause = () =>
    new Promise((resolve) => {
      setTimeout(resolve, POLL_INTERVAL_MS);
    });

  const deadline = Date.now() + timeoutMs;

  while (Date.now() < deadline) {
    let outcome;

    try {
      outcome = await driver.executeScript(waitJs);
    } catch (err) {
      cliError("SCRIPT_FAILED", `--wait-js failed: ${err.message}`);
    }

    if (outcome) {
      return;
    }

    await pause();
  }

  cliError("WAIT_TIMEOUT", `Timed out after ${timeoutMs}ms waiting for --wait-js`);
}
|
|
|
|
/**
 * Execute the prelude script (from --script or --js) in the page, if any.
 *
 * @param {object} driver - Driver exposing `executeScript`.
 * @returns {Promise<unknown>} The script's result, or undefined when there is
 *   no prelude source.
 * @throws SCRIPT_FAILED error raised through cliError when execution fails.
 */
async function runPreludeScript(driver) {
  const prelude = getPreludeScriptSource();

  if (prelude) {
    try {
      return await driver.executeScript(prelude);
    } catch (err) {
      cliError("SCRIPT_FAILED", `Prelude script failed: ${err.message}`);
    }
  }

  return undefined;
}
|
|
|
|
/**
 * Open `targetUrl` in a fresh driver session, apply the common waits and the
 * prelude script, then hand control to `action`.
 *
 * Records `targetUrl` and, after navigation, the driver's current URL on
 * runContext so that error reports can include them.
 *
 * @param {string} targetUrl - Page to navigate to.
 * @param {(driver: object, scriptResult: unknown) => unknown} action - Command
 *   logic; receives the driver and the prelude script's result.
 * @returns {Promise<unknown>} Whatever `action` returns.
 * @throws NAVIGATION_FAILED error (via cliError) when navigation fails, plus
 *   anything thrown by the waits, the prelude script or `action`.
 */
async function withPage(targetUrl, action) {
  runContext.targetUrl = targetUrl;

  const visit = async (driver) => {
    // Navigate To Page
    try {
      await driver.get(targetUrl);
      runContext.currentUrl = await driver.getCurrentUrl();
    } catch (err) {
      cliError("NAVIGATION_FAILED", err.message);
    }

    // Wait For Page Readiness
    await waitForReadyState(driver);
    await waitForJs(driver);

    // Run Prelude Script
    const preludeResult = await runPreludeScript(driver);

    return action(driver, preludeResult);
  };

  return withDriver(visit);
}
|
|
|
|
// In-page script passed to driver.executeScript by snapshotCommand. It is
// written as a function body (note the top-level return) and yields the page
// text plus summaries of headings, links, buttons, inputs and forms.
// Values of password and hidden inputs are never included.
const snapshotScript = `
  const normalize = (value) => String(value || "").replace(/\\s+/g, " ").trim();
  const visibleText = (element) => normalize(element?.innerText || element?.textContent || "");
  const safeValue = (input) => ["password", "hidden"].includes(input.type) ? "" : input.value || "";

  // A control named "action" or "method" shadows the form property of the same
  // name, so the property may be an element instead of a string. Fall back to
  // the raw attribute in that case.
  const formProperty = (form, name) => {
    const value = form[name];
    return typeof value === "string" ? value : form.getAttribute(name) || "";
  };

  const labelText = (input) => {
    const labels = Array.from(input.labels || []).map((label) => visibleText(label)).filter(Boolean);
    if (labels.length > 0) return labels.join(" ");

    if (input.id) {
      const label = Array.from(document.querySelectorAll("label[for]"))
        .find((candidate) => candidate.getAttribute("for") === input.id);
      if (label) return visibleText(label);
    }

    return "";
  };

  const inputSummary = (input) => ({
    type: input.type || input.tagName.toLowerCase(),
    name: input.name || "",
    id: input.id || "",
    placeholder: input.placeholder || "",
    value: safeValue(input),
    label: labelText(input),
  });

  const collectHeadings = () => {
    try {
      return Array.from(document.querySelectorAll("h1,h2,h3,h4,h5,h6,[role='heading']"))
        .map((heading) => {
          const tagLevel = heading.tagName.match(/^H([1-6])$/i)?.[1];
          const ariaLevel = heading.getAttribute("aria-level");
          const level = Number.parseInt(tagLevel || ariaLevel || "0", 10);
          const text = visibleText(heading);

          return text ? { level: level || null, text } : null;
        })
        .filter(Boolean);
    } catch {
      return [];
    }
  };

  return {
    text: normalize(document.body?.innerText || ""),
    headings: collectHeadings(),
    links: Array.from(document.querySelectorAll("a[href]"))
      .map((link) => ({ text: visibleText(link), href: link.href }))
      .filter((link) => link.text || link.href),
    buttons: Array.from(document.querySelectorAll("button,input[type='button'],input[type='submit'],input[type='reset'],[role='button']"))
      .map((button) => ({
        text: visibleText(button) || button.value || button.getAttribute("aria-label") || "",
        type: button.type || button.getAttribute("role") || "button",
        name: button.name || "",
        id: button.id || "",
      }))
      .filter((button) => button.text || button.name || button.id),
    inputs: Array.from(document.querySelectorAll("input,textarea,select"))
      .map(inputSummary),
    forms: Array.from(document.querySelectorAll("form"))
      .map((form) => ({
        action: formProperty(form, "action"),
        method: (formProperty(form, "method") || "get").toLowerCase(),
        text: visibleText(form),
        inputs: Array.from(form.querySelectorAll("input,textarea,select")).map(inputSummary),
      })),
  };
`;
|
|
|
|
/**
 * `snapshot` command: load the page and return its URL, title and the result
 * of running snapshotScript in it.
 *
 * @returns {Promise<{ok: true, url: string, title: string, result: unknown}>}
 * @throws USAGE_ERROR error (via usage) when no URL argument was given.
 */
async function snapshotCommand() {
  const targetUrl = getPositionalArgs()[0];
  if (!targetUrl) {
    usage();
  }

  const capture = async (driver) => {
    // Capture Page Metadata (all three requests are issued before awaiting)
    const pendingUrl = driver.getCurrentUrl();
    const pendingTitle = driver.getTitle();
    const pendingSnapshot = driver.executeScript(snapshotScript);
    const [url, title, result] = await Promise.all([pendingUrl, pendingTitle, pendingSnapshot]);

    return { ok: true, url, title, result };
  };

  return withPage(targetUrl, capture);
}
|
|
|
|
/**
 * `exec` command: load the page and return the prelude script's result.
 *
 * @returns {Promise<unknown>} Result of the --js / --script code as produced
 *   by withPage.
 * @throws USAGE_ERROR error (via usage) when the URL or the script is missing.
 */
async function execCommand() {
  const targetUrl = getPositionalArgs()[0];
  const hasScript = Boolean(inlineJs || scriptPath);

  if (!targetUrl || !hasScript) {
    usage();
  }

  // The script already ran inside withPage; just surface its result.
  const passThrough = async (_driver, scriptResult) => scriptResult;
  return withPage(targetUrl, passThrough);
}
|
|
|
|
/**
 * `screenshot` command: load the page and save a screenshot to disk.
 *
 * The image returned by driver.takeScreenshot() is written with base64
 * decoding to `--output` (default "screenshot.png").
 *
 * @returns {Promise<{ok: true, result: {path: string}}>}
 * @throws USAGE_ERROR error (via usage) when no URL argument was given.
 */
async function screenshotCommand() {
  const targetUrl = getPositionalArgs()[0];
  const outputPath = getOption("--output") ?? "screenshot.png";

  if (!targetUrl) {
    usage();
  }

  const capture = async (driver) => {
    // Save Screenshot
    const encodedImage = await driver.takeScreenshot();
    writeFileSync(outputPath, encodedImage, "base64");

    return { ok: true, result: { path: outputPath } };
  };

  return withPage(targetUrl, capture);
}
|
|
|
|
/**
 * Collapse every run of whitespace to a single space and strip both ends,
 * so the text fits on one Markdown line.
 *
 * @param {string} text - Raw title or byline.
 * @returns {string} Single-line, trimmed text.
 */
function markdownTitle(text) {
  const words = text.split(/\s+/).filter(Boolean);
  return words.join(" ");
}
|
|
|
|
function articleToMarkdown(article) {
|
|
const turndown = new TurndownService({
|
|
headingStyle: "atx",
|
|
codeBlockStyle: "fenced",
|
|
});
|
|
|
|
// Convert Reader HTML
|
|
const body = turndown.turndown(article.html).trim();
|
|
const parts = [];
|
|
|
|
// Add Article Metadata
|
|
if (article.title) {
|
|
parts.push(`# ${markdownTitle(article.title)}`);
|
|
}
|
|
if (article.byline) {
|
|
parts.push(`_${markdownTitle(article.byline)}_`);
|
|
}
|
|
if (body) {
|
|
parts.push(body);
|
|
}
|
|
|
|
return `${parts.join("\n\n").trim()}\n`;
|
|
}
|
|
|
|
/**
 * Pick the representation of a reader article that matches `format`.
 *
 * @param {{markdown: string, html: string, text: string}} article
 * @param {string} format - One of "markdown", "html", "text" or "json".
 * @returns {string | object} The matching string field, or the whole article
 *   object for "json".
 * @throws INVALID_OPTION error raised through cliError for any other format.
 */
function renderReaderOutput(article, format) {
  if (format === "json") {
    return article;
  }

  if (format === "markdown" || format === "html" || format === "text") {
    return article[format];
  }

  cliError(
    "INVALID_OPTION",
    `Unsupported reader format: ${format}. Expected markdown, html, text, or json.`,
  );
}
|
|
|
|
/**
 * `search` command: run a query against the selected search provider.
 *
 * All positional arguments are joined with spaces to form the query. Only the
 * "kagi" provider is implemented.
 *
 * @returns {Promise<unknown>} Whatever searchKagi resolves to.
 * @throws USAGE_ERROR error (via usage) for an empty query,
 *   UNSUPPORTED_SEARCH_PROVIDER error (via cliError) for other providers.
 */
async function searchCommand() {
  const provider = getOption("--provider") ?? "kagi";
  const query = getPositionalArgs().join(" ");

  if (!query) {
    usage();
  }

  // Run Provider Search
  if (provider !== "kagi") {
    cliError(
      "UNSUPPORTED_SEARCH_PROVIDER",
      `Unsupported search provider: ${provider}. Expected kagi.`,
    );
  }

  const token = getOption("--token");
  return searchKagi({ query, token, headless, existingUrl, timeoutMs });
}
|
|
|
|
/**
 * `reader` command: open the page in Firefox Reader View and return the
 * extracted article in the requested format.
 *
 * The page is first loaded normally via withPage; its final URL is then opened
 * as `about:reader?url=...` and polled until reader content (or a reader
 * error) appears. With `--output` the rendered output is written to that file
 * and a `{ ok, result: { path } }` record is returned instead.
 *
 * @returns {Promise<string | object>} Rendered article, or the file record.
 * @throws USAGE_ERROR error (via usage) when no URL argument was given,
 *   INVALID_OPTION error for an unsupported --format, TIMEOUT error when the
 *   wait for reader content is rejected (including reader-view errors).
 */
async function readerCommand() {
  const [targetUrl] = getPositionalArgs();
  const outputPath = getOption("--output");
  const format = getOption("--format") ?? "markdown";

  if (!targetUrl) usage();

  // Reject an unsupported format before any browser work is done; otherwise
  // the mistake would only surface after the page had been loaded and parsed.
  if (!["markdown", "html", "text", "json"].includes(format)) {
    cliError(
      "INVALID_OPTION",
      `Unsupported reader format: ${format}. Expected markdown, html, text, or json.`,
    );
  }

  return withPage(targetUrl, async (driver) => {
    // Capture Final Url
    const finalUrl = await driver.getCurrentUrl();

    // Open Firefox Reader View
    const readerUrl = `about:reader?url=${encodeURIComponent(finalUrl)}`;
    await driver.get(readerUrl);

    // Wait For Reader Content
    let article;
    try {
      article = await driver.wait(async () => {
        // Yields the article once content has text, throws when the reader
        // shows an error, and yields null (keep polling) otherwise.
        return driver.executeScript(`
          const content = document.querySelector("#moz-reader-content, .moz-reader-content");
          const error = document.querySelector(".reader-error");
          const text = content?.innerText?.trim() || "";

          if (text) {
            return {
              title: document.querySelector("h1.reader-title")?.textContent?.trim() || document.title || "",
              byline: document.querySelector(".reader-byline, .reader-credits")?.textContent?.trim() || "",
              siteName: document.querySelector(".reader-domain")?.textContent?.trim() || "",
              html: content.innerHTML,
              text,
              readerUrl: location.href,
            };
          }

          if (error?.textContent?.trim()) {
            throw new Error(error.textContent.trim());
          }

          return null;
        `);
      }, timeoutMs, `No readable article content found for URL: ${targetUrl}`);
    } catch (err) {
      cliError("TIMEOUT", err.message);
    }

    // Render Output
    article.sourceUrl = targetUrl;
    article.finalUrl = finalUrl;
    article.markdown = articleToMarkdown(article);
    const output = renderReaderOutput(article, format);

    if (outputPath) {
      writeFileSync(
        outputPath,
        typeof output === "object" ? JSON.stringify(output, null, 2) : output,
      );
      return {
        ok: true,
        result: {
          path: outputPath,
        },
      };
    }

    return output;
  });
}
|
|
|
|
/**
 * CLI entry point: show help or dispatch to the requested command.
 *
 * Help is printed when no command is given, when the command itself is
 * `--help`, or when `--help` appears anywhere after the command (it is listed
 * under "Common Options", so `glimpse snapshot --help` must work too).
 *
 * @returns {Promise<unknown>} The command's result, or undefined after
 *   printing help.
 * @throws Errors raised by validateCommonOptions, by the command, or
 *   by unknownCommand for an unrecognized command name.
 */
async function main() {
  const wantsHelp = !command || command === "--help" || args.includes("--help");
  if (wantsHelp) {
    printHelp();
    return undefined;
  }

  validateCommonOptions();

  switch (command) {
    case "snapshot":
      return snapshotCommand();
    case "exec":
      return execCommand();
    case "screenshot":
      return screenshotCommand();
    case "reader":
      return readerCommand();
    case "search":
      return searchCommand();
    default:
      unknownCommand(command);
  }
}
|
|
|
|
/**
 * Print a failed run as JSON on stderr and exit with status 1.
 *
 * The record contains the error's code (COMMAND_FAILED when it has none), its
 * message, the elapsed time and, when known, the current or target URL taken
 * from runContext.
 *
 * @param {Error & {code?: string}} err - Rejection from main or printResult.
 */
function reportFailure(err) {
  const output = {
    ok: false,
    error: {
      code: err.code || "COMMAND_FAILED",
      message: err.message,
    },
    elapsedMs: elapsedMs(),
  };

  // Prefer the URL the browser ended up on over the one that was requested.
  const failedUrl = runContext.currentUrl || runContext.targetUrl;
  if (failedUrl) {
    output.url = failedUrl;
  }

  console.error(JSON.stringify(output, null, 2));
  process.exit(1);
}

// Run The CLI
main().then(printResult).catch(reportFailure);
|