Files
glimpse/src/index.ts

585 lines
16 KiB
JavaScript
Executable File

#!/usr/bin/env node
import { loadConfig, type GlimpseConfig } from "./config.js";
import { createDriver } from "./driver.js";
import { searchKagi } from "./providers/kagi.js";
import { readFileSync, writeFileSync } from "node:fs";
import TurndownService from "turndown";
// Wait budget in milliseconds used when --timeout is not supplied.
const DEFAULT_TIMEOUT_MS = 10000;
// Pause in milliseconds between successive --wait-js polls.
const POLL_INTERVAL_MS = 200;
// Timestamp taken at module load; elapsedMs() reports time relative to it.
const startTime = Date.now();
// URLs recorded while a command runs so the failure report can include one.
const runContext: { targetUrl?: string; currentUrl?: string } = {};
// Parse CLI Args
// The first argument after the script path is the command; the rest are its args.
const [command, ...args] = process.argv.slice(2);
// Headless is the default; the --no-headless flag turns it off.
const headless = !args.includes("--no-headless");
// getOption is a function declaration further down, so it is hoisted and
// callable here. Each value below is undefined when its option is absent.
const existingUrl = getOption("--url");
const inlineJs = getOption("--js");
const scriptPath = getOption("--script");
const waitJs = getOption("--wait-js");
const waitUntil = getOption("--wait-until") ?? "none";
const configPath = getOption("--config");
// Replaced by loadConfig() inside main().
let appConfig: GlimpseConfig = {};
// Replaced by the parsed --timeout value inside validateCommonOptions().
let timeoutMs = DEFAULT_TIMEOUT_MS;
/**
 * Looks up a `--name=value` style option among the parsed CLI arguments.
 *
 * Only the first argument that begins with `<name>=` is used; a bare flag
 * without `=` never matches.
 *
 * @param name Option name including its leading dashes, e.g. `--timeout`.
 * @returns The text following the first `=`, or `undefined` when absent.
 */
function getOption(name) {
  const marker = `${name}=`;
  for (const candidate of args) {
    if (candidate.startsWith(marker)) {
      // Everything after the marker is the value, including any further "=".
      return candidate.slice(marker.length);
    }
  }
  return undefined;
}
/**
 * Collects the CLI arguments that are not `--` prefixed options.
 *
 * @returns The arguments, in their original order, that do not start with `--`.
 */
function getPositionalArgs() {
  const positional: string[] = [];
  for (const arg of args) {
    if (!arg.startsWith("--")) {
      positional.push(arg);
    }
  }
  return positional;
}
/**
 * Reports how long the process has been running.
 *
 * @returns Milliseconds between the module-level `startTime` and now.
 */
function elapsedMs() {
  const now = Date.now();
  return now - startTime;
}
/**
 * Writes a command result to stdout.
 *
 * - `undefined` prints nothing.
 * - A non-array object is copied and given an `elapsedMs` field (an existing
 *   non-nullish `elapsedMs` is kept), then printed as indented JSON.
 * - Arrays and `null` are printed as indented JSON unchanged.
 * - Anything else is printed via `String()`.
 *
 * The output always ends with exactly one added newline unless the text
 * already ends with one.
 *
 * @param result Value returned by the command.
 */
function printResult(result) {
  if (result === undefined) {
    return;
  }

  let payload = result;
  const isNonArrayObject =
    Boolean(result) && typeof result === "object" && !Array.isArray(result);
  if (isNonArrayObject) {
    payload = { ...result, elapsedMs: result.elapsedMs ?? elapsedMs() };
  }

  let text;
  if (typeof payload === "object") {
    text = JSON.stringify(payload, null, 2);
  } else {
    text = String(payload);
  }

  if (!text.endsWith("\n")) {
    text = `${text}\n`;
  }
  process.stdout.write(text);
}
/**
 * Error carrying a machine-readable code alongside the human-readable message.
 *
 * The top-level failure handler reads `code` and `message` to build the JSON
 * error report.
 */
class CliError extends Error {
  /** Machine-readable error identifier, e.g. `INVALID_OPTION`. */
  code: string;
  /** Optional structured context supplied by the thrower. */
  details: Record<string, unknown>;

  /**
   * @param code Machine-readable error identifier.
   * @param message Human-readable description.
   * @param details Optional structured context; defaults to an empty object.
   */
  constructor(
    code: string,
    message: string,
    details: Record<string, unknown> = {},
  ) {
    super(message);
    // Without this the instance reports the inherited name "Error".
    this.name = "CliError";
    this.code = code;
    this.details = details;
  }
}
/**
 * Throws a {@link CliError}; this function never returns.
 *
 * The explicit `never` return type lets the compiler treat code after a call
 * as unreachable, so values assigned in a preceding `try` are known to be set.
 *
 * @param code Machine-readable error identifier.
 * @param message Human-readable description.
 * @param details Optional structured context; defaults to an empty object.
 * @throws {CliError} Always.
 */
function cliError(
  code: string,
  message: string,
  details: Record<string, unknown> = {},
): never {
  throw new CliError(code, message, details);
}
/**
 * Fails with an `UNKNOWN_COMMAND` error naming the rejected command.
 *
 * @param name The command word the user supplied.
 */
function unknownCommand(name) {
  const message = `Unknown command: ${name}`;
  cliError("UNKNOWN_COMMAND", message);
}
/**
 * Builds the full `--help` text.
 *
 * @returns The help text as one string with lines separated by `\n` and no
 *   trailing newline.
 */
function helpText() {
  const lines = [
    `Usage: glimpse <command> <url> [options]`,
    `Commands:`,
    `snapshot <url> [options] Return an agent-friendly page snapshot as JSON`,
    `exec <url> [options] Execute JavaScript on a page and return the result`,
    `screenshot <url> [options] Save a PNG screenshot of a page`,
    `reader <url> [options] Extract Firefox Reader View content as Markdown`,
    `search <query> [options] Search using a supported provider and return JSON results`,
    `Common Options:`,
    `--help Show this help`,
    `--no-headless Show Firefox instead of running headless`,
    `--url=<server> Connect to an existing WebDriver server`,
    `--timeout=<ms> Maximum wait time in milliseconds (default: 10000)`,
    `--wait-js=<code> Poll JS until it returns a truthy value`,
    `--wait-until=<state> Wait for readiness: none, interactive, complete (default: none)`,
    `--js=<code> Execute inline JS before command logic`,
    `--script=<file> Execute JS from a file before command logic`,
    `--config=<file> Read config from a custom path`,
    `Exec Options:`,
    `--js=<code> Return the top-level JS result`,
    `--script=<file> Return the top-level script result`,
    `Screenshot Options:`,
    `--output=<file> Output PNG path (default: screenshot.png)`,
    `Reader Options:`,
    `--format=<format> Output format: markdown, html, text, json (default: markdown)`,
    `--output=<file> Write output to a file`,
    `Search Options:`,
    `--provider=<provider> Search provider: kagi (default: config or kagi)`,
    `--token=<token> Kagi token (default: KAGI_TOKEN or config)`,
    `Examples:`,
    `glimpse snapshot https://example.com`,
    `glimpse exec https://example.com --js="return document.title"`,
    `glimpse exec https://example.com --script=extract.js`,
    `glimpse screenshot https://example.com --js="document.body.style.zoom = '80%'" --output=example.png`,
    `glimpse reader https://example.com/article --script=prepare.js --output=article.md`,
    `KAGI_TOKEN=... glimpse search --provider=kagi "node.js browser automation"`,
  ];
  return lines.join("\n");
}
/** Writes the help text to stdout, followed by a newline. */
function printHelp() {
  const text = helpText();
  process.stdout.write(text + "\n");
}
/** Fails with a `USAGE_ERROR` that points the user at `--help`. */
function usage() {
  const message =
    "Usage: glimpse <command> <url> [options]. Run glimpse --help for details.";
  cliError("USAGE_ERROR", message);
}
/**
 * Reads and validates the `--timeout` option.
 *
 * The value must be a positive integer written in canonical decimal form:
 * the parsed number converted back to a string has to equal the input, which
 * rejects inputs such as `010`, `12.5` or `1e3`.
 *
 * @returns The parsed timeout in milliseconds, or `DEFAULT_TIMEOUT_MS` when
 *   the option is absent.
 * @throws An `INVALID_OPTION` error (via `cliError`) for any other value.
 */
function parseTimeout() {
  const raw = getOption("--timeout");
  if (raw === undefined) {
    return DEFAULT_TIMEOUT_MS;
  }

  const timeout = Number.parseInt(raw, 10);
  const isCanonicalPositiveInteger =
    Number.isInteger(timeout) && timeout > 0 && String(timeout) === raw;
  if (!isCanonicalPositiveInteger) {
    cliError("INVALID_OPTION", "--timeout must be a positive integer.");
  }
  return timeout;
}
/**
 * Validates the options shared by every command and stores the parsed
 * timeout in the module-level `timeoutMs`.
 *
 * @throws An `INVALID_OPTION` error (via `cliError`) when both `--js` and
 *   `--script` are given, when `--timeout` is invalid, or when `--wait-until`
 *   is not one of `none`, `interactive`, `complete`.
 */
function validateCommonOptions() {
  // Reject Conflicting Script Sources
  const hasBothScriptSources = Boolean(inlineJs) && Boolean(scriptPath);
  if (hasBothScriptSources) {
    cliError("INVALID_OPTION", "Use either --js or --script, not both.");
  }

  // Validate Timeout
  timeoutMs = parseTimeout();

  // Validate Wait State
  const allowedWaitStates = ["none", "interactive", "complete"];
  if (allowedWaitStates.includes(waitUntil)) {
    return;
  }
  cliError(
    "INVALID_OPTION",
    `Unsupported --wait-until value: ${waitUntil}. Expected none, interactive, or complete.`,
  );
}
/**
 * Resolves the JavaScript to run before the command logic.
 *
 * @returns The UTF-8 contents of the `--script` file when that option is
 *   set; otherwise the `--js` value, which may be `undefined`.
 */
function getPreludeScriptSource() {
  return scriptPath ? readFileSync(scriptPath, "utf-8") : inlineJs;
}
/**
 * Creates a WebDriver session, runs `action` with it, and always quits the
 * session afterwards.
 *
 * @param action Async callback that receives the driver.
 * @returns Whatever `action` resolves to.
 * @throws A `BROWSER_START_FAILED` error (via `cliError`) when the driver
 *   cannot be created; otherwise any error raised by `action`. A failure of
 *   `driver.quit()` is only surfaced when `action` itself succeeded.
 */
async function withDriver(action) {
  let driver;
  try {
    driver = await createDriver({ headless, existingUrl });
  } catch (err) {
    // A rejection is not guaranteed to be an Error instance.
    cliError(
      "BROWSER_START_FAILED",
      err instanceof Error ? err.message : String(err),
    );
  }

  let actionFailed = false;
  try {
    return await action(driver);
  } catch (err) {
    actionFailed = true;
    throw err;
  } finally {
    try {
      await driver.quit();
    } catch (quitErr) {
      // An error thrown from `finally` replaces the pending one, so a quit
      // failure must not hide the action's own, more informative error.
      if (!actionFailed) {
        throw quitErr;
      }
    }
  }
}
/**
 * Waits until `document.readyState` reaches the state requested with
 * `--wait-until`; does nothing when that option is `none`.
 *
 * `interactive` accepts both `interactive` and `complete`; any other value
 * requires `complete`.
 *
 * @param driver Driver used to poll the page.
 * @throws A `WAIT_TIMEOUT` error (via `cliError`) when `driver.wait` rejects
 *   for any reason.
 */
async function waitForReadyState(driver) {
  if (waitUntil === "none") {
    return;
  }

  const acceptedStates =
    waitUntil === "interactive" ? ["interactive", "complete"] : ["complete"];
  const pageIsReady = async () => {
    const state = await driver.executeScript("return document.readyState");
    return acceptedStates.includes(state);
  };

  try {
    await driver.wait(pageIsReady, timeoutMs);
  } catch {
    cliError(
      "WAIT_TIMEOUT",
      `Timed out after ${timeoutMs}ms waiting for --wait-until=${waitUntil}`,
    );
  }
}
/**
 * Polls the `--wait-js` snippet until it returns a truthy value; does nothing
 * when the option is not set.
 *
 * The snippet is evaluated every `POLL_INTERVAL_MS` until `timeoutMs` has
 * elapsed.
 *
 * @param driver Driver used to evaluate the snippet.
 * @throws A `SCRIPT_FAILED` error when the snippet throws, or a
 *   `WAIT_TIMEOUT` error when it never becomes truthy (both via `cliError`).
 */
async function waitForJs(driver) {
  if (!waitJs) {
    return;
  }

  // Evaluate Condition Once
  const evaluate = async () => {
    try {
      return await driver.executeScript(waitJs);
    } catch (err) {
      cliError("SCRIPT_FAILED", `--wait-js failed: ${err.message}`);
    }
  };
  const pause = () =>
    new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));

  // Poll Until Deadline
  const startedAt = Date.now();
  while (Date.now() - startedAt < timeoutMs) {
    if (await evaluate()) {
      return;
    }
    await pause();
  }

  cliError(
    "WAIT_TIMEOUT",
    `Timed out after ${timeoutMs}ms waiting for --wait-js`,
  );
}
/**
 * Executes the prelude script (`--script` file or `--js` snippet) in the page.
 *
 * @param driver Driver used to execute the script.
 * @returns The script's result, or `undefined` when no prelude was supplied.
 * @throws A `SCRIPT_FAILED` error (via `cliError`) when execution fails.
 */
async function runPreludeScript(driver) {
  const source = getPreludeScriptSource();
  if (source) {
    try {
      return await driver.executeScript(source);
    } catch (err) {
      cliError("SCRIPT_FAILED", `Prelude script failed: ${err.message}`);
    }
  }
  return undefined;
}
/**
 * Opens `targetUrl` in a fresh driver session, applies the configured waits
 * and prelude script, then hands control to `action`.
 *
 * The target URL, and the URL reached after navigation, are recorded in
 * `runContext`.
 *
 * @param targetUrl Page to load.
 * @param action Async callback receiving `(driver, preludeResult)`.
 * @returns Whatever `action` resolves to.
 * @throws A `NAVIGATION_FAILED` error (via `cliError`) when loading the page
 *   or reading its URL fails, plus anything the waits, prelude or `action` raise.
 */
async function withPage(targetUrl, action) {
  runContext.targetUrl = targetUrl;

  const visitAndRun = async (driver) => {
    // Navigate To Page
    try {
      await driver.get(targetUrl);
      runContext.currentUrl = await driver.getCurrentUrl();
    } catch (err) {
      cliError("NAVIGATION_FAILED", err.message);
    }

    // Wait For Page Readiness
    await waitForReadyState(driver);
    await waitForJs(driver);

    // Run Prelude Script
    const preludeResult = await runPreludeScript(driver);
    return action(driver, preludeResult);
  };

  return withDriver(visitAndRun);
}
// In-page script for the snapshot command; it is passed as source text to
// driver.executeScript. It returns one object with these fields:
//   text     - document.body.innerText with whitespace runs collapsed
//   headings - h1-h6 and [role='heading'] elements that have text, as
//              { level, text }; level is null when neither the tag name nor
//              aria-level yields a non-zero number
//   links    - a[href] elements as { text, href }, kept when either is set
//   buttons  - button-like elements as { text, type, name, id }, kept when
//              text, name or id is set
//   inputs   - every input, textarea and select, summarized by inputSummary
//   forms    - every form as { action, method, text, inputs }
// Values of password and hidden inputs are reported as "" (see safeValue).
// The regex is written with a doubled backslash because this is a template
// literal; the script text itself contains a single backslash.
const snapshotScript = `
const normalize = (value) => String(value || "").replace(/\\s+/g, " ").trim();
const visibleText = (element) => normalize(element?.innerText || element?.textContent || "");
const safeValue = (input) => ["password", "hidden"].includes(input.type) ? "" : input.value || "";
const labelText = (input) => {
const labels = Array.from(input.labels || []).map((label) => visibleText(label)).filter(Boolean);
if (labels.length > 0) return labels.join(" ");
if (input.id) {
const label = Array.from(document.querySelectorAll("label[for]"))
.find((candidate) => candidate.getAttribute("for") === input.id);
if (label) return visibleText(label);
}
return "";
};
const inputSummary = (input) => ({
type: input.type || input.tagName.toLowerCase(),
name: input.name || "",
id: input.id || "",
placeholder: input.placeholder || "",
value: safeValue(input),
label: labelText(input),
});
const collectHeadings = () => {
try {
return Array.from(document.querySelectorAll("h1,h2,h3,h4,h5,h6,[role='heading']"))
.map((heading) => {
const tagLevel = heading.tagName.match(/^H([1-6])$/i)?.[1];
const ariaLevel = heading.getAttribute("aria-level");
const level = Number.parseInt(tagLevel || ariaLevel || "0", 10);
const text = visibleText(heading);
return text ? { level: level || null, text } : null;
})
.filter(Boolean);
} catch {
return [];
}
};
return {
text: normalize(document.body?.innerText || ""),
headings: collectHeadings(),
links: Array.from(document.querySelectorAll("a[href]"))
.map((link) => ({ text: visibleText(link), href: link.href }))
.filter((link) => link.text || link.href),
buttons: Array.from(document.querySelectorAll("button,input[type='button'],input[type='submit'],input[type='reset'],[role='button']"))
.map((button) => ({
text: visibleText(button) || button.value || button.getAttribute("aria-label") || "",
type: button.type || button.getAttribute("role") || "button",
name: button.name || "",
id: button.id || "",
}))
.filter((button) => button.text || button.name || button.id),
inputs: Array.from(document.querySelectorAll("input,textarea,select"))
.map(inputSummary),
forms: Array.from(document.querySelectorAll("form"))
.map((form) => ({
action: form.action || "",
method: (form.method || "get").toLowerCase(),
text: visibleText(form),
inputs: Array.from(form.querySelectorAll("input,textarea,select")).map(inputSummary),
})),
};
`;
/**
 * `snapshot` command: loads the first positional URL and returns the page's
 * URL, title and the structured summary produced by `snapshotScript`.
 *
 * @returns `{ ok: true, url, title, result }`.
 * @throws A `USAGE_ERROR` (via `usage`) when no URL was given.
 */
async function snapshotCommand() {
  const targetUrl = getPositionalArgs()[0];
  if (!targetUrl) {
    usage();
  }

  return withPage(targetUrl, async (driver) => {
    // Capture Page Metadata
    const pending = [
      driver.getCurrentUrl(),
      driver.getTitle(),
      driver.executeScript(snapshotScript),
    ];
    const [url, title, result] = await Promise.all(pending);
    return { ok: true, url, title, result };
  });
}
/**
 * `exec` command: loads the first positional URL and returns whatever the
 * prelude script (`--js` or `--script`) evaluated to.
 *
 * @returns The prelude script's result, unchanged.
 * @throws A `USAGE_ERROR` (via `usage`) when the URL or the script is missing.
 */
async function execCommand() {
  const targetUrl = getPositionalArgs()[0];
  const hasScript = Boolean(inlineJs || scriptPath);
  if (!targetUrl || !hasScript) {
    usage();
  }
  // withPage already ran the script; the command only passes its result on.
  return withPage(targetUrl, async (_driver, scriptResult) => scriptResult);
}
/**
 * `screenshot` command: loads the first positional URL and writes a
 * screenshot to `--output` (default `screenshot.png`).
 *
 * @returns `{ ok: true, result: { path } }` naming the written file.
 * @throws A `USAGE_ERROR` (via `usage`) when no URL was given.
 */
async function screenshotCommand() {
  const targetUrl = getPositionalArgs()[0];
  const outputPath = getOption("--output") ?? "screenshot.png";
  if (!targetUrl) {
    usage();
  }

  return withPage(targetUrl, async (driver) => {
    // Save Screenshot
    // The image data is written with the "base64" encoding, i.e. decoded to bytes.
    const encodedImage = await driver.takeScreenshot();
    writeFileSync(outputPath, encodedImage, "base64");

    const result = { path: outputPath };
    return { ok: true, result };
  });
}
/**
 * Flattens text onto a single line for use in a Markdown heading or byline.
 *
 * @param text Raw text, possibly containing newlines or repeated spaces.
 * @returns The text with each whitespace run replaced by one space and the
 *   ends trimmed.
 */
function markdownTitle(text) {
  const singleSpaced = text.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
/**
 * Renders an extracted article as Markdown.
 *
 * The output is, in order and separated by blank lines: an ATX `#` heading
 * with the title, the byline wrapped in underscores, and the article HTML
 * converted by Turndown. Empty parts are left out.
 *
 * @param article Object with `html` and optional `title` and `byline`.
 * @returns The Markdown document, ending with a single newline.
 */
function articleToMarkdown(article) {
  const converter = new TurndownService({
    headingStyle: "atx",
    codeBlockStyle: "fenced",
  });

  // Convert Reader HTML
  const bodyMarkdown = converter.turndown(article.html).trim();

  // Add Article Metadata
  const sections = [
    article.title ? `# ${markdownTitle(article.title)}` : "",
    article.byline ? `_${markdownTitle(article.byline)}_` : "",
    bodyMarkdown,
  ].filter(Boolean);

  const documentText = sections.join("\n\n").trim();
  return `${documentText}\n`;
}
/**
 * Selects the representation of an article that matches `format`.
 *
 * @param article Article object carrying `markdown`, `html` and `text`.
 * @param format One of `markdown`, `html`, `text`, `json`.
 * @returns The matching field, or the whole article object for `json`.
 * @throws An `INVALID_OPTION` error (via `cliError`) for any other format.
 */
function renderReaderOutput(article, format) {
  if (format === "markdown") {
    return article.markdown;
  }
  if (format === "html") {
    return article.html;
  }
  if (format === "text") {
    return article.text;
  }
  if (format === "json") {
    return article;
  }
  cliError(
    "INVALID_OPTION",
    `Unsupported reader format: ${format}. Expected markdown, html, text, or json.`,
  );
}
/**
 * `search` command: joins the positional arguments into one query and runs
 * it through the selected provider.
 *
 * The provider comes from `--provider`, then the config, then defaults to
 * `kagi`, which is the only provider handled here.
 *
 * @returns Whatever `searchKagi` returns.
 * @throws A `USAGE_ERROR` (via `usage`) when the query is empty, or an
 *   `UNSUPPORTED_SEARCH_PROVIDER` error (via `cliError`) for other providers.
 */
async function searchCommand() {
  const query = getPositionalArgs().join(" ");
  const provider =
    getOption("--provider") ?? appConfig.search?.provider ?? "kagi";
  if (!query) {
    usage();
  }

  // Run Provider Search
  if (provider !== "kagi") {
    cliError(
      "UNSUPPORTED_SEARCH_PROVIDER",
      `Unsupported search provider: ${provider}. Expected kagi.`,
    );
  }

  const token = getOption("--token");
  return searchKagi({
    query,
    token,
    config: appConfig,
    headless,
    existingUrl,
    timeoutMs,
  });
}
/**
 * `reader` command: loads the first positional URL, reopens it through
 * `about:reader`, and returns the extracted article in the `--format`
 * requested (default `markdown`).
 *
 * When `--output` is given the rendered output is written to that file
 * (objects as indented JSON) and a `{ ok, result: { path } }` summary is
 * returned instead.
 *
 * NOTE(review): the prelude script runs on the original page inside withPage,
 * before the navigation to about:reader; changes it makes to the page
 * presumably do not carry over into the reader view - confirm.
 *
 * @returns The rendered article, or the file summary when `--output` is set.
 * @throws A `USAGE_ERROR` (via `usage`) when no URL was given, an
 *   `INVALID_OPTION` error for an unsupported format, or a `TIMEOUT` error
 *   when waiting for reader content fails (all via `cliError`).
 */
async function readerCommand() {
  const [targetUrl] = getPositionalArgs();
  const outputPath = getOption("--output");
  const format = getOption("--format") ?? "markdown";
  if (!targetUrl) usage();
  // Validate Format
  // Checked before any browser work so a mistyped --format fails at once
  // instead of after a full page load and extraction.
  const supportedFormats = ["markdown", "html", "text", "json"];
  if (!supportedFormats.includes(format)) {
    cliError(
      "INVALID_OPTION",
      `Unsupported reader format: ${format}. Expected markdown, html, text, or json.`,
    );
  }
  return withPage(targetUrl, async (driver) => {
    // Capture Final Url
    const finalUrl = await driver.getCurrentUrl();
    // Open Firefox Reader View
    const readerUrl = `about:reader?url=${encodeURIComponent(finalUrl)}`;
    await driver.get(readerUrl);
    // Wait For Reader Content
    // The in-page script returns the article once the content has text,
    // throws when the page shows an error message, and returns null
    // otherwise so that driver.wait keeps polling.
    let article;
    try {
      article = await driver.wait(
        async () => {
          return driver.executeScript(`
const content = document.querySelector("#moz-reader-content, .moz-reader-content");
const error = document.querySelector(".reader-error");
const text = content?.innerText?.trim() || "";
if (text) {
return {
title: document.querySelector("h1.reader-title")?.textContent?.trim() || document.title || "",
byline: document.querySelector(".reader-byline, .reader-credits")?.textContent?.trim() || "",
siteName: document.querySelector(".reader-domain")?.textContent?.trim() || "",
html: content.innerHTML,
text,
readerUrl: location.href,
};
}
if (error?.textContent?.trim()) {
throw new Error(error.textContent.trim());
}
return null;
`);
        },
        timeoutMs,
        `No readable article content found for URL: ${targetUrl}`,
      );
    } catch (err) {
      // Every rejection of the wait is reported under the TIMEOUT code.
      cliError("TIMEOUT", err.message);
    }
    // Render Output
    article.sourceUrl = targetUrl;
    article.finalUrl = finalUrl;
    article.markdown = articleToMarkdown(article);
    const output = renderReaderOutput(article, format);
    if (outputPath) {
      writeFileSync(
        outputPath,
        typeof output === "object" ? JSON.stringify(output, null, 2) : output,
      );
      return {
        ok: true,
        result: {
          path: outputPath,
        },
      };
    }
    return output;
  });
}
/**
 * CLI entry point: prints help, or validates the shared options, loads the
 * config and dispatches to the requested command.
 *
 * Help is printed when no command is given or when `--help` appears anywhere
 * on the command line, so `glimpse snapshot --help` works as the help text's
 * Common Options section advertises.
 *
 * @returns The selected command's result, or `undefined` after printing help.
 * @throws An `UNKNOWN_COMMAND` error (via `unknownCommand`) for an
 *   unrecognized command, plus anything validation, config loading or the
 *   command itself raises.
 */
async function main() {
  // `command` is checked separately because it is not part of `args`.
  const wantsHelp = command === "--help" || args.includes("--help");
  if (!command || wantsHelp) {
    printHelp();
    return undefined;
  }
  validateCommonOptions();
  // Load Config
  appConfig = loadConfig({ path: configPath });
  switch (command) {
    case "snapshot":
      return snapshotCommand();
    case "exec":
      return execCommand();
    case "screenshot":
      return screenshotCommand();
    case "reader":
      return readerCommand();
    case "search":
      return searchCommand();
    default:
      unknownCommand(command);
  }
}
// Run the CLI: print the result on success; on failure write a JSON error
// report to stderr and exit with status 1. The report includes a URL when
// one was recorded, preferring the URL reached after navigation.
main()
  .then(printResult)
  .catch((err) => {
    const failedUrl = runContext.currentUrl || runContext.targetUrl;
    const report: {
      ok: false;
      error: { code: string; message: string };
      elapsedMs: number;
      url?: string;
    } = {
      ok: false,
      error: {
        // Errors without a code of their own fall back to COMMAND_FAILED.
        code: err.code || "COMMAND_FAILED",
        message: err.message,
      },
      elapsedMs: elapsedMs(),
      ...(failedUrl ? { url: failedUrl } : {}),
    };
    console.error(JSON.stringify(report, null, 2));
    process.exit(1);
  });