feat(config): add TypeScript build and config support
This commit is contained in:
584
src/index.ts
Executable file
584
src/index.ts
Executable file
@@ -0,0 +1,584 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import { loadConfig, type GlimpseConfig } from "./config.js";
|
||||
import { createDriver } from "./driver.js";
|
||||
import { searchKagi } from "./providers/kagi.js";
|
||||
import { readFileSync, writeFileSync } from "node:fs";
|
||||
import TurndownService from "turndown";
|
||||
|
||||
// Wait budget used when --timeout is not supplied.
const DEFAULT_TIMEOUT_MS = 10000;
// Pause between successive --wait-js evaluations.
const POLL_INTERVAL_MS = 200;
// Captured at module load; elapsedMs() measures from this instant.
const startTime = Date.now();
// URLs recorded during a run so the failure report can include them.
const runContext: { targetUrl?: string; currentUrl?: string } = {};

// Parse CLI Args
const cliTokens = process.argv.slice(2);
const command = cliTokens[0];
const args = cliTokens.slice(1);

// Boolean switch: headless unless --no-headless is present.
const headless = !args.includes("--no-headless");

// Value-carrying options (`--name=value`); undefined when absent.
const configPath = getOption("--config");
const existingUrl = getOption("--url");
const inlineJs = getOption("--js");
const scriptPath = getOption("--script");
const waitJs = getOption("--wait-js");
const waitUntil = getOption("--wait-until") ?? "none";

// Filled in by main(): config by loadConfig, timeout by validateCommonOptions.
let appConfig: GlimpseConfig = {};
let timeoutMs = DEFAULT_TIMEOUT_MS;
|
||||
|
||||
/**
 * Looks up a `--name=value` style option among the CLI arguments.
 *
 * @param name Option flag including its leading dashes, e.g. `--timeout`.
 * @returns The text following `name=` in the first matching argument, or
 *   `undefined` when no argument starts with `name=`.
 */
function getOption(name: string): string | undefined {
  const prefix = `${name}=`;
  const match = args.find((arg) => arg.startsWith(prefix));
  return match?.slice(prefix.length);
}
|
||||
|
||||
/**
 * Collects the CLI arguments that are not options.
 *
 * @returns Every argument that does not begin with `--`, in original order.
 */
function getPositionalArgs() {
  const positional: string[] = [];
  for (const arg of args) {
    if (!arg.startsWith("--")) {
      positional.push(arg);
    }
  }
  return positional;
}
|
||||
|
||||
/**
 * @returns Milliseconds elapsed since `startTime` was captured.
 */
function elapsedMs() {
  const now = Date.now();
  return now - startTime;
}
|
||||
|
||||
/**
 * Writes a command result to stdout, always terminated by a newline.
 *
 * - `undefined` prints nothing.
 * - Plain (non-array, non-null) objects are copied with an `elapsedMs` field
 *   added unless the result already carries one, then printed as indented JSON.
 * - Arrays and `null` are printed as indented JSON unchanged.
 * - Anything else is printed via `String(...)`.
 *
 * @param result Value produced by a command.
 */
function printResult(result: unknown): void {
  if (result === undefined) {
    return;
  }

  let outputValue: unknown = result;
  if (result !== null && typeof result === "object" && !Array.isArray(result)) {
    // Narrowed by the runtime checks above: a non-null, non-array object.
    const record = result as Record<string, unknown>;
    outputValue = { ...record, elapsedMs: record.elapsedMs ?? elapsedMs() };
  }

  const output =
    typeof outputValue === "object"
      ? JSON.stringify(outputValue, null, 2)
      : String(outputValue);

  process.stdout.write(output.endsWith("\n") ? output : `${output}\n`);
}
|
||||
|
||||
/**
 * Error carrying a machine-readable code in addition to its message.
 * The top-level failure handler reads `code` and `message` when building
 * the JSON error report.
 */
class CliError extends Error {
  /** Stable identifier for the failure, e.g. `USAGE_ERROR`. */
  readonly code: string;
  /** Optional structured context supplied by the thrower. */
  readonly details: Record<string, unknown>;

  /**
   * @param code Machine-readable failure code.
   * @param message Human-readable description.
   * @param details Extra context; defaults to an empty object.
   */
  constructor(
    code: string,
    message: string,
    details: Record<string, unknown> = {},
  ) {
    super(message);
    // Without this, instances report themselves as plain "Error".
    this.name = "CliError";
    this.code = code;
    this.details = details;
  }
}
|
||||
|
||||
/**
 * Throws a `CliError`; never returns.
 *
 * The explicit `never` return type lets the compiler treat code after a call
 * as unreachable, so guards such as `if (!x) cliError(...)` narrow `x`.
 *
 * @param code Machine-readable failure code.
 * @param message Human-readable description.
 * @param details Extra context; defaults to an empty object.
 * @throws {CliError} Always.
 */
function cliError(
  code: string,
  message: string,
  details: Record<string, unknown> = {},
): never {
  throw new CliError(code, message, details);
}
|
||||
|
||||
/**
 * Rejects an unrecognised command name; never returns.
 *
 * @param name The command token the user supplied.
 * @throws {CliError} Always, with code `UNKNOWN_COMMAND`.
 */
function unknownCommand(name: string): never {
  // Thrown directly so the `never` return type is provable in this function.
  throw new CliError("UNKNOWN_COMMAND", `Unknown command: ${name}`);
}
|
||||
|
||||
/**
 * Builds the full `--help` text.
 *
 * @returns Multi-line usage text without a trailing newline.
 */
function helpText(): string {
  return [
    "Usage: glimpse <command> <url> [options]",
    "",
    "Commands:",
    "  snapshot <url> [options]    Return an agent-friendly page snapshot as JSON",
    "  exec <url> [options]        Execute JavaScript on a page and return the result",
    "  screenshot <url> [options]  Save a PNG screenshot of a page",
    "  reader <url> [options]      Extract Firefox Reader View content as Markdown",
    "  search <query> [options]    Search using a supported provider and return JSON results",
    "",
    "Common Options:",
    "  --help                 Show this help",
    "  --no-headless          Show Firefox instead of running headless",
    "  --url=<server>         Connect to an existing WebDriver server",
    "  --timeout=<ms>         Maximum wait time in milliseconds (default: 10000)",
    "  --wait-js=<code>       Poll JS until it returns a truthy value",
    "  --wait-until=<state>   Wait for readiness: none, interactive, complete (default: none)",
    "  --js=<code>            Execute inline JS before command logic",
    "  --script=<file>        Execute JS from a file before command logic",
    "  --config=<file>        Read config from a custom path",
    "",
    "Exec Options:",
    "  --js=<code>            Return the top-level JS result",
    "  --script=<file>        Return the top-level script result",
    "",
    "Screenshot Options:",
    "  --output=<file>        Output PNG path (default: screenshot.png)",
    "",
    "Reader Options:",
    "  --format=<format>      Output format: markdown, html, text, json (default: markdown)",
    "  --output=<file>        Write output to a file",
    "",
    "Search Options:",
    "  --provider=<provider>  Search provider: kagi (default: config or kagi)",
    "  --token=<token>        Kagi token (default: KAGI_TOKEN or config)",
    "",
    "Examples:",
    "  glimpse snapshot https://example.com",
    '  glimpse exec https://example.com --js="return document.title"',
    "  glimpse exec https://example.com --script=extract.js",
    "  glimpse screenshot https://example.com --js=\"document.body.style.zoom = '80%'\" --output=example.png",
    "  glimpse reader https://example.com/article --script=prepare.js --output=article.md",
    '  KAGI_TOKEN=... glimpse search --provider=kagi "node.js browser automation"',
  ].join("\n");
}
|
||||
|
||||
/**
 * Prints the help text to stdout followed by a newline.
 */
function printHelp() {
  const text = helpText();
  process.stdout.write(`${text}\n`);
}
|
||||
|
||||
/**
 * Signals that required arguments are missing; never returns.
 *
 * The explicit `never` return type lets guards such as
 * `if (!targetUrl) usage();` narrow the checked value for the compiler.
 *
 * @throws {CliError} Always, with code `USAGE_ERROR`.
 */
function usage(): never {
  // Thrown directly so the `never` return type is provable in this function.
  throw new CliError(
    "USAGE_ERROR",
    "Usage: glimpse <command> <url> [options]. Run glimpse --help for details.",
  );
}
|
||||
|
||||
/**
 * Reads `--timeout` from the CLI arguments.
 *
 * @returns The parsed timeout in milliseconds, or `DEFAULT_TIMEOUT_MS` when
 *   the option is absent.
 * @throws {CliError} `INVALID_OPTION` unless the value is a positive integer
 *   written in canonical decimal form.
 */
function parseTimeout() {
  const raw = getOption("--timeout");
  if (raw === undefined) {
    return DEFAULT_TIMEOUT_MS;
  }

  const timeout = Number.parseInt(raw, 10);
  // The round-trip comparison rejects inputs parseInt only partially consumes
  // or normalises, such as "10.5", "5s" or "010".
  const isCanonicalPositiveInt =
    Number.isInteger(timeout) && timeout > 0 && String(timeout) === raw;

  if (!isCanonicalPositiveInt) {
    cliError("INVALID_OPTION", "--timeout must be a positive integer.");
  }

  return timeout;
}
|
||||
|
||||
/**
 * Validates the options shared by all commands and stores the parsed
 * timeout in `timeoutMs`.
 *
 * @throws {CliError} `INVALID_OPTION` when both `--js` and `--script` are
 *   given, when `--timeout` is invalid, or when `--wait-until` is not one of
 *   the supported states.
 */
function validateCommonOptions() {
  // Validate Script Source
  const hasBothScriptSources = Boolean(inlineJs) && Boolean(scriptPath);
  if (hasBothScriptSources) {
    cliError("INVALID_OPTION", "Use either --js or --script, not both.");
  }

  // Validate Timeout
  timeoutMs = parseTimeout();

  // Validate Wait State
  const supportedWaitStates = new Set(["none", "interactive", "complete"]);
  if (supportedWaitStates.has(waitUntil)) {
    return;
  }

  cliError(
    "INVALID_OPTION",
    `Unsupported --wait-until value: ${waitUntil}. Expected none, interactive, or complete.`,
  );
}
|
||||
|
||||
/**
 * Resolves the JavaScript to run before command logic.
 *
 * @returns The contents of the `--script` file when that option is set,
 *   otherwise the `--js` value (which may be `undefined`).
 */
function getPreludeScriptSource() {
  return scriptPath ? readFileSync(scriptPath, "utf-8") : inlineJs;
}
|
||||
|
||||
/**
 * Creates a driver, runs `action` with it, and quits the driver afterwards.
 *
 * @param action Callback receiving the driver; its result is returned.
 * @returns Whatever `action` resolves to.
 * @throws {CliError} `BROWSER_START_FAILED` when the driver cannot be created.
 *   Errors thrown by `action` propagate unchanged.
 */
async function withDriver(action) {
  let driver;

  try {
    driver = await createDriver({ headless, existingUrl });
  } catch (err) {
    cliError(
      "BROWSER_START_FAILED",
      err instanceof Error ? err.message : String(err),
    );
  }

  let outcome;
  try {
    outcome = await action(driver);
  } catch (actionError) {
    // Still shut the driver down, but never let a cleanup failure replace
    // the error that actually explains why the command failed.
    try {
      await driver.quit();
    } catch {
      // Intentionally ignored: the action's error takes precedence.
    }
    throw actionError;
  }

  // On success a quit failure is still reported, as before.
  await driver.quit();
  return outcome;
}
|
||||
|
||||
/**
 * Blocks until `document.readyState` satisfies `--wait-until`.
 *
 * Does nothing for `none`. For `interactive`, both `interactive` and
 * `complete` are accepted; otherwise only `complete` is.
 *
 * @param driver Driver for the page being inspected.
 * @throws {CliError} `WAIT_TIMEOUT` when `driver.wait` rejects.
 */
async function waitForReadyState(driver) {
  if (waitUntil === "none") {
    return;
  }

  const acceptedStates =
    waitUntil === "interactive" ? ["interactive", "complete"] : ["complete"];

  const pageIsReady = async () => {
    const state = await driver.executeScript("return document.readyState");
    return acceptedStates.includes(state);
  };

  try {
    await driver.wait(pageIsReady, timeoutMs);
  } catch {
    cliError(
      "WAIT_TIMEOUT",
      `Timed out after ${timeoutMs}ms waiting for --wait-until=${waitUntil}`,
    );
  }
}
|
||||
|
||||
/**
 * Polls the `--wait-js` snippet until it returns a truthy value.
 *
 * Does nothing when `--wait-js` was not supplied. The snippet is evaluated
 * every `POLL_INTERVAL_MS` until `timeoutMs` has elapsed.
 *
 * @param driver Driver used to execute the snippet.
 * @throws {CliError} `SCRIPT_FAILED` when the snippet throws, `WAIT_TIMEOUT`
 *   when it never returns a truthy value within the timeout.
 */
async function waitForJs(driver) {
  if (!waitJs) {
    return;
  }

  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    let result;

    try {
      result = await driver.executeScript(waitJs);
    } catch (err) {
      // Rejections are not guaranteed to be Error instances; narrow first.
      const reason = err instanceof Error ? err.message : String(err);
      cliError("SCRIPT_FAILED", `--wait-js failed: ${reason}`);
    }

    if (result) {
      return;
    }

    await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
  }

  cliError(
    "WAIT_TIMEOUT",
    `Timed out after ${timeoutMs}ms waiting for --wait-js`,
  );
}
|
||||
|
||||
/**
 * Executes the `--js` / `--script` prelude on the current page, if any.
 *
 * @param driver Driver used to execute the script.
 * @returns The script's result, or `undefined` when no prelude was supplied.
 * @throws {CliError} `SCRIPT_FAILED` when execution rejects.
 */
async function runPreludeScript(driver) {
  const scriptSource = getPreludeScriptSource();
  if (!scriptSource) {
    return undefined;
  }

  try {
    // `await` is required here so a rejection is caught by this try block.
    return await driver.executeScript(scriptSource);
  } catch (err) {
    // Rejections are not guaranteed to be Error instances; narrow first.
    const reason = err instanceof Error ? err.message : String(err);
    cliError("SCRIPT_FAILED", `Prelude script failed: ${reason}`);
  }
}
|
||||
|
||||
/**
 * Opens `targetUrl` in a fresh driver session, applies the configured waits
 * and prelude script, then hands control to `action`.
 *
 * Records `targetUrl` (and, after navigation, the driver's current URL) in
 * `runContext` so the failure report can include them.
 *
 * @param targetUrl URL to navigate to.
 * @param action Callback receiving the driver and the prelude script result.
 * @returns Whatever `action` resolves to.
 * @throws {CliError} `NAVIGATION_FAILED` when navigation rejects; wait and
 *   prelude failures propagate from their helpers.
 */
async function withPage(targetUrl, action) {
  runContext.targetUrl = targetUrl;

  return withDriver(async (driver) => {
    // Navigate To Page
    try {
      await driver.get(targetUrl);
      runContext.currentUrl = await driver.getCurrentUrl();
    } catch (err) {
      // Rejections are not guaranteed to be Error instances; narrow first.
      cliError(
        "NAVIGATION_FAILED",
        err instanceof Error ? err.message : String(err),
      );
    }

    // Wait For Page Readiness
    await waitForReadyState(driver);
    await waitForJs(driver);

    // Run Prelude Script
    const scriptResult = await runPreludeScript(driver);

    return action(driver, scriptResult);
  });
}
|
||||
|
||||
/**
 * In-page script passed to `driver.executeScript` by the snapshot command.
 *
 * Runs as a function body and returns an object with the page's normalized
 * text, headings, links, buttons, inputs and forms. Values of `password` and
 * `hidden` inputs are reported as empty strings.
 *
 * Note: `\\s` is written with a doubled backslash so the script receives the
 * regex `/\s+/g` after template-literal unescaping.
 */
const snapshotScript = `
  const normalize = (value) => String(value || "").replace(/\\s+/g, " ").trim();
  const visibleText = (element) => normalize(element?.innerText || element?.textContent || "");
  const safeValue = (input) => ["password", "hidden"].includes(input.type) ? "" : input.value || "";
  const labelText = (input) => {
    const labels = Array.from(input.labels || []).map((label) => visibleText(label)).filter(Boolean);
    if (labels.length > 0) return labels.join(" ");

    if (input.id) {
      const label = Array.from(document.querySelectorAll("label[for]"))
        .find((candidate) => candidate.getAttribute("for") === input.id);
      if (label) return visibleText(label);
    }

    return "";
  };
  const inputSummary = (input) => ({
    type: input.type || input.tagName.toLowerCase(),
    name: input.name || "",
    id: input.id || "",
    placeholder: input.placeholder || "",
    value: safeValue(input),
    label: labelText(input),
  });
  const collectHeadings = () => {
    try {
      return Array.from(document.querySelectorAll("h1,h2,h3,h4,h5,h6,[role='heading']"))
        .map((heading) => {
          const tagLevel = heading.tagName.match(/^H([1-6])$/i)?.[1];
          const ariaLevel = heading.getAttribute("aria-level");
          const level = Number.parseInt(tagLevel || ariaLevel || "0", 10);
          const text = visibleText(heading);

          return text ? { level: level || null, text } : null;
        })
        .filter(Boolean);
    } catch {
      return [];
    }
  };

  return {
    text: normalize(document.body?.innerText || ""),
    headings: collectHeadings(),
    links: Array.from(document.querySelectorAll("a[href]"))
      .map((link) => ({ text: visibleText(link), href: link.href }))
      .filter((link) => link.text || link.href),
    buttons: Array.from(document.querySelectorAll("button,input[type='button'],input[type='submit'],input[type='reset'],[role='button']"))
      .map((button) => ({
        text: visibleText(button) || button.value || button.getAttribute("aria-label") || "",
        type: button.type || button.getAttribute("role") || "button",
        name: button.name || "",
        id: button.id || "",
      }))
      .filter((button) => button.text || button.name || button.id),
    inputs: Array.from(document.querySelectorAll("input,textarea,select"))
      .map(inputSummary),
    forms: Array.from(document.querySelectorAll("form"))
      .map((form) => ({
        action: form.action || "",
        method: (form.method || "get").toLowerCase(),
        text: visibleText(form),
        inputs: Array.from(form.querySelectorAll("input,textarea,select")).map(inputSummary),
      })),
  };
`;
|
||||
|
||||
/**
 * `snapshot` command: loads the first positional URL and returns the page's
 * current URL, title and the `snapshotScript` result.
 *
 * @returns `{ ok: true, url, title, result }`.
 * @throws {CliError} `USAGE_ERROR` when no URL is given.
 */
async function snapshotCommand() {
  const targetUrl = getPositionalArgs()[0];
  if (!targetUrl) {
    usage();
  }

  // Capture Page Metadata
  const capture = async (driver) => {
    const [url, title, result] = await Promise.all([
      driver.getCurrentUrl(),
      driver.getTitle(),
      driver.executeScript(snapshotScript),
    ]);
    return { ok: true, url, title, result };
  };

  return withPage(targetUrl, capture);
}
|
||||
|
||||
/**
 * `exec` command: loads the first positional URL and returns the result of
 * the `--js` / `--script` prelude.
 *
 * @returns The prelude script's result.
 * @throws {CliError} `USAGE_ERROR` when the URL or the script is missing.
 */
async function execCommand() {
  const targetUrl = getPositionalArgs()[0];
  const hasScript = Boolean(inlineJs || scriptPath);

  if (!targetUrl || !hasScript) {
    usage();
  }

  // The prelude already ran inside withPage; just surface its result.
  return withPage(targetUrl, async (_driver, scriptResult) => scriptResult);
}
|
||||
|
||||
/**
 * `screenshot` command: loads the first positional URL and writes a
 * screenshot to `--output` (default `screenshot.png`).
 *
 * @returns `{ ok: true, result: { path } }` naming the written file.
 * @throws {CliError} `USAGE_ERROR` when no URL is given.
 */
async function screenshotCommand() {
  const targetUrl = getPositionalArgs()[0];
  const outputPath = getOption("--output") ?? "screenshot.png";

  if (!targetUrl) {
    usage();
  }

  // Save Screenshot
  const saveScreenshot = async (driver) => {
    const encodedImage = await driver.takeScreenshot();
    // The driver's screenshot data is written with base64 decoding.
    writeFileSync(outputPath, encodedImage, "base64");
    return { ok: true, result: { path: outputPath } };
  };

  return withPage(targetUrl, saveScreenshot);
}
|
||||
|
||||
/**
 * Collapses every run of whitespace (including newlines) into one space and
 * trims the ends, so the text fits on a single Markdown line.
 *
 * @param text Raw title or byline text.
 * @returns The single-line, trimmed text.
 */
function markdownTitle(text: string): string {
  // A global regex with `replace` already replaces every match; `replaceAll`
  // would only add an ES2021 requirement.
  return text.replace(/\s+/g, " ").trim();
}
|
||||
|
||||
/**
 * Renders a reader article as Markdown.
 *
 * Output sections, separated by blank lines and each omitted when empty:
 * a `# title` heading, an italic `_byline_`, and the article HTML converted
 * by Turndown (ATX headings, fenced code blocks).
 *
 * @param article Object with `html` and optional `title` / `byline`.
 * @returns The Markdown document, ending with exactly one newline.
 */
function articleToMarkdown(article) {
  // Convert Reader HTML
  const converter = new TurndownService({
    headingStyle: "atx",
    codeBlockStyle: "fenced",
  });
  const body = converter.turndown(article.html).trim();

  // Add Article Metadata
  const sections = [
    article.title ? `# ${markdownTitle(article.title)}` : "",
    article.byline ? `_${markdownTitle(article.byline)}_` : "",
    body,
  ].filter(Boolean);

  return `${sections.join("\n\n").trim()}\n`;
}
|
||||
|
||||
/**
 * Picks the representation of a reader article requested by `--format`.
 *
 * @param article Article carrying `markdown`, `html` and `text` fields.
 * @param format One of `markdown`, `html`, `text` or `json`.
 * @returns The matching field, or the whole article object for `json`.
 * @throws {CliError} `INVALID_OPTION` for any other format.
 */
function renderReaderOutput(article, format) {
  if (format === "markdown") {
    return article.markdown;
  }
  if (format === "html") {
    return article.html;
  }
  if (format === "text") {
    return article.text;
  }
  if (format === "json") {
    return article;
  }

  cliError(
    "INVALID_OPTION",
    `Unsupported reader format: ${format}. Expected markdown, html, text, or json.`,
  );
}
|
||||
|
||||
/**
 * `search` command: joins the positional arguments into a query and runs it
 * against the selected provider.
 *
 * The provider comes from `--provider`, then the loaded config, then `kagi`.
 *
 * @returns The provider's search result.
 * @throws {CliError} `USAGE_ERROR` when the query is empty,
 *   `UNSUPPORTED_SEARCH_PROVIDER` for any provider other than `kagi`.
 */
async function searchCommand() {
  const provider =
    getOption("--provider") ?? appConfig.search?.provider ?? "kagi";
  const query = getPositionalArgs().join(" ");

  if (!query) {
    usage();
  }

  // Run Provider Search
  if (provider !== "kagi") {
    cliError(
      "UNSUPPORTED_SEARCH_PROVIDER",
      `Unsupported search provider: ${provider}. Expected kagi.`,
    );
  }

  const token = getOption("--token");
  return searchKagi({
    query,
    token,
    config: appConfig,
    headless,
    existingUrl,
    timeoutMs,
  });
}
|
||||
|
||||
/**
 * `reader` command: loads the first positional URL, reopens it through
 * `about:reader`, extracts the article and renders it in the requested
 * `--format` (default `markdown`).
 *
 * When `--output` is given the rendered output is written to that file and
 * `{ ok: true, result: { path } }` is returned; otherwise the rendered
 * output itself is returned.
 *
 * @throws {CliError} `USAGE_ERROR` when no URL is given, `INVALID_OPTION`
 *   for an unsupported format, `TIMEOUT` when waiting for reader content
 *   rejects.
 */
async function readerCommand() {
  const [targetUrl] = getPositionalArgs();
  const outputPath = getOption("--output");
  const format = getOption("--format") ?? "markdown";

  if (!targetUrl) usage();

  // Reject a bad --format before launching the browser instead of after the
  // whole extraction has already run.
  if (!["markdown", "html", "text", "json"].includes(format)) {
    cliError(
      "INVALID_OPTION",
      `Unsupported reader format: ${format}. Expected markdown, html, text, or json.`,
    );
  }

  // In-page probe: returns the article once reader content has text, throws
  // when the reader error element has text, and returns null to keep waiting.
  const readerProbeScript = `
    const content = document.querySelector("#moz-reader-content, .moz-reader-content");
    const error = document.querySelector(".reader-error");
    const text = content?.innerText?.trim() || "";

    if (text) {
      return {
        title: document.querySelector("h1.reader-title")?.textContent?.trim() || document.title || "",
        byline: document.querySelector(".reader-byline, .reader-credits")?.textContent?.trim() || "",
        siteName: document.querySelector(".reader-domain")?.textContent?.trim() || "",
        html: content.innerHTML,
        text,
        readerUrl: location.href,
      };
    }

    if (error?.textContent?.trim()) {
      throw new Error(error.textContent.trim());
    }

    return null;
  `;

  return withPage(targetUrl, async (driver) => {
    // Capture Final Url
    const finalUrl = await driver.getCurrentUrl();

    // Open Firefox Reader View
    const readerUrl = `about:reader?url=${encodeURIComponent(finalUrl)}`;
    await driver.get(readerUrl);

    // Wait For Reader Content
    let article;
    try {
      article = await driver.wait(
        async () => driver.executeScript(readerProbeScript),
        timeoutMs,
        `No readable article content found for URL: ${targetUrl}`,
      );
    } catch (err) {
      // Rejections are not guaranteed to be Error instances; narrow first.
      cliError("TIMEOUT", err instanceof Error ? err.message : String(err));
    }

    // Render Output
    article.sourceUrl = targetUrl;
    article.finalUrl = finalUrl;
    article.markdown = articleToMarkdown(article);
    const output = renderReaderOutput(article, format);

    if (outputPath) {
      writeFileSync(
        outputPath,
        typeof output === "object" ? JSON.stringify(output, null, 2) : output,
      );
      return {
        ok: true,
        result: {
          path: outputPath,
        },
      };
    }

    return output;
  });
}
|
||||
|
||||
/**
 * CLI entry point: prints help, or validates the common options, loads the
 * config and dispatches to the requested command.
 *
 * Help is shown when no command is given or when `--help` appears anywhere
 * on the command line, matching its listing under "Common Options".
 *
 * @returns The command's result, or `undefined` after printing help.
 * @throws {CliError} `UNKNOWN_COMMAND` for an unrecognised command; option
 *   and command failures propagate from the helpers.
 */
async function main() {
  if (!command || command === "--help" || args.includes("--help")) {
    printHelp();
    return undefined;
  }

  validateCommonOptions();

  // Load Config
  appConfig = loadConfig({ path: configPath });

  switch (command) {
    case "snapshot":
      return snapshotCommand();
    case "exec":
      return execCommand();
    case "screenshot":
      return screenshotCommand();
    case "reader":
      return readerCommand();
    case "search":
      return searchCommand();
    default:
      unknownCommand(command);
  }
}
|
||||
|
||||
// Run the CLI: print the result on success; on failure write a JSON error
// report to stderr and exit with status 1.
main()
  .then(printResult)
  .catch((err) => {
    // Prefer the URL the driver reported, falling back to the requested one.
    const failedUrl = runContext.currentUrl || runContext.targetUrl;

    const output = {
      ok: false,
      error: {
        code: err.code || "COMMAND_FAILED",
        message: err.message,
      },
      elapsedMs: elapsedMs(),
      // `url` is only present when a URL was recorded for this run.
      ...(failedUrl ? { url: failedUrl } : {}),
    };

    console.error(JSON.stringify(output, null, 2));
    process.exit(1);
  });
|
||||
Reference in New Issue
Block a user