glimpse/src/index.ts

#!/usr/bin/env node

import { loadConfig, type GlimpseConfig } from "./config.js";
import { createDriver, type WebDriver } from "./driver.js";
import { searchKagi, type SearchResult } from "./providers/kagi.js";
import { readFileSync, writeFileSync } from "node:fs";
import TurndownService from "turndown";

/** Value used for the timeout when `--timeout` is not supplied, in milliseconds. */
const DEFAULT_TIMEOUT_MS = 10000;
/** Pause between two consecutive `--wait-js` polls, in milliseconds. */
const POLL_INTERVAL_MS = 200;
/** Timestamp taken when this module is loaded; `elapsedMs()` measures from it. */
const startTime = Date.now();
/**
 * URLs recorded while a page command runs, so the failure report printed by
 * the entry point can include the page that was being processed.
 */
const runContext: { targetUrl?: string; currentUrl?: string } = {};

/**
 * Content extracted from a page by the `reader` command, either through
 * Reader View (`tryReaderView`) or from the raw document (`extractRawPage`).
 */
interface ReaderArticle {
  /** Reader View title, or `document.title` when that is unavailable. */
  title?: string;
  /** Author/credits line; only filled in by Reader View extraction. */
  byline?: string;
  /** Site/domain label; only filled in by Reader View extraction. */
  siteName?: string;
  /** `innerHTML` of the reader content element, or of `document.body` for raw extraction. */
  html?: string;
  /** Trimmed `innerText` of the same element that `html` was taken from. */
  text?: string;
  /** `location.href` of the Reader View page; only set by Reader View extraction. */
  readerUrl?: string;
  /** URL the user passed on the command line. */
  sourceUrl?: string;
  /** URL reported by the driver once the page had loaded and the prelude had run. */
  finalUrl?: string;
  /** Markdown rendering produced by `articleToMarkdown`. */
  markdown?: string;
  /** Which extraction path produced this article. */
  method?: "reader" | "raw";
}

// Parse CLI Args
// The first argument is the command name; everything after it is kept in
// `args` and consulted by `getOption` / `getPositionalArgs`.
const [command, ...args] = process.argv.slice(2);
// Headless unless `--no-headless` is present.
const headless = !args.includes("--no-headless");
// Options below are read in `--name=value` form; each is undefined when absent.
const existingUrl = getOption("--url");
const inlineJs = getOption("--js");
const scriptPath = getOption("--script");
const waitJs = getOption("--wait-js");
const waitUntil = getOption("--wait-until") ?? "none";
const configPath = getOption("--config");
// Filled in by `main()` via `loadConfig` before any command runs.
let appConfig: GlimpseConfig = {};
// Replaced by `validateCommonOptions()` with the parsed `--timeout` value.
let timeoutMs = DEFAULT_TIMEOUT_MS;

/**
 * Looks up a `--name=value` style option among the CLI arguments.
 *
 * @param name Option name including its leading dashes, e.g. `--timeout`.
 * @returns The text following `=` in the first matching argument, or
 *   `undefined` when no argument starts with `name=`.
 */
function getOption(name: string) {
  const marker = name + "=";

  for (const candidate of args) {
    if (candidate.startsWith(marker)) {
      return candidate.slice(marker.length);
    }
  }

  return undefined;
}

/**
 * Collects the CLI arguments that are not options.
 *
 * @returns Every argument that does not begin with `--`, in original order.
 */
function getPositionalArgs() {
  const positional: string[] = [];

  for (const candidate of args) {
    if (!candidate.startsWith("--")) {
      positional.push(candidate);
    }
  }

  return positional;
}

/**
 * Reports how long the process has been running.
 *
 * @returns Milliseconds elapsed since `startTime` was captured.
 */
function elapsedMs() {
  const now = Date.now();
  return now - startTime;
}

/**
 * Writes a command result to stdout, always terminated by a newline.
 *
 * - `undefined` prints nothing.
 * - Non-array objects are copied and given an `elapsedMs` field (kept as-is
 *   when the result already carries a non-nullish one) before being printed
 *   as indented JSON.
 * - Arrays and `null` are printed as indented JSON; everything else is
 *   printed via `String()`.
 *
 * @param result Value returned by the executed command.
 */
function printResult(result: unknown) {
  if (result === undefined) {
    return;
  }

  let payload: unknown = result;
  if (result && typeof result === "object" && !Array.isArray(result)) {
    const record = result as Record<string, unknown>;
    payload = { ...record, elapsedMs: record.elapsedMs ?? elapsedMs() };
  }

  let text: string;
  if (typeof payload === "object") {
    text = JSON.stringify(payload, null, 2);
  } else {
    text = String(payload);
  }

  if (!text.endsWith("\n")) {
    text = `${text}\n`;
  }
  process.stdout.write(text);
}

/**
 * Error type for failures that should be reported to the user with a stable,
 * machine-readable `code` (e.g. `INVALID_OPTION`, `WAIT_TIMEOUT`).
 */
class CliError extends Error {
  /** Machine-readable error identifier surfaced in the JSON failure report. */
  code: string;
  /** Optional structured context attached by the thrower. */
  details: Record<string, unknown>;

  /**
   * @param code Machine-readable error identifier.
   * @param message Human-readable description.
   * @param details Optional structured context; defaults to an empty object.
   */
  constructor(
    code: string,
    message: string,
    details: Record<string, unknown> = {},
  ) {
    super(message);
    // Without this the instance reports itself as a plain "Error" when
    // stringified or logged, hiding where it came from.
    this.name = "CliError";
    this.code = code;
    this.details = details;
  }
}

/**
 * Throws a `CliError`; never returns.
 *
 * The explicit `never` return type matters: TypeScript only treats a call as
 * terminating control flow when the callee is declared with it, which is what
 * lets callers rely on narrowing and definite assignment after
 * `if (bad) cliError(...)`.
 *
 * @param code Machine-readable error identifier.
 * @param message Human-readable description.
 * @param details Optional structured context forwarded to `CliError`.
 * @throws {CliError} Always.
 */
function cliError(code: string, message: string, details = {}): never {
  throw new CliError(code, message, details);
}

/**
 * Rejects an unrecognised command name; never returns.
 *
 * Declared as `never` (and throwing directly) so the compiler knows the
 * `default` branch of the command dispatch cannot fall through.
 *
 * @param name The command the user typed.
 * @throws {CliError} Always, with code `UNKNOWN_COMMAND`.
 */
function unknownCommand(name: string): never {
  throw new CliError("UNKNOWN_COMMAND", `Unknown command: ${name}`);
}

/**
 * Builds the full `--help` text.
 *
 * The `search` summary says "Markdown or JSON" because the search command
 * defaults to `--format=markdown` and only emits JSON on request.
 *
 * @returns The help text without a trailing newline.
 */
function helpText() {
  return `Usage: glimpse <command> <url> [options]

Commands:
  reader <url> [options]      Extract page content as Markdown (Reader View with raw fallback)
  exec <url> [options]        Execute JavaScript on a page and return the result
  screenshot <url> [options]  Save a PNG screenshot of a page
  search <query> [options]    Search using a supported provider and return Markdown or JSON results

Common Options:
  --help                      Show this help
  --no-headless               Show Firefox instead of running headless
  --url=<server>              Connect to an existing WebDriver server
  --timeout=<ms>              Maximum wait time in milliseconds (default: 10000)
  --wait-js=<code>            Poll JS until it returns a truthy value
  --wait-until=<state>        Wait for readiness: none, interactive, complete (default: none)
  --js=<code>                 Execute inline JS before command logic
  --script=<file>             Execute JS from a file before command logic
  --config=<file>             Read config from a custom path

Exec Options:
  --js=<code>                 Return the top-level JS result
  --script=<file>             Return the top-level script result

Screenshot Options:
  --output=<file>             Output PNG path (default: screenshot.png)

Reader Options:
  --format=<format>           Output format: markdown, html, text, json (default: markdown)
  --output=<file>             Write output to a file
  --no-reader                 Skip Reader View and use raw page extraction

Search Options:
  --provider=<provider>       Search provider: kagi (default: config or kagi)
  --token=<token>             Kagi token (default: KAGI_TOKEN or config)
  --format=<format>           Output format: markdown, json (default: markdown)

Examples:
  glimpse reader https://example.com
  glimpse reader https://example.com --no-reader
  glimpse reader https://example.com/article --output=article.md
  glimpse exec https://example.com --js="return document.title"
  glimpse exec https://example.com --script=extract.js
  glimpse screenshot https://example.com --js="document.body.style.zoom = '80%'" --output=example.png
  KAGI_TOKEN=... glimpse search --provider=kagi "node.js browser automation"`;
}

/** Writes the help text to stdout, followed by a newline. */
function printHelp() {
  const text = helpText();
  process.stdout.write(text + "\n");
}

/**
 * Rejects an invocation with missing required arguments; never returns.
 *
 * Declared as `never` (and throwing directly) so that guards such as
 * `if (!targetUrl) usage();` are understood by the compiler as terminating.
 *
 * @throws {CliError} Always, with code `USAGE_ERROR`.
 */
function usage(): never {
  throw new CliError(
    "USAGE_ERROR",
    "Usage: glimpse <command> <url> [options]. Run glimpse --help for details.",
  );
}

/**
 * Reads and validates `--timeout`.
 *
 * Only canonical positive decimal integers are accepted: the parsed number
 * must print back to exactly the text the user supplied.
 *
 * @returns The timeout in milliseconds, or `DEFAULT_TIMEOUT_MS` when the
 *   option is absent.
 */
function parseTimeout() {
  const raw = getOption("--timeout");
  if (raw === undefined) {
    return DEFAULT_TIMEOUT_MS;
  }

  const timeout = Number.parseInt(raw, 10);
  const isPositiveInteger = Number.isInteger(timeout) && timeout > 0;
  const isCanonical = String(timeout) === raw;

  if (!(isPositiveInteger && isCanonical)) {
    cliError("INVALID_OPTION", "--timeout must be a positive integer.");
  }

  return timeout;
}

/**
 * Validates the options shared by every command and stores the parsed
 * timeout in `timeoutMs`.
 *
 * Checks run in this order: `--js`/`--script` exclusivity, `--timeout`,
 * then `--wait-until`.
 */
function validateCommonOptions() {
  // Script Source Must Be Unambiguous
  const hasBothScripts = Boolean(inlineJs) && Boolean(scriptPath);
  if (hasBothScripts) {
    cliError("INVALID_OPTION", "Use either --js or --script, not both.");
  }

  // Validate Timeout
  timeoutMs = parseTimeout();

  // Validate Wait State
  const allowedStates = new Set(["none", "interactive", "complete"]);
  if (!allowedStates.has(waitUntil)) {
    cliError(
      "INVALID_OPTION",
      `Unsupported --wait-until value: ${waitUntil}. Expected none, interactive, or complete.`,
    );
  }
}

/**
 * Resolves the JavaScript to run before the command logic.
 *
 * @returns The contents of the `--script` file (read as UTF-8) when one was
 *   given, otherwise the `--js` value, which may be `undefined`.
 */
function getPreludeScriptSource() {
  return scriptPath ? readFileSync(scriptPath, "utf-8") : inlineJs;
}

/**
 * Starts (or connects to) a browser session, runs `action` against it and
 * always shuts the session down afterwards.
 *
 * If `action` fails, its error is the one reported: a secondary failure while
 * quitting the driver is ignored so it cannot mask the real cause. If
 * `action` succeeds, a failure to quit still propagates.
 *
 * @param action Work to perform with the live driver.
 * @returns Whatever `action` resolves to.
 * @throws {CliError} With code `BROWSER_START_FAILED` when the driver cannot
 *   be created.
 */
async function withDriver(action: (driver: WebDriver) => Promise<unknown>) {
  let driver: WebDriver;

  try {
    driver = await createDriver({ headless, existingUrl });
  } catch (err) {
    // Thrown values are not guaranteed to be Error instances.
    const reason = err instanceof Error ? err.message : String(err);
    // `cliError` always throws; `return` just makes that visible to the
    // compiler so `driver` counts as assigned below.
    return cliError("BROWSER_START_FAILED", reason);
  }

  let result: unknown;
  try {
    result = await action(driver);
  } catch (actionError) {
    try {
      await driver.quit();
    } catch {
      // Best-effort cleanup: keep the original failure as the reported error.
    }
    throw actionError;
  }

  await driver.quit();
  return result;
}

/**
 * Blocks until `document.readyState` satisfies `--wait-until`.
 *
 * Does nothing for `none`. `interactive` accepts both `interactive` and
 * `complete`; any other value requires `complete`. Any failure of the
 * underlying wait is reported as a timeout.
 *
 * @param driver Live browser session.
 */
async function waitForReadyState(driver: WebDriver) {
  if (waitUntil === "none") {
    return;
  }

  const acceptedStates =
    waitUntil === "interactive" ? ["interactive", "complete"] : ["complete"];

  const hasReachedState = async () => {
    const state = (await driver.executeScript(
      "return document.readyState",
    )) as string;
    return acceptedStates.includes(state);
  };

  try {
    await driver.wait(hasReachedState, timeoutMs);
  } catch {
    cliError(
      "WAIT_TIMEOUT",
      `Timed out after ${timeoutMs}ms waiting for --wait-until=${waitUntil}`,
    );
  }
}

/**
 * Polls the `--wait-js` snippet until it yields a truthy value.
 *
 * Does nothing when `--wait-js` was not given. The snippet is re-run every
 * `POLL_INTERVAL_MS` until `timeoutMs` has elapsed; a snippet that throws
 * aborts immediately with `SCRIPT_FAILED`, and running out of time raises
 * `WAIT_TIMEOUT`.
 *
 * @param driver Live browser session.
 */
async function waitForJs(driver: WebDriver) {
  if (!waitJs) {
    return;
  }

  const pause = () =>
    new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
  const startedAt = Date.now();

  for (;;) {
    const waited = Date.now() - startedAt;
    if (waited >= timeoutMs) {
      break;
    }

    let outcome: unknown;
    try {
      outcome = await driver.executeScript(waitJs);
    } catch (err) {
      cliError("SCRIPT_FAILED", `--wait-js failed: ${err.message}`);
    }

    if (outcome) {
      return;
    }

    await pause();
  }

  cliError(
    "WAIT_TIMEOUT",
    `Timed out after ${timeoutMs}ms waiting for --wait-js`,
  );
}

/**
 * Executes the user's prelude script (`--script` or `--js`) in the page.
 *
 * @param driver Live browser session.
 * @returns The script's result, or `undefined` when no script was supplied.
 */
async function runPreludeScript(driver: WebDriver) {
  const source = getPreludeScriptSource();

  if (source) {
    try {
      return await driver.executeScript(source);
    } catch (err) {
      cliError("SCRIPT_FAILED", `Prelude script failed: ${err.message}`);
    }
  }

  return undefined;
}

/**
 * Opens `targetUrl` in a browser session, applies the configured waits and
 * prelude script, then hands control to `action`.
 *
 * The requested URL, and the driver's current URL once navigation succeeds,
 * are recorded in `runContext` for error reporting.
 *
 * @param targetUrl Page to load.
 * @param action Command logic; receives the driver and the prelude result.
 * @returns Whatever `action` resolves to.
 */
async function withPage(
  targetUrl: string,
  action: (driver: WebDriver, scriptResult: unknown) => Promise<unknown>,
) {
  runContext.targetUrl = targetUrl;

  const visitAndRun = async (driver: WebDriver) => {
    // Navigate To Page
    try {
      await driver.get(targetUrl);
      runContext.currentUrl = await driver.getCurrentUrl();
    } catch (err) {
      cliError("NAVIGATION_FAILED", err.message);
    }

    // Wait For Page Readiness
    await waitForReadyState(driver);
    await waitForJs(driver);

    // Run Prelude Script, Then The Command
    const preludeResult = await runPreludeScript(driver);
    return action(driver, preludeResult);
  };

  return withDriver(visitAndRun);
}

/**
 * `exec` command: loads the page and returns the prelude script's result.
 *
 * Requires a URL and one of `--js` / `--script`; otherwise a usage error is
 * raised.
 */
async function execCommand() {
  const targetUrl = getPositionalArgs()[0];
  const hasScript = Boolean(inlineJs) || Boolean(scriptPath);

  if (!targetUrl || !hasScript) {
    usage();
  }

  return withPage(
    targetUrl,
    async (_driver: WebDriver, preludeResult: unknown) => preludeResult,
  );
}

/**
 * `screenshot` command: loads the page and writes a PNG of it to disk.
 *
 * @returns `{ ok: true, result: { path } }` naming the written file
 *   (`--output`, defaulting to `screenshot.png`).
 */
async function screenshotCommand() {
  const positional = getPositionalArgs();
  const targetUrl = positional[0];
  const outputPath = getOption("--output") ?? "screenshot.png";

  if (!targetUrl) {
    usage();
  }

  const capture = async (driver: WebDriver) => {
    // The driver hands back the image base64-encoded; decode while writing.
    const encodedPng = await driver.takeScreenshot();
    writeFileSync(outputPath, encodedPng, "base64");

    return { ok: true, result: { path: outputPath } };
  };

  return withPage(targetUrl, capture);
}

/**
 * Flattens text onto a single line for use in a Markdown heading or byline.
 *
 * @param text Raw title text, possibly containing newlines or repeated spaces.
 * @returns `text` with every whitespace run collapsed to one space and the
 *   ends trimmed.
 */
function markdownTitle(text: string) {
  const singleLine = text.replace(/\s+/g, " ");
  return singleLine.trim();
}

/**
 * Renders an extracted article as a Markdown document.
 *
 * The output consists of, when present and separated by blank lines: an H1
 * with the title, an italic byline, and the HTML body converted to Markdown.
 *
 * @param article Extracted article; `html` may be missing, in which case the
 *   body is simply omitted.
 * @returns The Markdown text, always ending in a single newline.
 */
function articleToMarkdown(article: ReaderArticle) {
  const turndown = new TurndownService({
    headingStyle: "atx",
    codeBlockStyle: "fenced",
  });

  // Convert Reader HTML
  // `html` is optional on ReaderArticle and Turndown rejects non-string
  // input, so fall back to an empty document instead of throwing.
  const body = turndown.turndown(article.html ?? "").trim();
  const parts: string[] = [];

  // Add Article Metadata
  if (article.title) {
    parts.push(`# ${markdownTitle(article.title)}`);
  }
  if (article.byline) {
    parts.push(`_${markdownTitle(article.byline)}_`);
  }
  if (body) {
    parts.push(body);
  }

  return `${parts.join("\n\n").trim()}\n`;
}

/**
 * Selects the representation of an article requested via `--format`.
 *
 * @param article Fully populated article.
 * @param format One of `markdown`, `html`, `text` or `json`.
 * @returns The matching string field, or the article object itself for
 *   `json`. Any other format raises an `INVALID_OPTION` error.
 */
function renderReaderOutput(article: ReaderArticle, format: string) {
  if (format === "markdown") {
    return article.markdown;
  }
  if (format === "html") {
    return article.html;
  }
  if (format === "text") {
    return article.text;
  }
  if (format === "json") {
    return article;
  }

  cliError(
    "INVALID_OPTION",
    `Unsupported reader format: ${format}. Expected markdown, html, text, or json.`,
  );
}

/**
 * Formats search results as Markdown.
 *
 * Each result becomes a level-2 heading linking to its URL followed by the
 * description as a blockquote; results are separated by blank lines.
 *
 * @param results Results in display order.
 * @returns The Markdown text with surrounding whitespace trimmed (empty
 *   string for no results).
 */
function searchResultsToMarkdown(results: SearchResult[]): string {
  const sections: string[] = [];

  for (const entry of results) {
    const heading = `## [${entry.title}](${entry.url})`;
    const quote = `> ${entry.description}`;
    sections.push(`${heading}\n${quote}`);
  }

  return sections.join("\n\n").trim();
}

/**
 * `search` command: runs a query against the selected provider.
 *
 * The provider comes from `--provider`, then the config, then defaults to
 * `kagi`. All positional arguments are joined with spaces to form the query.
 *
 * @returns Markdown text for `--format=markdown` (the default) or the raw
 *   result array for `--format=json`.
 */
async function searchCommand() {
  const provider =
    getOption("--provider") ?? appConfig.search?.provider ?? "kagi";
  const query = getPositionalArgs().join(" ");
  const format = getOption("--format") ?? "markdown";

  if (!query) {
    usage();
  }

  const isSupportedFormat = format === "markdown" || format === "json";
  if (!isSupportedFormat) {
    cliError(
      "INVALID_OPTION",
      `Unsupported search format: ${format}. Expected markdown, json.`,
    );
  }

  // Run Provider Search
  if (provider !== "kagi") {
    cliError(
      "UNSUPPORTED_SEARCH_PROVIDER",
      `Unsupported search provider: ${provider}. Expected kagi.`,
    );
  }

  const results: SearchResult[] = await searchKagi({
    query,
    token: getOption("--token"),
    config: appConfig,
    headless,
    existingUrl,
    timeoutMs,
  });

  // Render Output
  return format === "json" ? results : searchResultsToMarkdown(results);
}

/**
 * Attempts to extract the article through the browser's Reader View.
 *
 * Navigates to `about:reader?url=<finalUrl>` and polls the page until the
 * reader content element has text. The in-page script throws when the reader
 * shows an error message; that, like a timeout, ends the attempt.
 *
 * @param driver Live browser session.
 * @param finalUrl URL to open in Reader View.
 * @param targetUrl Originally requested URL, used only in the timeout message.
 * @returns The extracted article fields, or `null` when polling fails or
 *   times out.
 */
async function tryReaderView(
  driver: WebDriver,
  finalUrl: string,
  targetUrl: string,
): Promise<ReaderArticle | null> {
  const timeoutMessage = `No readable article content found for URL: ${targetUrl}`;

  // One poll: resolves to the article fields once content exists, else null.
  const pollReaderContent = async () => {
    return driver.executeScript(`
          const content = document.querySelector("#moz-reader-content, .moz-reader-content");
          const error = document.querySelector(".reader-error");
          const text = content?.innerText?.trim() || "";

          if (text) {
            return {
              title: document.querySelector("h1.reader-title")?.textContent?.trim() || document.title || "",
              byline: document.querySelector(".reader-byline, .reader-credits")?.textContent?.trim() || "",
              siteName: document.querySelector(".reader-domain")?.textContent?.trim() || "",
              html: content.innerHTML,
              text,
              readerUrl: location.href,
            };
          }

          if (error?.textContent?.trim()) {
            throw new Error(error.textContent.trim());
          }

          return null;
        `);
  };

  await driver.get(`about:reader?url=${encodeURIComponent(finalUrl)}`);

  try {
    return await driver.wait(pollReaderContent, timeoutMs, timeoutMessage);
  } catch {
    return null;
  }
}

/**
 * Fallback extraction that reads the title and body straight from the
 * current document.
 *
 * @param driver Live browser session.
 * @param originalUrl When given, the page is (re)loaded from this URL first,
 *   e.g. to leave a failed Reader View attempt.
 * @returns An article carrying `title`, `html` and `text` only.
 */
async function extractRawPage(
  driver: WebDriver,
  originalUrl?: string,
): Promise<ReaderArticle> {
  // Navigate Back If Needed
  if (originalUrl) {
    await driver.get(originalUrl);
  }

  const { title, html, text } = (await driver.executeScript(`
    return {
      title: document.title || "",
      html: document.body?.innerHTML || "",
      text: document.body?.innerText?.trim() || "",
    };
  `)) as { title: string; html: string; text: string };

  return { title, html, text };
}

/**
 * `reader` command: extracts a page's content and renders it in the
 * requested format.
 *
 * Reader View is tried first unless `--no-reader` is given; when it yields
 * nothing the raw document is used instead. `--format` is validated before
 * the browser is started, so a typo fails immediately rather than after a
 * full page load (matching how the search command validates its format).
 *
 * @returns The rendered output, or `{ ok: true, result: { path } }` when
 *   `--output` was given and the output was written to that file.
 */
async function readerCommand() {
  const [targetUrl] = getPositionalArgs();
  const outputPath = getOption("--output");
  const format = getOption("--format") ?? "markdown";
  const skipReader = args.includes("--no-reader");

  if (!targetUrl) usage();

  // Fail Fast On Unsupported Formats
  if (!["markdown", "html", "text", "json"].includes(format)) {
    cliError(
      "INVALID_OPTION",
      `Unsupported reader format: ${format}. Expected markdown, html, text, or json.`,
    );
  }

  return withPage(targetUrl, async (driver: WebDriver) => {
    const finalUrl = await driver.getCurrentUrl();

    // Extract Page Content
    const readerArticle = skipReader
      ? null
      : await tryReaderView(driver, finalUrl, targetUrl);

    // Fallback To Raw Extraction
    // Navigate back only if Reader View was attempted, since that attempt
    // leaves the browser on the about:reader page.
    const article: ReaderArticle =
      readerArticle ??
      (await extractRawPage(driver, skipReader ? undefined : finalUrl));
    article.method = readerArticle ? "reader" : "raw";

    // Build Output
    article.sourceUrl = targetUrl;
    article.finalUrl = finalUrl;
    article.markdown = articleToMarkdown(article);
    const output = renderReaderOutput(article, format);

    if (outputPath) {
      writeFileSync(
        outputPath,
        typeof output === "object"
          ? JSON.stringify(output, null, 2)
          : (output ?? ""),
      );
      return {
        ok: true,
        result: {
          path: outputPath,
        },
      };
    }

    return output;
  });
}

/**
 * CLI entry point: prints help or validates options, loads the config and
 * dispatches to the requested command.
 *
 * Help is shown when no command is given or when `--help` appears anywhere
 * on the command line, so `glimpse reader --help` works as the help text's
 * "Common Options" section advertises.
 *
 * @returns The command's result (printed by the caller), or `undefined`
 *   after printing help.
 */
async function main() {
  if (!command || command === "--help" || args.includes("--help")) {
    printHelp();
    return undefined;
  }

  validateCommonOptions();

  // Load Config
  appConfig = loadConfig({ path: configPath });

  switch (command) {
    case "exec":
      return execCommand();
    case "screenshot":
      return screenshotCommand();
    case "reader":
      return readerCommand();
    case "search":
      return searchCommand();
    default:
      // Always throws; `return` keeps every branch explicit.
      return unknownCommand(command);
  }
}

/**
 * Reports a failed run as JSON on stderr and exits with status 1.
 *
 * The report carries the error's `code` (or `COMMAND_FAILED` when it has
 * none), its message, the elapsed time and, when known, the URL that was
 * being processed.
 */
function reportFailure(err: any) {
  const report: {
    ok: false;
    error: { code: string; message: string };
    elapsedMs: number;
    url?: string;
  } = {
    ok: false,
    error: {
      code: err.code || "COMMAND_FAILED",
      message: err.message,
    },
    elapsedMs: elapsedMs(),
  };

  // Prefer the URL the browser actually ended up on over the requested one.
  const failedUrl = runContext.currentUrl || runContext.targetUrl;
  if (failedUrl) {
    report.url = failedUrl;
  }

  console.error(JSON.stringify(report, null, 2));
  process.exit(1);
}

main().then(printResult).catch(reportFailure);