diff --git a/AGENTS.md b/AGENTS.md index 8518000..aa9a573 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -42,6 +42,7 @@ Current `glimpse` subcommands: - `exec --js=` or `--script=` - execute JavaScript and return the result - `screenshot --output=` - save a PNG screenshot - `search ` - search with a supported provider and output JSON results +- `serve` - start a persistent browser server (geckodriver + Firefox) for faster repeat commands; auto-discovered via Unix socket, `--stop` to shut down, `--status` to check ## Runtime Requirements diff --git a/src/driver.ts b/src/driver.ts index 75c09b6..3e7182f 100644 --- a/src/driver.ts +++ b/src/driver.ts @@ -1,13 +1,20 @@ +import http from "node:http"; +import { createRequire } from "node:module"; import { execFileSync } from "node:child_process"; -import { Builder, type WebDriver } from "selenium-webdriver"; +import { Builder, WebDriver } from "selenium-webdriver"; export { type WebDriver }; import firefox from "selenium-webdriver/firefox.js"; +import { isServerRunning, socketPath } from "./serve.js"; + +const require = createRequire(import.meta.url); +const { HttpClient, Executor } = require("selenium-webdriver/http"); export interface DriverOptions { headless?: boolean; existingUrl?: string; } +// Find Geckodriver function findGeckodriver(): string { try { return execFileSync("which", ["geckodriver"], { encoding: "utf-8" }).trim(); @@ -18,10 +25,31 @@ function findGeckodriver(): string { } } +// Create Driver - Connects to a glimpse serve socket if available, falls +// back to an explicit --url server, or spins up an ad-hoc geckodriver. 
export async function createDriver({ headless = false, existingUrl, }: DriverOptions = {}): Promise { + // Check For Glimpse Server Socket + if (!existingUrl) { + const sock = socketPath(); + const running = await isServerRunning(sock); + if (running) { + const agent = new http.Agent( + // @ts-ignore — socketPath is supported by Node but not in the type defs + { socketPath: sock }, + ); + // URL is required by HttpClient but the agent routes via socket + const client = new HttpClient("http://localhost", agent); + const executor = new Executor(client); + return WebDriver.createSession( + executor, + new firefox.Options(), + ) as unknown as WebDriver; + } + } + const options = new firefox.Options(); // Configure Headless diff --git a/src/index.ts b/src/index.ts index 4e2d487..1621db9 100755 --- a/src/index.ts +++ b/src/index.ts @@ -3,6 +3,7 @@ import { loadConfig, type GlimpseConfig } from "./config.js"; import { createDriver, type WebDriver } from "./driver.js"; import { searchKagi, type SearchResult } from "./providers/kagi.js"; +import { startServer, stopServer, serverStatus } from "./serve.js"; import { readFileSync, writeFileSync } from "node:fs"; import TurndownService from "turndown"; @@ -92,6 +93,7 @@ Commands: exec [options] Execute JavaScript on a page and return the result screenshot [options] Save a PNG screenshot of a page search [options] Search using a supported provider and return JSON results + serve [options] Start a persistent browser server for faster repeat commands Common Options: --help Show this help @@ -116,6 +118,10 @@ Reader Options: --output= Write output to a file --no-reader Skip Reader View and use raw page extraction +Serve Options: + --stop Stop a running server + --status Show server status + Search Options: --provider= Search provider: kagi (default: config or kagi) --token= Kagi token (default: KAGI_TOKEN or config) @@ -128,7 +134,10 @@ Examples: glimpse exec https://example.com --js="return document.title" glimpse exec 
https://example.com --script=extract.js glimpse screenshot https://example.com --js="document.body.style.zoom = '80%'" --output=example.png - KAGI_TOKEN=... glimpse search --provider=kagi "node.js browser automation"`; + KAGI_TOKEN=... glimpse search --provider=kagi "node.js browser automation" + glimpse serve + glimpse serve --stop + glimpse serve --status`; } function printHelp() { @@ -528,6 +537,16 @@ async function readerCommand() { }); } +async function serveCommand() { + if (args.includes("--stop")) { + return stopServer(); + } + if (args.includes("--status")) { + return serverStatus(); + } + return startServer({ headless }); +} + async function main() { if (!command || command === "--help") { printHelp(); @@ -548,6 +567,8 @@ async function main() { return readerCommand(); case "search": return searchCommand(); + case "serve": + return serveCommand(); default: unknownCommand(command); } diff --git a/src/serve.ts b/src/serve.ts new file mode 100644 index 0000000..1b3906a --- /dev/null +++ b/src/serve.ts @@ -0,0 +1,421 @@ +import http from "node:http"; +import net from "node:net"; +import os from "node:os"; +import path from "node:path"; +import fs from "node:fs"; +import { ChildProcess, spawn } from "node:child_process"; +import { execFileSync } from "node:child_process"; + +const GECKODRIVER_STARTUP_TIMEOUT = 5000; +// Socket Path - Per-user socket in XDG_RUNTIME_DIR (tmpfs, auto-cleaned on +// logout) with tmpdir fallback. GLIMPSE_SOCKET_PATH overrides for tests. +export function socketPath(): string { + if (process.env.GLIMPSE_SOCKET_PATH) return process.env.GLIMPSE_SOCKET_PATH; + const uid = + typeof process.getuid === "function" ? String(process.getuid()) : "0"; + const dir = process.env.XDG_RUNTIME_DIR ?? os.tmpdir(); + return path.join(dir, `glimpse-${uid}.sock`); +} + +// Try Connect - Attempts to connect to an existing socket. Resolves with +// true if something is listening, false otherwise. 
+export function isServerRunning(sock = socketPath()): Promise<boolean> {
+  return new Promise((resolve) => {
+    const conn = net.createConnection(sock);
+    conn.on("connect", () => {
+      conn.destroy();
+      resolve(true);
+    });
+    // Any connection error (missing file, stale socket, refused) means no
+    // server is accepting connections on this path.
+    conn.on("error", () => resolve(false));
+  });
+}
+
+// Find Geckodriver - Resolves the geckodriver binary via `which` and throws
+// a descriptive error when the lookup fails.
+function findGeckodriver(): string {
+  try {
+    return execFileSync("which", ["geckodriver"], { encoding: "utf-8" }).trim();
+  } catch {
+    throw new Error(
+      "geckodriver not found on $PATH. Install it (e.g. via Nix or your package manager).",
+    );
+  }
+}
+
+// Start Geckodriver - Spawns geckodriver on a random port and waits for it
+// to be ready. Returns the child process and the resolved port. Rejects if
+// the process cannot be spawned, exits before reporting a port, or does not
+// report one within GECKODRIVER_STARTUP_TIMEOUT milliseconds.
+async function startGeckodriver(): Promise<{
+  process: ChildProcess;
+  port: number;
+}> {
+  const binary = findGeckodriver();
+  const child = spawn(binary, ["--port", "0"], {
+    stdio: ["ignore", "pipe", "pipe"],
+  });
+
+  // Drain Stderr - Nothing consumes it; an unread pipe eventually fills up
+  // and blocks the child on write.
+  child.stderr?.resume();
+
+  // Wait For Port
+  const port = await new Promise<number>((resolve, reject) => {
+    let output = "";
+
+    // Detach Startup Listeners - Once the outcome is known, stop buffering
+    // output so a long-running server does not accumulate geckodriver logs.
+    function cleanup() {
+      clearTimeout(timeout);
+      child.stdout?.off("data", onData);
+      child.off("exit", onExit);
+      child.off("error", onError);
+    }
+
+    function onData(chunk: Buffer) {
+      output += chunk.toString();
+      const match = output.match(/Listening on [^\s]*:(\d+)/);
+      if (match) {
+        cleanup();
+        // Keep the pipe flowing so geckodriver never stalls on a full buffer.
+        child.stdout?.resume();
+        resolve(Number.parseInt(match[1], 10));
+      }
+    }
+
+    function onExit(code: number | null) {
+      cleanup();
+      reject(new Error(`geckodriver exited with code ${code}: ${output}`));
+    }
+
+    // Spawn failures surface as an "error" event; without a listener the
+    // event would be thrown as an uncaught exception.
+    function onError(err: Error) {
+      cleanup();
+      reject(new Error(`Failed to start geckodriver: ${err.message}`));
+    }
+
+    const timeout = setTimeout(() => {
+      cleanup();
+      child.kill();
+      reject(new Error("Timed out waiting for geckodriver to start"));
+    }, GECKODRIVER_STARTUP_TIMEOUT);
+
+    child.stdout?.on("data", onData);
+    child.on("exit", onExit);
+    child.on("error", onError);
+  });
+
+  return { process: child, port };
+}
+
+// Create Firefox Session - Creates a new WebDriver session via the
+// geckodriver HTTP API directly, keeping Firefox alive. Resolves with the
+// session ID; rejects when the response is not JSON or carries no
+// value.sessionId.
+async function createSession(
+  geckodriverPort: number,
+  headless: boolean,
+): Promise<string> {
+  const body = JSON.stringify({
+    capabilities: {
+      alwaysMatch: {
+        browserName: "firefox",
+        "moz:firefoxOptions": {
+          args: headless ? ["--headless"] : [],
+        },
+      },
+    },
+  });
+
+  return new Promise((resolve, reject) => {
+    const req = http.request(
+      {
+        hostname: "127.0.0.1",
+        port: geckodriverPort,
+        path: "/session",
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          "Content-Length": Buffer.byteLength(body),
+        },
+      },
+      (res) => {
+        let data = "";
+        res.on("data", (chunk: Buffer) => (data += chunk.toString()));
+        res.on("end", () => {
+          try {
+            const parsed = JSON.parse(data);
+            if (parsed.value?.sessionId) {
+              resolve(parsed.value.sessionId);
+            } else {
+              reject(
+                new Error(`Failed to create session: ${data.slice(0, 500)}`),
+              );
+            }
+          } catch {
+            reject(
+              new Error(
+                `Invalid response from geckodriver: ${data.slice(0, 500)}`,
+              ),
+            );
+          }
+        });
+      },
+    );
+    req.on("error", reject);
+    req.end(body);
+  });
+}
+
+// Delete Session - Closes the Firefox session via WebDriver API. Best
+// effort: resolves on any response and on request errors alike.
+async function deleteSession(
+  geckodriverPort: number,
+  sessionId: string,
+): Promise<void> {
+  return new Promise((resolve) => {
+    const req = http.request(
+      {
+        hostname: "127.0.0.1",
+        port: geckodriverPort,
+        path: `/session/${sessionId}`,
+        method: "DELETE",
+      },
+      (res) => {
+        // Discard the body so the socket is released.
+        res.resume();
+        resolve();
+      },
+    );
+    req.on("error", () => resolve());
+    req.end();
+  });
+}
+
+// Proxy Request - Forwards an HTTP request to geckodriver over TCP.
+function proxyRequest(
+  clientReq: http.IncomingMessage,
+  clientRes: http.ServerResponse,
+  geckodriverPort: number,
+) {
+  // Rewrite Host Header - The client sends Host: localhost via the socket
+  // but geckodriver expects the actual host:port.
+  const headers = {
+    ...clientReq.headers,
+    host: `127.0.0.1:${geckodriverPort}`,
+  };
+
+  const proxyReq = http.request(
+    {
+      hostname: "127.0.0.1",
+      port: geckodriverPort,
+      path: clientReq.url,
+      method: clientReq.method,
+      headers,
+    },
+    (proxyRes) => {
+      clientRes.writeHead(proxyRes.statusCode ?? 500, proxyRes.headers);
+      proxyRes.pipe(clientRes);
+    },
+  );
+
+  proxyReq.on("error", (err) => {
+    // Headers Already Sent - The upstream failed mid-response; writing a
+    // second status line would throw, so just tear the connection down.
+    if (clientRes.headersSent) {
+      clientRes.destroy(err);
+      return;
+    }
+    clientRes.writeHead(502);
+    clientRes.end(JSON.stringify({ error: err.message }));
+  });
+
+  clientReq.pipe(proxyReq);
+}
+
+// Intercept Session Management - The proxy intercepts session creation and
+// deletion to reuse the persistent Firefox session. New session requests
+// return the existing session ID. Delete session requests are turned into
+// navigations to about:blank instead of actually closing Firefox.
+function handleRequest(
+  req: http.IncomingMessage,
+  res: http.ServerResponse,
+  geckodriverPort: number,
+  sessionId: string,
+) {
+  const method = req.method?.toUpperCase();
+  const urlPath = req.url ?? "";
+
+  // Intercept New Session - Return the existing session ID
+  if (method === "POST" && urlPath === "/session") {
+    // Drain the request body
+    req.resume();
+    req.on("end", () => {
+      const response = {
+        value: {
+          sessionId,
+          capabilities: {},
+        },
+      };
+      const body = JSON.stringify(response);
+      res.writeHead(200, {
+        "Content-Type": "application/json",
+        "Content-Length": String(Buffer.byteLength(body)),
+      });
+      res.end(body);
+    });
+    return;
+  }
+
+  // Intercept Delete Session - Navigate to about:blank instead
+  if (method === "DELETE" && urlPath === `/session/${sessionId}`) {
+    // The client always receives a successful "deleted" reply, whether or
+    // not the navigation itself worked.
+    const replyDeleted = () => {
+      res.writeHead(200, { "Content-Type": "application/json" });
+      res.end(JSON.stringify({ value: null }));
+    };
+
+    const navBody = JSON.stringify({ url: "about:blank" });
+    const navReq = http.request(
+      {
+        hostname: "127.0.0.1",
+        port: geckodriverPort,
+        path: `/session/${sessionId}/url`,
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          "Content-Length": String(Buffer.byteLength(navBody)),
+        },
+      },
+      (navRes) => {
+        // The navigation response body is not needed; discard it.
+        navRes.resume();
+        navRes.on("end", replyDeleted);
+      },
+    );
+    navReq.on("error", replyDeleted);
+    navReq.end(navBody);
+    return;
+  }
+
+  // Proxy Everything Else
+  proxyRequest(req, res, geckodriverPort);
+}
+
+// Control Endpoints - Non-WebDriver paths served by the proxy itself.
+// SHUTDOWN_PATH is requested by `glimpse serve --stop`, STATUS_PATH by
+// `glimpse serve --status`.
+const SHUTDOWN_PATH = "/__glimpse/shutdown";
+const STATUS_PATH = "/__glimpse/status";
+
+export interface ServeOptions {
+  headless?: boolean;
+}
+
+// Start Server - Main entry point for `glimpse serve`. Starts geckodriver,
+// creates one Firefox session, and exposes it through an HTTP proxy that
+// listens on the Unix socket. Exits the process with code 1 when a server
+// is already listening on the socket or when the proxy fails to listen.
+export async function startServer({
+  headless = true,
+}: ServeOptions = {}): Promise<void> {
+  const sock = socketPath();
+
+  // Check For Existing Server
+  const running = await isServerRunning(sock);
+  if (running) {
+    console.error(`glimpse: server already running on ${sock}`);
+    process.exit(1);
+  }
+
+  // Clean Stale Socket
+  if (fs.existsSync(sock)) {
+    fs.unlinkSync(sock);
+  }
+
+  // Start Geckodriver
+  console.error("glimpse: starting geckodriver...");
+  const geckodriver = await startGeckodriver();
+  console.error(`glimpse: geckodriver listening on port ${geckodriver.port}`);
+
+  // Create Persistent Firefox Session
+  console.error(
+    `glimpse: starting Firefox${headless ? " (headless)" : ""}...`,
+  );
+  const sessionId = await createSession(geckodriver.port, headless).catch(
+    (err: unknown) => {
+      // Avoid Orphaned Geckodriver - Nothing else would stop the child if
+      // session creation fails.
+      geckodriver.process.kill();
+      throw err;
+    },
+  );
+  console.error(`glimpse: Firefox session ${sessionId} created`);
+
+  let shuttingDown = false;
+
+  // Create Proxy Server
+  const server = http.createServer((req, res) => {
+    const urlPath = req.url ?? "";
+
+    // Handle Shutdown
+    if (urlPath === SHUTDOWN_PATH) {
+      req.resume();
+      res.writeHead(200, { "Content-Type": "application/json" });
+      res.end(JSON.stringify({ ok: true }));
+      void shutdown();
+      return;
+    }
+
+    // Handle Status
+    if (urlPath === STATUS_PATH) {
+      req.resume();
+      res.writeHead(200, { "Content-Type": "application/json" });
+      res.end(
+        JSON.stringify({
+          ok: true,
+          sessionId,
+          geckodriverPort: geckodriver.port,
+          socket: sock,
+          headless,
+          pid: process.pid,
+        }),
+      );
+      return;
+    }
+
+    handleRequest(req, res, geckodriver.port, sessionId);
+  });
+
+  // Listen Failure - Without a listener the "error" event is thrown and the
+  // process dies with geckodriver and Firefox still running.
+  server.on("error", (err) => {
+    console.error(`glimpse: server error: ${err.message}`);
+    void shutdown(1);
+  });
+
+  server.listen(sock, () => {
+    console.error(`glimpse: server listening on ${sock}`);
+  });
+
+  // Cleanup Handler - Runs at most once; tears everything down and exits
+  // the process with the given code.
+  async function shutdown(exitCode = 0): Promise<void> {
+    if (shuttingDown) return;
+    shuttingDown = true;
+
+    console.error("glimpse: shutting down...");
+
+    // Close Proxy Server
+    server.close();
+
+    // Delete Firefox Session
+    try {
+      await deleteSession(geckodriver.port, sessionId);
+    } catch {
+      // Firefox may already be gone
+    }
+
+    // Kill Geckodriver
+    geckodriver.process.kill();
+
+    // Remove Socket
+    try {
+      fs.unlinkSync(sock);
+    } catch {
+      // Socket may already be gone
+    }
+
+    console.error("glimpse: stopped");
+    process.exit(exitCode);
+  }
+
+  // Wrapped so the signal name is not passed through as the exit code.
+  process.on("SIGTERM", () => void shutdown());
+  process.on("SIGINT", () => void shutdown());
+}
+
+// Stop Server - Connects to the socket and sends a shutdown request. Exits
+// with code 1 when nothing is listening on the socket.
+export async function stopServer(): Promise<void> {
+  const sock = socketPath();
+  const running = await isServerRunning(sock);
+
+  if (!running) {
+    console.error("glimpse: no server running");
+    process.exit(1);
+  }
+
+  return new Promise((resolve, reject) => {
+    const req = http.request(
+      { socketPath: sock, path: SHUTDOWN_PATH, method: "POST" },
+      (res) => {
+        res.resume();
+        res.on("end", () => {
+          console.error("glimpse: server stopped");
+          resolve();
+        });
+      },
+    );
+    req.on("error", reject);
+    req.end();
+  });
+}
+
+// Server Status - Connects to the socket and queries status.
+export async function serverStatus(): Promise<void> {
+  const sock = socketPath();
+  const running = await isServerRunning(sock);
+
+  // Nothing Listening - Report on stderr and exit non-zero.
+  if (!running) {
+    console.error("glimpse: no server running");
+    process.exit(1);
+  }
+
+  return new Promise((resolve, reject) => {
+    const req = http.request(
+      { socketPath: sock, path: STATUS_PATH, method: "GET" },
+      (res) => {
+        // Collect the response body and print it verbatim on stdout.
+        let data = "";
+        res.on("data", (chunk) => (data += chunk.toString()));
+        res.on("end", () => {
+          process.stdout.write(data + "\n");
+          resolve();
+        });
+      },
+    );
+    req.on("error", reject);
+    req.end();
+  });
+}
diff --git a/test/smoke.js b/test/smoke.js
index 15976a8..c5cd134 100755
--- a/test/smoke.js
+++ b/test/smoke.js
@@ -10,7 +10,7 @@ import {
 } from "node:fs";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
-import { spawnSync } from "node:child_process";
+import { spawnSync, spawn } from "node:child_process";
 import assert from "node:assert/strict";
 
 const cliPath = new URL("../src/index.ts", import.meta.url).pathname;
@@ -163,6 +163,119 @@ test(
   },
 );
 
+// Serve Helper - Starts a glimpse serve process with an isolated socket,
+// waits for the "server listening" message, and returns a handle with stop().
+function startServe(extraEnv = {}) {
+  const sock = join(tempDir, `serve-${Date.now()}.sock`);
+  const env = { ...process.env, GLIMPSE_SOCKET_PATH: sock, ...extraEnv };
+
+  const child = spawn(process.execPath, ["--import", "tsx", cliPath, "serve"], {
+    env,
+    stdio: ["ignore", "pipe", "pipe"],
+  });
+
+  let stderr = "";
+  child.stderr.on("data", (chunk) => {
+    stderr += chunk.toString();
+  });
+
+  // Ready - Resolves once the server logs "server listening"; rejects if it
+  // exits first or stays silent for 15 seconds.
+  const ready = new Promise((resolve, reject) => {
+    const timeout = setTimeout(
+      () => reject(new Error(`serve did not start: ${stderr}`)),
+      15000,
+    );
+    child.stderr.on("data", () => {
+      if (stderr.includes("server listening")) {
+        clearTimeout(timeout);
+        resolve();
+      }
+    });
+    child.on("exit", (code) => {
+      clearTimeout(timeout);
+      reject(new Error(`serve exited early (code ${code}): ${stderr}`));
+    });
+  });
+
+  // Run With Socket - Runs a glimpse command against this server's socket.
+  function runWithSocket(args) {
+    return spawnSync(process.execPath, ["--import", "tsx", cliPath, ...args], {
+      encoding: "utf-8",
+      env,
+      timeout: 30000,
+    });
+  }
+
+  // Stop - Asks the server to shut down and waits for the process to exit,
+  // killing it if it is still alive after 5 seconds.
+  function stop() {
+    runWithSocket(["serve", "--stop"]);
+    return new Promise((resolve) => {
+      const killTimer = setTimeout(() => {
+        child.kill();
+        resolve();
+      }, 5000);
+      child.once("exit", () => {
+        // Cancel the fallback so it does not keep the event loop alive.
+        clearTimeout(killTimer);
+        resolve();
+      });
+    });
+  }
+
+  return { ready, stop, runWithSocket, sock, child };
+}
+
+test(
+  "serve starts and stops cleanly",
+  ["serve"],
+  async () => {
+    const server = startServe();
+    await server.ready;
+
+    // Status Should Report Running
+    const status = server.runWithSocket(["serve", "--status"]);
+    assert.equal(status.status, 0, status.stderr);
+    const info = parseJson(status.stdout);
+    assert.equal(info.ok, true);
+    assert.equal(typeof info.sessionId, "string");
+    assert.equal(info.socket, server.sock);
+
+    await server.stop();
+    assert.equal(existsSync(server.sock), false, "socket should be cleaned up");
+  },
+);
+
+test(
+  "serve reuses browser session across commands",
+  ["serve"],
+  async () => {
+    const server = startServe();
+    await server.ready;
+
+    // First Command
+    const r1 = server.runWithSocket([
+      "reader",
+      dataHtml("<h1>First</h1>"),
+      "--no-reader",
+    ]);
+    assert.equal(r1.status, 0, r1.stderr);
+    assert.match(r1.stdout, /# First/);
+
+    // Second Command
+    const r2 = server.runWithSocket([
+      "reader",
+      dataHtml("<h1>Second</h1>"),
+      "--no-reader",
+    ]);
+    assert.equal(r2.status, 0, r2.stderr);
+    assert.match(r2.stdout, /# Second/);
+
+    await server.stop();
+  },
+);
+
+test(
+  "serve rejects second instance",
+  ["serve", "errors"],
+  async () => {
+    const server = startServe();
+    await server.ready;
+
+    // Second Instance Should Fail
+    const result = server.runWithSocket(["serve"]);
+    assert.notEqual(result.status, 0);
+    assert.match(result.stderr, /already running/);
+
+    await server.stop();
+  },
+);
+
 test("search validates kagi token in provider", ["search", "errors"], () => {
   const env = { ...process.env };
   delete env.KAGI_TOKEN;