initial commit
This commit is contained in:
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
codexis
|
||||
.codexis
|
||||
62
AGENTS.md
Normal file
62
AGENTS.md
Normal file
@@ -0,0 +1,62 @@
|
||||
# Codexis
|
||||
|
||||
Tree-sitter powered code indexer. Produces a SQLite database of symbols, files, and line numbers at `.codexis/index.db`.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
codexis [flags] [root] # default root is current directory
|
||||
|
||||
codexis . # index cwd → .codexis/index.db
|
||||
codexis -force . # full re-index (ignore file hashes)
|
||||
codexis -o /tmp/out.db . # custom output path
|
||||
```
|
||||
|
||||
## Architecture
|
||||
|
||||
- **`main.go`** — CLI entry, schema creation, orchestration
|
||||
- **`indexer/walker.go`** — Uses `git ls-files` to find files, `grammars.DetectLanguage()` to filter
|
||||
- **`indexer/indexer.go`** — For each file: hash check → tree-sitter tag → store symbols
|
||||
- **`indexer/scope.go`** — Package extraction (language-specific AST queries with filepath fallback), export detection
|
||||
- **`db/`** — sqlc-generated code from `schema.sql` and `queries.sql`
|
||||
- **`extension/`** — Pi coding agent extension providing `codexis` tool for LLM SQL queries
|
||||
|
||||
## Key Dependencies
|
||||
|
||||
- **`github.com/odvcencio/gotreesitter`** — Pure-Go tree-sitter runtime (no CGo). 206 grammars.
|
||||
- `grammars.DetectLanguage(filename)` → language detection
|
||||
- `grammars.ResolveTagsQuery(entry)` → symbol extraction queries (inferred if not explicit)
|
||||
- `gotreesitter.NewTagger(lang, query).Tag(src)` → returns `[]Tag` with kind, name, range
|
||||
- **`github.com/mattn/go-sqlite3`** — SQLite driver
|
||||
- **sqlc** — Generates Go from `db/schema.sql` + `db/queries.sql`
|
||||
|
||||
## Schema
|
||||
|
||||
Two tables: `files` and `symbols`. See `db/schema.sql`.
|
||||
|
||||
Symbol kinds (enforced via CHECK constraint): `function`, `method`, `class`, `type`, `interface`, `constant`, `variable`, `constructor`.
|
||||
|
||||
Parent-child relationships (e.g., method → class) are determined by range containment in the AST.
|
||||
|
||||
## Pi Extension
|
||||
|
||||
`extension/codexis.ts` registers a single `codexis` tool. Install:
|
||||
|
||||
```bash
|
||||
# Symlink into pi extensions directory
|
||||
ln -s $(pwd)/codexis/extension ~/.pi/agent/extensions/codexis
|
||||
```
|
||||
|
||||
The tool finds `<git-root>/.codexis/index.db` automatically and runs read-only SQL queries. Schema is embedded in the tool description so the LLM knows the tables and valid enum values.
|
||||
|
||||
## Modifying
|
||||
|
||||
1. Schema changes: edit `db/schema.sql` + `db/queries.sql`, run `sqlc generate` in `db/`
|
||||
2. New language package queries: add to `packageQueries` map in `indexer/scope.go`
|
||||
3. Export detection heuristics: `IsExported()` in `indexer/scope.go`
|
||||
|
||||
## Principles
|
||||
|
||||
- **KISS** — Use the tagger as-is. Don't write custom per-language extractors unless the tagger is insufficient.
|
||||
- **YAGNI** — No query CLI, no web UI, no call graph. Just produce the `.db` file.
|
||||
- **Incremental** — Files are skipped if their sha256 hash hasn't changed. Use `-force` to bypass.
|
||||
31
db/db.go
Normal file
31
db/db.go
Normal file
@@ -0,0 +1,31 @@
|
||||
// Code generated by sqlc. DO NOT EDIT.
|
||||
// versions:
|
||||
// sqlc v1.30.0
|
||||
|
||||
package db
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
)
|
||||
|
||||
// DBTX is the minimal database interface satisfied by both *sql.DB and
// *sql.Tx, letting Queries run against a pool or a transaction alike.
type DBTX interface {
	ExecContext(context.Context, string, ...interface{}) (sql.Result, error)
	PrepareContext(context.Context, string) (*sql.Stmt, error)
	QueryContext(context.Context, string, ...interface{}) (*sql.Rows, error)
	QueryRowContext(context.Context, string, ...interface{}) *sql.Row
}

// New wraps db in a Queries value ready to execute the generated queries.
func New(db DBTX) *Queries {
	return &Queries{db: db}
}

// Queries holds the handle that all generated query methods execute against.
type Queries struct {
	db DBTX
}

// WithTx returns a copy of q bound to the transaction tx so a batch of
// queries can run atomically.
func (q *Queries) WithTx(tx *sql.Tx) *Queries {
	return &Queries{
		db: tx,
	}
}
|
||||
31
db/models.go
Normal file
31
db/models.go
Normal file
@@ -0,0 +1,31 @@
|
||||
// Code generated by sqlc. DO NOT EDIT.
|
||||
// versions:
|
||||
// sqlc v1.30.0
|
||||
|
||||
package db
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
)
|
||||
|
||||
// File is a row of the files table: one indexed source file.
type File struct {
	ID       int64
	Path     string // relative to the indexed root; unique
	Language string
	Package  sql.NullString // package/module name, when one was extracted
	Hash     string         // sha256 of the file contents, drives incremental indexing
	IndexedAt sql.NullTime
}

// Symbol is a row of the symbols table: one definition extracted via
// tree-sitter.
type Symbol struct {
	ID      int64
	FileID  int64
	Name    string
	Kind    string // limited by a CHECK constraint to the known symbol kinds
	Line    int64  // 1-indexed start line
	LineEnd sql.NullInt64
	Col     sql.NullInt64
	ColEnd  sql.NullInt64
	Exported sql.NullBool  // language-specific visibility, when determinable
	ParentID sql.NullInt64 // enclosing definition, e.g. method -> class
}
|
||||
25
db/queries.sql
Normal file
25
db/queries.sql
Normal file
@@ -0,0 +1,25 @@
|
||||
-- name: GetFileByPath :one
SELECT id, path, language, package, hash, indexed_at
FROM files
WHERE path = ?;

-- name: UpsertFile :one
-- Insert a file row, or refresh language/package/hash and bump
-- indexed_at when the path already exists.
INSERT INTO files (path, language, package, hash)
VALUES (?, ?, ?, ?)
ON CONFLICT(path) DO UPDATE SET
    language = excluded.language,
    package = excluded.package,
    hash = excluded.hash,
    indexed_at = CURRENT_TIMESTAMP
RETURNING id, path, language, package, hash, indexed_at;

-- name: DeleteSymbolsByFileID :exec
DELETE FROM symbols WHERE file_id = ?;

-- name: InsertSymbol :one
INSERT INTO symbols (file_id, name, kind, line, line_end, col, col_end, exported, parent_id)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
RETURNING id;

-- name: DeleteStaleFiles :exec
-- Drop rows for files no longer present in the walked tree.
DELETE FROM files WHERE path NOT IN (sqlc.slice('paths'));
|
||||
132
db/queries.sql.go
Normal file
132
db/queries.sql.go
Normal file
@@ -0,0 +1,132 @@
|
||||
// Code generated by sqlc. DO NOT EDIT.
|
||||
// versions:
|
||||
// sqlc v1.30.0
|
||||
// source: queries.sql
|
||||
|
||||
package db
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const deleteStaleFiles = `-- name: DeleteStaleFiles :exec
DELETE FROM files WHERE path NOT IN (/*SLICE:paths*/?)
`

// DeleteStaleFiles removes files rows whose path is not in paths.
// The /*SLICE:paths*/ placeholder is expanded to one '?' per element.
// With an empty slice it is replaced by NULL, and NOT IN (NULL) matches
// no rows, so nothing is deleted in that case.
func (q *Queries) DeleteStaleFiles(ctx context.Context, paths []string) error {
	query := deleteStaleFiles
	var queryParams []interface{}
	if len(paths) > 0 {
		for _, v := range paths {
			queryParams = append(queryParams, v)
		}
		// strings.Repeat yields ",?,?,..."; [1:] drops the leading comma.
		query = strings.Replace(query, "/*SLICE:paths*/?", strings.Repeat(",?", len(paths))[1:], 1)
	} else {
		query = strings.Replace(query, "/*SLICE:paths*/?", "NULL", 1)
	}
	_, err := q.db.ExecContext(ctx, query, queryParams...)
	return err
}
|
||||
|
||||
const deleteSymbolsByFileID = `-- name: DeleteSymbolsByFileID :exec
DELETE FROM symbols WHERE file_id = ?
`

// DeleteSymbolsByFileID removes every symbol belonging to the given file.
func (q *Queries) DeleteSymbolsByFileID(ctx context.Context, fileID int64) error {
	_, err := q.db.ExecContext(ctx, deleteSymbolsByFileID, fileID)
	return err
}
|
||||
|
||||
const getFileByPath = `-- name: GetFileByPath :one
SELECT id, path, language, package, hash, indexed_at
FROM files
WHERE path = ?
`

// GetFileByPath fetches the files row for an exact relative path.
// When the path is not indexed, Scan surfaces sql.ErrNoRows.
func (q *Queries) GetFileByPath(ctx context.Context, path string) (File, error) {
	row := q.db.QueryRowContext(ctx, getFileByPath, path)
	var i File
	err := row.Scan(
		&i.ID,
		&i.Path,
		&i.Language,
		&i.Package,
		&i.Hash,
		&i.IndexedAt,
	)
	return i, err
}
|
||||
|
||||
const insertSymbol = `-- name: InsertSymbol :one
INSERT INTO symbols (file_id, name, kind, line, line_end, col, col_end, exported, parent_id)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
RETURNING id
`

// InsertSymbolParams carries one symbols row to insert; nullable columns
// use database/sql null wrappers.
type InsertSymbolParams struct {
	FileID   int64
	Name     string
	Kind     string
	Line     int64
	LineEnd  sql.NullInt64
	Col      sql.NullInt64
	ColEnd   sql.NullInt64
	Exported sql.NullBool
	ParentID sql.NullInt64
}

// InsertSymbol inserts one symbol row and returns its new id.
func (q *Queries) InsertSymbol(ctx context.Context, arg InsertSymbolParams) (int64, error) {
	row := q.db.QueryRowContext(ctx, insertSymbol,
		arg.FileID,
		arg.Name,
		arg.Kind,
		arg.Line,
		arg.LineEnd,
		arg.Col,
		arg.ColEnd,
		arg.Exported,
		arg.ParentID,
	)
	var id int64
	err := row.Scan(&id)
	return id, err
}
|
||||
|
||||
const upsertFile = `-- name: UpsertFile :one
INSERT INTO files (path, language, package, hash)
VALUES (?, ?, ?, ?)
ON CONFLICT(path) DO UPDATE SET
    language = excluded.language,
    package = excluded.package,
    hash = excluded.hash,
    indexed_at = CURRENT_TIMESTAMP
RETURNING id, path, language, package, hash, indexed_at
`

// UpsertFileParams carries the non-generated columns of a files row.
type UpsertFileParams struct {
	Path     string
	Language string
	Package  sql.NullString
	Hash     string
}

// UpsertFile inserts a files row or, on a path conflict, refreshes its
// language/package/hash and indexed_at, returning the resulting row.
func (q *Queries) UpsertFile(ctx context.Context, arg UpsertFileParams) (File, error) {
	row := q.db.QueryRowContext(ctx, upsertFile,
		arg.Path,
		arg.Language,
		arg.Package,
		arg.Hash,
	)
	var i File
	err := row.Scan(
		&i.ID,
		&i.Path,
		&i.Language,
		&i.Package,
		&i.Hash,
		&i.IndexedAt,
	)
	return i, err
}
|
||||
34
db/schema.sql
Normal file
34
db/schema.sql
Normal file
@@ -0,0 +1,34 @@
|
||||
-- One row per indexed source file.
CREATE TABLE files (
    id INTEGER PRIMARY KEY,
    path TEXT NOT NULL UNIQUE,      -- relative to the indexed root
    language TEXT NOT NULL,
    package TEXT,                   -- package/module name; NULL when unknown
    hash TEXT NOT NULL,             -- sha256, drives incremental indexing
    indexed_at DATETIME DEFAULT CURRENT_TIMESTAMP
);

-- One row per definition extracted via tree-sitter.
CREATE TABLE symbols (
    id INTEGER PRIMARY KEY,
    file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
    name TEXT NOT NULL,
    kind TEXT NOT NULL CHECK(kind IN (
        'function', 'method', 'class', 'type',
        'interface', 'constant', 'variable', 'constructor'
    )),
    line INTEGER NOT NULL,          -- 1-indexed start line of the name
    line_end INTEGER,
    col INTEGER,
    col_end INTEGER,
    exported BOOLEAN,               -- NULL when visibility is undeterminable
    parent_id INTEGER REFERENCES symbols(id),  -- enclosing definition
    UNIQUE(file_id, name, kind, line)
);

CREATE INDEX idx_symbols_name ON symbols(name);
CREATE INDEX idx_symbols_kind ON symbols(kind);
CREATE INDEX idx_symbols_file_line ON symbols(file_id, line);
CREATE INDEX idx_symbols_parent ON symbols(parent_id);
CREATE INDEX idx_symbols_exported ON symbols(exported, kind);
CREATE INDEX idx_files_path ON files(path);
CREATE INDEX idx_files_language ON files(language);
CREATE INDEX idx_files_package ON files(package);
|
||||
9
db/sqlc.yaml
Normal file
9
db/sqlc.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
# sqlc code-generation config. After editing schema.sql or queries.sql,
# run `sqlc generate` in this directory to refresh the Go files.
version: "2"
sql:
  - engine: "sqlite"
    queries: "queries.sql"
    schema: "schema.sql"
    gen:
      go:
        package: "db"
        out: "."
|
||||
3
extension/.gitignore
vendored
Normal file
3
extension/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
node_modules
|
||||
package.json
|
||||
package-lock.json
|
||||
161
extension/codexis.ts
Normal file
161
extension/codexis.ts
Normal file
@@ -0,0 +1,161 @@
|
||||
/**
|
||||
* Codexis - Code index query tool for pi
|
||||
*
|
||||
* Provides a single tool that queries the .codexis/index.db SQLite database
|
||||
* containing symbols, files, and line numbers for the codebase.
|
||||
*/
|
||||
|
||||
import { Type } from "@mariozechner/pi-ai";
|
||||
import { defineTool, type ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
||||
import { execSync } from "node:child_process";
|
||||
import { existsSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import Database from "better-sqlite3";
|
||||
|
||||
// Schema summary embedded in the tool description so the model knows the
// tables, columns, and valid kind values without probing the database.
const SCHEMA = `-- .codexis/index.db schema:
--
-- files: indexed source files
-- id INTEGER PRIMARY KEY
-- path TEXT NOT NULL UNIQUE -- relative to repo root
-- language TEXT NOT NULL -- e.g. 'go', 'typescript', 'python', 'tsx', 'proto'
-- package TEXT -- package/module name (from AST or directory)
-- hash TEXT NOT NULL -- sha256, for incremental indexing
-- indexed_at DATETIME
--
-- symbols: definitions extracted via tree-sitter
-- id INTEGER PRIMARY KEY
-- file_id INTEGER NOT NULL REFERENCES files(id)
-- name TEXT NOT NULL
-- kind TEXT NOT NULL -- one of: 'function','method','class','type','interface','constant','variable','constructor'
-- line INTEGER NOT NULL -- 1-indexed
-- line_end INTEGER -- end of definition body
-- col INTEGER
-- col_end INTEGER
-- exported BOOLEAN -- language-specific visibility
-- parent_id INTEGER REFERENCES symbols(id) -- e.g. method→class, field→struct`;

// Full tool description: the schema above plus ready-to-adapt examples.
const DESCRIPTION = `Query the code index database (.codexis/index.db). Run read-only SQL to find symbols, files, and line numbers across the codebase.

${SCHEMA}

Example queries:
-- Find where a function is defined
SELECT f.path, s.line FROM symbols s JOIN files f ON s.file_id=f.id WHERE s.name='HandleRequest'

-- Public API of a package
SELECT s.name, s.kind, s.line, f.path FROM symbols s JOIN files f ON s.file_id=f.id WHERE f.package='server' AND s.exported=1

-- All types in a directory
SELECT s.name, s.line, f.path FROM symbols s JOIN files f ON s.file_id=f.id WHERE f.path LIKE 'backend/api/%' AND s.kind='type'

-- Methods on a class/type (via parent_id)
SELECT c.name as parent, s.name, s.kind, s.line FROM symbols s JOIN symbols c ON s.parent_id=c.id WHERE c.name='AuthService'

-- Overview: symbols per area
SELECT CASE WHEN f.path LIKE 'backend/%' THEN 'backend' WHEN f.path LIKE 'frontend/%' THEN 'frontend' ELSE 'other' END as area, COUNT(*) FROM symbols s JOIN files f ON s.file_id=f.id GROUP BY area`;
|
||||
|
||||
function findGitRoot(cwd: string): string | null {
|
||||
try {
|
||||
return execSync("git rev-parse --show-toplevel", {
|
||||
cwd,
|
||||
encoding: "utf-8",
|
||||
stdio: ["pipe", "pipe", "pipe"],
|
||||
}).trim();
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function findDatabase(cwd: string): string | null {
|
||||
const gitRoot = findGitRoot(cwd);
|
||||
if (!gitRoot) return null;
|
||||
const dbPath = join(gitRoot, ".codexis", "index.db");
|
||||
if (!existsSync(dbPath)) return null;
|
||||
return dbPath;
|
||||
}
|
||||
|
||||
// The codexis tool: runs caller-supplied read-only SQL against the repo's
// .codexis/index.db and renders the result as an aligned text table.
const codexisTool = defineTool({
	name: "codexis",
	label: "Codexis",
	description: DESCRIPTION,
	parameters: Type.Object({
		sql: Type.String({
			description: "SQL query to run against the code index database",
		}),
	}),

	async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
		const dbPath = findDatabase(ctx.cwd);
		if (!dbPath) {
			throw new Error(
				"No code index found. Run `codexis` in the repo root to generate .codexis/index.db"
			);
		}

		// Opened read-only: the index is produced by the codexis CLI and
		// never mutated from here.
		const db = new Database(dbPath, { readonly: true });
		try {
			// Block writes
			// NOTE(review): this prefix check is a soft guard for clearer
			// errors; the readonly connection above is the real enforcement.
			const normalized = params.sql.trim().toUpperCase();
			if (
				!normalized.startsWith("SELECT") &&
				!normalized.startsWith("WITH") &&
				!normalized.startsWith("EXPLAIN") &&
				!normalized.startsWith("PRAGMA")
			) {
				throw new Error("Only SELECT, WITH, EXPLAIN, and PRAGMA queries are allowed");
			}

			const stmt = db.prepare(params.sql);
			const rows = stmt.all();

			if (rows.length === 0) {
				return {
					content: [{ type: "text", text: "No results." }],
					details: { rowCount: 0 },
				};
			}

			// Format as aligned text table
			const columns = Object.keys(rows[0] as Record<string, unknown>);
			const data = rows.map((row) => {
				const r = row as Record<string, unknown>;
				return columns.map((col) => String(r[col] ?? "NULL"));
			});

			// Column width = widest of the header and every cell beneath it.
			const widths = columns.map((col, i) =>
				Math.max(col.length, ...data.map((row) => row[i].length))
			);

			const header = columns
				.map((col, i) => col.padEnd(widths[i]))
				.join(" ");
			const separator = widths.map((w) => "-".repeat(w)).join(" ");
			const body = data
				.map((row) =>
					row.map((val, i) => val.padEnd(widths[i])).join(" ")
				)
				.join("\n");

			const result = `${header}\n${separator}\n${body}`;

			// Truncate if huge
			const maxLen = 48000;
			const truncated =
				result.length > maxLen
					? result.slice(0, maxLen) +
						`\n\n[Truncated: ${rows.length} rows total, showing partial results. Narrow your query.]`
					: result;

			return {
				content: [{ type: "text", text: truncated }],
				details: { rowCount: rows.length },
			};
		} finally {
			db.close();
		}
	},
});
|
||||
|
||||
/** Pi extension entry point: registers the codexis SQL query tool. */
export default function (pi: ExtensionAPI) {
	pi.registerTool(codexisTool);
}
|
||||
61
flake.lock
generated
Normal file
61
flake.lock
generated
Normal file
@@ -0,0 +1,61 @@
|
||||
{
|
||||
"nodes": {
|
||||
"flake-utils": {
|
||||
"inputs": {
|
||||
"systems": "systems"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1731533236,
|
||||
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
|
||||
"owner": "numtide",
|
||||
"repo": "flake-utils",
|
||||
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "numtide",
|
||||
"repo": "flake-utils",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1771208521,
|
||||
"narHash": "sha256-X01Q3DgSpjeBpapoGA4rzKOn25qdKxbPnxHeMLNoHTU=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "fa56d7d6de78f5a7f997b0ea2bc6efd5868ad9e8",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "NixOS",
|
||||
"ref": "nixos-25.11",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
"inputs": {
|
||||
"flake-utils": "flake-utils",
|
||||
"nixpkgs": "nixpkgs"
|
||||
}
|
||||
},
|
||||
"systems": {
|
||||
"locked": {
|
||||
"lastModified": 1681028828,
|
||||
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "nix-systems",
|
||||
"repo": "default",
|
||||
"type": "github"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
"version": 7
|
||||
}
|
||||
28
flake.nix
Normal file
28
flake.nix
Normal file
@@ -0,0 +1,28 @@
|
||||
# Development shell flake: `nix develop` provides a shell with Go.
{
  description = "Dev Shell";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.11";
    flake-utils.url = "github:numtide/flake-utils";
  };

  outputs =
    { self
    , nixpkgs
    , flake-utils
    ,
    }:
    flake-utils.lib.eachDefaultSystem (
      system:
      let
        pkgs = nixpkgs.legacyPackages.${system};
      in
      {
        # Single dev shell per supported system; only Go is required.
        devShells.default = pkgs.mkShell {
          packages = with pkgs; [
            go
          ];
        };
      }
    );
}
|
||||
8
go.mod
Normal file
8
go.mod
Normal file
@@ -0,0 +1,8 @@
|
||||
module codexis
|
||||
|
||||
go 1.25.0
|
||||
|
||||
require (
|
||||
github.com/mattn/go-sqlite3 v1.14.42
|
||||
github.com/odvcencio/gotreesitter v0.13.4
|
||||
)
|
||||
4
go.sum
Normal file
4
go.sum
Normal file
@@ -0,0 +1,4 @@
|
||||
github.com/mattn/go-sqlite3 v1.14.42 h1:MigqEP4ZmHw3aIdIT7T+9TLa90Z6smwcthx+Azv4Cgo=
|
||||
github.com/mattn/go-sqlite3 v1.14.42/go.mod h1:pjEuOr8IwzLJP2MfGeTb0A35jauH+C2kbHKBr7yXKVQ=
|
||||
github.com/odvcencio/gotreesitter v0.13.4 h1:O/FqOlabRz1Neg6UISx0URtwuN1FQ2eGCc846KHcBbQ=
|
||||
github.com/odvcencio/gotreesitter v0.13.4/go.mod h1:Sx+iYJBfw5xSWkSttLSuFvguJctlH+ma1BTxZ0MPCqo=
|
||||
218
indexer/indexer.go
Normal file
218
indexer/indexer.go
Normal file
@@ -0,0 +1,218 @@
|
||||
package indexer
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/odvcencio/gotreesitter"
|
||||
"github.com/odvcencio/gotreesitter/grammars"
|
||||
|
||||
"codexis/db"
|
||||
)
|
||||
|
||||
// Indexer walks a codebase, extracts symbols via tree-sitter, and stores them in SQLite.
type Indexer struct {
	queries *db.Queries // generated query layer over the index database
	root    string      // root directory of the tree being indexed
	force   bool        // when true, skip the hash check and re-index everything
}

// New creates a new Indexer. force disables the per-file sha256 skip so
// every file is re-indexed.
func New(queries *db.Queries, root string, force bool) *Indexer {
	return &Indexer{
		queries: queries,
		root:    root,
		force:   force,
	}
}
|
||||
|
||||
// Stats holds indexing statistics for one Index run.
type Stats struct {
	FilesTotal   int // candidate files returned by the walker
	FilesIndexed int // files actually parsed and stored this run
	FilesSkipped int // files skipped (unchanged hash, or no grammar)
	SymbolsTotal int // symbols inserted across all indexed files
}
|
||||
|
||||
// Index walks the codebase and indexes all recognized files, returning
// aggregate statistics. Per-file failures are logged to stderr and do
// not abort the run; rows for files no longer present are removed at
// the end.
func (idx *Indexer) Index(ctx context.Context) (*Stats, error) {
	files, err := WalkFiles(idx.root)
	if err != nil {
		return nil, fmt.Errorf("walking files: %w", err)
	}

	stats := &Stats{FilesTotal: len(files)}

	for _, relPath := range files {
		indexed, symbolCount, err := idx.indexFile(ctx, relPath)
		if err != nil {
			// Best-effort: one unreadable or unparsable file should not
			// kill the entire index run.
			fmt.Fprintf(os.Stderr, "warn: %s: %v\n", relPath, err)
			continue
		}
		if indexed {
			stats.FilesIndexed++
			stats.SymbolsTotal += symbolCount
		} else {
			stats.FilesSkipped++
		}
	}

	// Clean up files that no longer exist
	if err := idx.queries.DeleteStaleFiles(ctx, files); err != nil {
		return nil, fmt.Errorf("cleaning stale files: %w", err)
	}

	return stats, nil
}
|
||||
|
||||
// indexFile processes a single file. It returns indexed=false with a nil
// error when the file is unchanged since the last run (hash match, unless
// force) or has no detectable language; otherwise it re-extracts the
// file's symbols, replaces its rows in the database, and reports how many
// symbols were stored.
func (idx *Indexer) indexFile(ctx context.Context, relPath string) (indexed bool, symbolCount int, err error) {
	absPath := filepath.Join(idx.root, relPath)

	src, err := os.ReadFile(absPath)
	if err != nil {
		return false, 0, fmt.Errorf("reading file: %w", err)
	}

	hash := fmt.Sprintf("%x", sha256.Sum256(src))

	// Check if file has changed
	if !idx.force {
		existing, err := idx.queries.GetFileByPath(ctx, relPath)
		if err == nil && existing.Hash == hash {
			return false, 0, nil // unchanged
		}
	}

	// Detect language
	entry := grammars.DetectLanguage(filepath.Base(relPath))
	if entry == nil {
		return false, 0, nil
	}

	// Extract package
	pkg := ExtractPackage(src, relPath, entry)

	// Upsert file record
	file, err := idx.queries.UpsertFile(ctx, db.UpsertFileParams{
		Path:     relPath,
		Language: entry.Name,
		Package:  sql.NullString{String: pkg, Valid: pkg != ""},
		Hash:     hash,
	})
	if err != nil {
		return false, 0, fmt.Errorf("upserting file: %w", err)
	}

	// Clear old symbols
	if err := idx.queries.DeleteSymbolsByFileID(ctx, file.ID); err != nil {
		return false, 0, fmt.Errorf("deleting old symbols: %w", err)
	}

	// Extract and store symbols
	tags := extractTags(src, entry)
	defs := buildSymbolDefs(tags, file.ID, entry.Name)

	// Insert symbols in order, tracking DB IDs for parent resolution
	dbIDs := make([]int64, len(defs))
	for i, def := range defs {
		// Resolve parent_id from local index to actual DB ID.
		// buildSymbolDefs guarantees a parent's index is always less than
		// its child's, so dbIDs[parentIdx] is already populated here.
		params := def.params
		if params.ParentID.Valid {
			parentIdx := params.ParentID.Int64
			params.ParentID = sql.NullInt64{Int64: dbIDs[parentIdx], Valid: true}
		}

		id, err := idx.queries.InsertSymbol(ctx, params)
		if err != nil {
			return false, 0, fmt.Errorf("inserting symbol %q: %w", params.Name, err)
		}
		dbIDs[i] = id
	}

	return true, len(defs), nil
}
|
||||
|
||||
// extractTags runs the language's tags query over src and returns the raw
// tree-sitter tags. It returns nil when the grammar has no resolvable tags
// query or the tagger cannot be constructed — such files simply contribute
// no symbols rather than failing the index.
func extractTags(src []byte, entry *grammars.LangEntry) []gotreesitter.Tag {
	lang := entry.Language()

	// ResolveTagsQuery returns the explicit TagsQuery if set, otherwise infers
	// one from the grammar's symbol table.
	tagsQuery := grammars.ResolveTagsQuery(*entry)
	if tagsQuery == "" {
		return nil
	}

	tagger, err := gotreesitter.NewTagger(lang, tagsQuery)
	if err != nil {
		return nil
	}

	return tagger.Tag(src)
}
|
||||
|
||||
// symbolDef pairs a raw tree-sitter tag with the database row derived
// from it; the tag is kept for range-containment parent resolution.
type symbolDef struct {
	tag    gotreesitter.Tag
	params db.InsertSymbolParams
}

// buildSymbolDefs converts definition tags into InsertSymbolParams and
// links nested definitions to their enclosing one. ParentID temporarily
// holds the parent's *index* within the returned slice (not a DB id);
// the caller rewrites it to the real id at insert time.
func buildSymbolDefs(tags []gotreesitter.Tag, fileID int64, langName string) []symbolDef {
	// First pass: collect all definition tags
	var defs []symbolDef

	for _, tag := range tags {
		kind := tagKind(tag.Kind)
		if kind == "" {
			continue // skip references and unknown kinds
		}

		exported := IsExported(tag.Name, langName)

		params := db.InsertSymbolParams{
			FileID:   fileID,
			Name:     tag.Name,
			Kind:     kind,
			Line:     int64(tag.NameRange.StartPoint.Row) + 1, // 1-indexed
			LineEnd:  sql.NullInt64{Int64: int64(tag.Range.EndPoint.Row) + 1, Valid: true},
			Col:      sql.NullInt64{Int64: int64(tag.NameRange.StartPoint.Column), Valid: true},
			ColEnd:   sql.NullInt64{Int64: int64(tag.NameRange.EndPoint.Column), Valid: true},
			Exported: sql.NullBool{Bool: exported, Valid: true},
			ParentID: sql.NullInt64{Valid: false},
		}

		defs = append(defs, symbolDef{tag: tag, params: params})
	}

	// Second pass: determine parent relationships based on range containment.
	// ParentID stores the local index — resolved to DB ID during insert.
	// Tree-sitter returns tags in document order (outer before inner),
	// so scanning backwards finds the nearest enclosing definition.
	for i := range defs {
		for j := i - 1; j >= 0; j-- {
			if containsRange(defs[j].tag.Range, defs[i].tag.Range) {
				defs[i].params.ParentID = sql.NullInt64{Int64: int64(j), Valid: true}
				break
			}
		}
	}

	return defs
}
|
||||
|
||||
func containsRange(outer, inner gotreesitter.Range) bool {
|
||||
return outer.StartByte <= inner.StartByte && outer.EndByte >= inner.EndByte
|
||||
}
|
||||
|
||||
// tagKind maps a tree-sitter tag kind such as "definition.function" to
// the bare symbol kind ("function"). Non-definition tags (e.g.
// "reference.call") yield the empty string.
func tagKind(kind string) string {
	if rest, ok := strings.CutPrefix(kind, "definition."); ok {
		return rest
	}
	return ""
}
|
||||
92
indexer/scope.go
Normal file
92
indexer/scope.go
Normal file
@@ -0,0 +1,92 @@
|
||||
package indexer
|
||||
|
||||
import (
	"path/filepath"
	"strings"
	"unicode"
	"unicode/utf8"

	"github.com/odvcencio/gotreesitter"
	"github.com/odvcencio/gotreesitter/grammars"
)
|
||||
|
||||
// packageQueries maps language names to tree-sitter queries that extract the
// package/module declaration. The query must capture the package name as @name.
// Languages absent from this map fall back to directory-name derivation in
// ExtractPackage.
var packageQueries = map[string]string{
	"go":     `(package_clause (package_identifier) @name)`,
	"proto":  `(package (full_ident) @name)`,
	"java":   `(package_declaration (scoped_identifier) @name)`,
	"kotlin": `(package_header (identifier) @name)`,
	"scala":  `(package_clause (identifier) @name)`,
	"rust":   `(mod_item name: (identifier) @name)`,
	"elixir": `(call target: (dot left: (alias) @name))`, // defmodule
	"erlang": `(module_attribute name: (atom) @name)`,
}
|
||||
|
||||
// ExtractPackage extracts the package/module name from source code.
|
||||
// Falls back to deriving from the file path if no language-specific query exists
|
||||
// or the query finds no match.
|
||||
func ExtractPackage(src []byte, filePath string, entry *grammars.LangEntry) string {
|
||||
if queryStr, ok := packageQueries[entry.Name]; ok {
|
||||
lang := entry.Language()
|
||||
if pkg := runPackageQuery(src, lang, queryStr); pkg != "" {
|
||||
return pkg
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: derive from directory name
|
||||
dir := filepath.Dir(filePath)
|
||||
if dir == "." || dir == "" {
|
||||
return ""
|
||||
}
|
||||
return filepath.Base(dir)
|
||||
}
|
||||
|
||||
func runPackageQuery(src []byte, lang *gotreesitter.Language, queryStr string) string {
|
||||
parser := gotreesitter.NewParser(lang)
|
||||
tree, err := parser.Parse(src)
|
||||
if err != nil || tree == nil || tree.RootNode() == nil {
|
||||
return ""
|
||||
}
|
||||
defer tree.Release()
|
||||
|
||||
query, err := gotreesitter.NewQuery(queryStr, lang)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
cursor := query.Exec(tree.RootNode(), lang, src)
|
||||
for {
|
||||
match, ok := cursor.NextMatch()
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
for _, cap := range match.Captures {
|
||||
if cap.Name == "name" {
|
||||
return cap.Node.Text(src)
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// IsExported reports whether a symbol name is exported/public under the
// given language's conventions.
//
// Heuristics by language:
//   - go: exported iff the first character is an uppercase letter. Per the
//     Go spec this is a Unicode property, not just ASCII A–Z, so names like
//     "Über" count as exported.
//   - python: public unless the name starts with an underscore.
//   - rust and everything else: visibility lives in AST modifiers
//     (pub/export/public) that a name alone cannot reveal — default to true.
func IsExported(name string, langName string) bool {
	if name == "" {
		return false
	}
	switch langName {
	case "go":
		// Decode the first rune: exportedness depends on the first
		// character, which may be multi-byte.
		r, _ := utf8.DecodeRuneInString(name)
		return unicode.IsUpper(r)
	case "python":
		// Python: private if starts with underscore
		return !strings.HasPrefix(name, "_")
	case "rust":
		// Rust: `pub` is an AST modifier we cannot see from the name alone;
		// approximate as exported.
		return true
	default:
		// Most languages (JS/TS/Java/etc): export/public is a modifier in
		// the AST — default to true when undeterminable.
		return true
	}
}
|
||||
38
indexer/walker.go
Normal file
38
indexer/walker.go
Normal file
@@ -0,0 +1,38 @@
|
||||
package indexer
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os/exec"
|
||||
"strings"
|
||||
|
||||
"github.com/odvcencio/gotreesitter/grammars"
|
||||
)
|
||||
|
||||
// WalkFiles returns all git-tracked files that tree-sitter can parse.
|
||||
// It uses `git ls-files` to respect .gitignore rules correctly.
|
||||
func WalkFiles(root string) ([]string, error) {
|
||||
cmd := exec.Command("git", "ls-files", "--cached", "--others", "--exclude-standard")
|
||||
cmd.Dir = root
|
||||
out, err := cmd.Output()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var files []string
|
||||
for _, line := range bytes.Split(out, []byte("\n")) {
|
||||
relPath := strings.TrimSpace(string(line))
|
||||
if relPath == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if tree-sitter can handle this file
|
||||
// DetectLanguage works on filename, not full path
|
||||
parts := strings.Split(relPath, "/")
|
||||
filename := parts[len(parts)-1]
|
||||
if entry := grammars.DetectLanguage(filename); entry != nil {
|
||||
files = append(files, relPath)
|
||||
}
|
||||
}
|
||||
|
||||
return files, nil
|
||||
}
|
||||
127
main.go
Normal file
127
main.go
Normal file
@@ -0,0 +1,127 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
|
||||
"codexis/db"
|
||||
"codexis/indexer"
|
||||
)
|
||||
|
||||
// dbDir is the directory, created under the indexed root, that holds the
// generated index database.
const dbDir = ".codexis"

// dbFileName is the name of the SQLite database file written inside dbDir.
const dbFileName = "index.db"
|
||||
|
||||
func main() {
|
||||
force := flag.Bool("force", false, "Force full re-index (ignore file hashes)")
|
||||
output := flag.String("o", "", "Output database path (default: <root>/.codexis.db)")
|
||||
flag.Parse()
|
||||
|
||||
root := "."
|
||||
if flag.NArg() > 0 {
|
||||
root = flag.Arg(0)
|
||||
}
|
||||
|
||||
absRoot, err := filepath.Abs(root)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
dbDirPath := filepath.Join(absRoot, dbDir)
|
||||
if err := os.MkdirAll(dbDirPath, 0755); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
dbPath := filepath.Join(dbDirPath, dbFileName)
|
||||
if *output != "" {
|
||||
dbPath = *output
|
||||
}
|
||||
|
||||
if err := run(absRoot, dbPath, *force); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func run(root, dbPath string, force bool) error {
|
||||
ctx := context.Background()
|
||||
|
||||
sqlDB, err := sql.Open("sqlite3", dbPath+"?_journal_mode=WAL&_foreign_keys=on")
|
||||
if err != nil {
|
||||
return fmt.Errorf("opening database: %w", err)
|
||||
}
|
||||
defer sqlDB.Close()
|
||||
|
||||
// Create schema
|
||||
if err := createSchema(ctx, sqlDB); err != nil {
|
||||
return fmt.Errorf("creating schema: %w", err)
|
||||
}
|
||||
|
||||
queries := db.New(sqlDB)
|
||||
idx := indexer.New(queries, root, force)
|
||||
|
||||
start := time.Now()
|
||||
fmt.Fprintf(os.Stderr, "Indexing %s...\n", root)
|
||||
|
||||
stats, err := idx.Index(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("indexing: %w", err)
|
||||
}
|
||||
|
||||
elapsed := time.Since(start)
|
||||
fmt.Fprintf(os.Stderr, "Done in %s\n", elapsed.Round(time.Millisecond))
|
||||
fmt.Fprintf(os.Stderr, " Files: %d total, %d indexed, %d unchanged\n",
|
||||
stats.FilesTotal, stats.FilesIndexed, stats.FilesSkipped)
|
||||
fmt.Fprintf(os.Stderr, " Symbols: %d\n", stats.SymbolsTotal)
|
||||
fmt.Fprintf(os.Stderr, " Output: %s\n", dbPath)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// createSchema creates the files and symbols tables plus their supporting
// indexes if they do not already exist. It is safe to call on every startup.
//
// This DDL mirrors db/schema.sql (the sqlc input); keep the two in sync.
func createSchema(ctx context.Context, conn *sql.DB) error {
	const ddl = `
CREATE TABLE IF NOT EXISTS files (
id INTEGER PRIMARY KEY,
path TEXT NOT NULL UNIQUE,
language TEXT NOT NULL,
package TEXT,
hash TEXT NOT NULL,
indexed_at DATETIME DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE IF NOT EXISTS symbols (
id INTEGER PRIMARY KEY,
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
name TEXT NOT NULL,
kind TEXT NOT NULL CHECK(kind IN (
'function', 'method', 'class', 'type',
'interface', 'constant', 'variable', 'constructor'
)),
line INTEGER NOT NULL,
line_end INTEGER,
col INTEGER,
col_end INTEGER,
exported BOOLEAN,
parent_id INTEGER REFERENCES symbols(id),
UNIQUE(file_id, name, kind, line)
);

CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
CREATE INDEX IF NOT EXISTS idx_symbols_kind ON symbols(kind);
CREATE INDEX IF NOT EXISTS idx_symbols_file_line ON symbols(file_id, line);
CREATE INDEX IF NOT EXISTS idx_symbols_parent ON symbols(parent_id);
CREATE INDEX IF NOT EXISTS idx_symbols_exported ON symbols(exported, kind);
CREATE INDEX IF NOT EXISTS idx_files_path ON files(path);
CREATE INDEX IF NOT EXISTS idx_files_language ON files(language);
CREATE INDEX IF NOT EXISTS idx_files_package ON files(package);
`
	if _, err := conn.ExecContext(ctx, ddl); err != nil {
		return err
	}
	return nil
}
|
||||
Reference in New Issue
Block a user