initial commit

This commit is contained in:
2026-04-10 15:31:52 -04:00
commit 39fcfc2968
18 changed files with 1066 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
codexis
.codexis

62
AGENTS.md Normal file
View File

@@ -0,0 +1,62 @@
# Codexis
Tree-sitter powered code indexer. Produces a SQLite database of symbols, files, and line numbers at `.codexis/index.db`.
## Usage
```bash
codexis [flags] [root] # default root is current directory
codexis . # index cwd → .codexis/index.db
codexis -force . # full re-index (ignore file hashes)
codexis -o /tmp/out.db . # custom output path
```
## Architecture
- **`main.go`** — CLI entry, schema creation, orchestration
- **`indexer/walker.go`** — Uses `git ls-files` to find files, `grammars.DetectLanguage()` to filter
- **`indexer/indexer.go`** — For each file: hash check → tree-sitter tag → store symbols
- **`indexer/scope.go`** — Package extraction (language-specific AST queries with filepath fallback), export detection
- **`db/`** — sqlc-generated code from `schema.sql` and `queries.sql`
- **`extension/`** — Pi coding agent extension providing `codexis` tool for LLM SQL queries
## Key Dependencies
- **`github.com/odvcencio/gotreesitter`** — Pure-Go tree-sitter runtime (no CGo). 206 grammars.
- `grammars.DetectLanguage(filename)` → language detection
- `grammars.ResolveTagsQuery(entry)` → symbol extraction queries (inferred if not explicit)
- `gotreesitter.NewTagger(lang, query).Tag(src)` → returns `[]Tag` with kind, name, range
- **`github.com/mattn/go-sqlite3`** — SQLite driver
- **sqlc** — Generates Go from `db/schema.sql` + `db/queries.sql`
## Schema
Two tables: `files` and `symbols`. See `db/schema.sql`.
Symbol kinds (enforced via CHECK constraint): `function`, `method`, `class`, `type`, `interface`, `constant`, `variable`, `constructor`.
Parent-child relationships (e.g., method → class) are determined by range containment in the AST.
## Pi Extension
`extension/codexis.ts` registers a single `codexis` tool. Install:
```bash
# Symlink into pi extensions directory
ln -s $(pwd)/codexis/extension ~/.pi/agent/extensions/codexis
```
The tool finds `<git-root>/.codexis/index.db` automatically and runs read-only SQL queries. Schema is embedded in the tool description so the LLM knows the tables and valid enum values.
## Modifying
1. Schema changes: edit `db/schema.sql` + `db/queries.sql`, run `sqlc generate` in `db/`
2. New language package queries: add to `packageQueries` map in `indexer/scope.go`
3. Export detection heuristics: `IsExported()` in `indexer/scope.go`
## Principles
- **KISS** — Use the tagger as-is. Don't write custom per-language extractors unless the tagger is insufficient.
- **YAGNI** — No query CLI, no web UI, no call graph. Just produce the `.db` file.
- **Incremental** — Files are skipped if their sha256 hash hasn't changed. Use `-force` to bypass.

31
db/db.go Normal file
View File

@@ -0,0 +1,31 @@
// Code generated by sqlc. DO NOT EDIT.
// versions:
// sqlc v1.30.0
package db
import (
"context"
"database/sql"
)
// DBTX is the minimal database surface the generated queries need;
// both *sql.DB and *sql.Tx satisfy it.
type DBTX interface {
	ExecContext(context.Context, string, ...interface{}) (sql.Result, error)
	PrepareContext(context.Context, string) (*sql.Stmt, error)
	QueryContext(context.Context, string, ...interface{}) (*sql.Rows, error)
	QueryRowContext(context.Context, string, ...interface{}) *sql.Row
}

// New wraps db in a Queries value.
func New(db DBTX) *Queries {
	return &Queries{db: db}
}

// Queries carries the connection (or transaction) the generated methods run on.
type Queries struct {
	db DBTX
}

// WithTx returns a copy of q that executes inside tx.
func (q *Queries) WithTx(tx *sql.Tx) *Queries {
	return &Queries{
		db: tx,
	}
}

31
db/models.go Normal file
View File

@@ -0,0 +1,31 @@
// Code generated by sqlc. DO NOT EDIT.
// versions:
// sqlc v1.30.0
package db
import (
"database/sql"
)
// File is a row of the files table: one indexed source file.
type File struct {
	ID        int64
	Path      string         // relative to the repo root
	Language  string         // tree-sitter language name
	Package   sql.NullString // package/module name; NULL when unknown
	Hash      string         // sha256 of the file contents, for incremental indexing
	IndexedAt sql.NullTime
}

// Symbol is a row of the symbols table: one definition extracted via tree-sitter.
type Symbol struct {
	ID       int64
	FileID   int64
	Name     string
	Kind     string        // constrained by the table's CHECK to 8 kinds
	Line     int64         // 1-indexed line of the symbol name
	LineEnd  sql.NullInt64 // last line of the definition body
	Col      sql.NullInt64
	ColEnd   sql.NullInt64
	Exported sql.NullBool  // language-specific visibility; NULL when unknown
	ParentID sql.NullInt64 // enclosing definition (e.g. method -> class)
}

25
db/queries.sql Normal file
View File

@@ -0,0 +1,25 @@
-- name: GetFileByPath :one
-- Fetch one file row by its repo-relative path (used for the hash skip check).
SELECT id, path, language, package, hash, indexed_at
FROM files
WHERE path = ?;

-- name: UpsertFile :one
-- Insert a file row, or refresh its metadata and bump indexed_at when the
-- path already exists.
INSERT INTO files (path, language, package, hash)
VALUES (?, ?, ?, ?)
ON CONFLICT(path) DO UPDATE SET
    language = excluded.language,
    package = excluded.package,
    hash = excluded.hash,
    indexed_at = CURRENT_TIMESTAMP
RETURNING id, path, language, package, hash, indexed_at;

-- name: DeleteSymbolsByFileID :exec
-- Clear a file's symbols before re-inserting them on re-index.
DELETE FROM symbols WHERE file_id = ?;

-- name: InsertSymbol :one
-- Insert one symbol row; the caller resolves parent_id to a real DB id first.
INSERT INTO symbols (file_id, name, kind, line, line_end, col, col_end, exported, parent_id)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
RETURNING id;

-- name: DeleteStaleFiles :exec
-- Remove rows for files no longer present in the walk (symbols cascade).
DELETE FROM files WHERE path NOT IN (sqlc.slice('paths'));

132
db/queries.sql.go Normal file
View File

@@ -0,0 +1,132 @@
// Code generated by sqlc. DO NOT EDIT.
// versions:
// sqlc v1.30.0
// source: queries.sql
package db
import (
"context"
"database/sql"
"strings"
)
const deleteStaleFiles = `-- name: DeleteStaleFiles :exec
DELETE FROM files WHERE path NOT IN (/*SLICE:paths*/?)
`

// DeleteStaleFiles deletes file rows whose path is not in paths.
// The /*SLICE:paths*/ marker is expanded to one placeholder per element.
func (q *Queries) DeleteStaleFiles(ctx context.Context, paths []string) error {
	query := deleteStaleFiles
	var queryParams []interface{}
	if len(paths) > 0 {
		for _, v := range paths {
			queryParams = append(queryParams, v)
		}
		// Build ",?,?,..." for len(paths) elements, trimming the leading comma.
		query = strings.Replace(query, "/*SLICE:paths*/?", strings.Repeat(",?", len(paths))[1:], 1)
	} else {
		// NOT IN (NULL) matches no rows, so an empty slice deletes nothing.
		query = strings.Replace(query, "/*SLICE:paths*/?", "NULL", 1)
	}
	_, err := q.db.ExecContext(ctx, query, queryParams...)
	return err
}

const deleteSymbolsByFileID = `-- name: DeleteSymbolsByFileID :exec
DELETE FROM symbols WHERE file_id = ?
`

// DeleteSymbolsByFileID removes all symbols belonging to one file.
func (q *Queries) DeleteSymbolsByFileID(ctx context.Context, fileID int64) error {
	_, err := q.db.ExecContext(ctx, deleteSymbolsByFileID, fileID)
	return err
}

const getFileByPath = `-- name: GetFileByPath :one
SELECT id, path, language, package, hash, indexed_at
FROM files
WHERE path = ?
`

// GetFileByPath fetches one file row by its repo-relative path.
func (q *Queries) GetFileByPath(ctx context.Context, path string) (File, error) {
	row := q.db.QueryRowContext(ctx, getFileByPath, path)
	var i File
	err := row.Scan(
		&i.ID,
		&i.Path,
		&i.Language,
		&i.Package,
		&i.Hash,
		&i.IndexedAt,
	)
	return i, err
}

const insertSymbol = `-- name: InsertSymbol :one
INSERT INTO symbols (file_id, name, kind, line, line_end, col, col_end, exported, parent_id)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
RETURNING id
`

// InsertSymbolParams mirrors the insertable columns of the symbols table.
type InsertSymbolParams struct {
	FileID   int64
	Name     string
	Kind     string
	Line     int64
	LineEnd  sql.NullInt64
	Col      sql.NullInt64
	ColEnd   sql.NullInt64
	Exported sql.NullBool
	ParentID sql.NullInt64
}

// InsertSymbol inserts one symbol row and returns its generated id.
func (q *Queries) InsertSymbol(ctx context.Context, arg InsertSymbolParams) (int64, error) {
	row := q.db.QueryRowContext(ctx, insertSymbol,
		arg.FileID,
		arg.Name,
		arg.Kind,
		arg.Line,
		arg.LineEnd,
		arg.Col,
		arg.ColEnd,
		arg.Exported,
		arg.ParentID,
	)
	var id int64
	err := row.Scan(&id)
	return id, err
}

const upsertFile = `-- name: UpsertFile :one
INSERT INTO files (path, language, package, hash)
VALUES (?, ?, ?, ?)
ON CONFLICT(path) DO UPDATE SET
language = excluded.language,
package = excluded.package,
hash = excluded.hash,
indexed_at = CURRENT_TIMESTAMP
RETURNING id, path, language, package, hash, indexed_at
`

// UpsertFileParams mirrors the insertable columns of the files table.
type UpsertFileParams struct {
	Path     string
	Language string
	Package  sql.NullString
	Hash     string
}

// UpsertFile inserts or refreshes a file row and returns the stored row.
func (q *Queries) UpsertFile(ctx context.Context, arg UpsertFileParams) (File, error) {
	row := q.db.QueryRowContext(ctx, upsertFile,
		arg.Path,
		arg.Language,
		arg.Package,
		arg.Hash,
	)
	var i File
	err := row.Scan(
		&i.ID,
		&i.Path,
		&i.Language,
		&i.Package,
		&i.Hash,
		&i.IndexedAt,
	)
	return i, err
}

34
db/schema.sql Normal file
View File

@@ -0,0 +1,34 @@
-- Code index schema: one row per indexed file, one row per extracted symbol.

CREATE TABLE files (
    id INTEGER PRIMARY KEY,
    path TEXT NOT NULL UNIQUE,  -- relative to the repo root
    language TEXT NOT NULL,     -- tree-sitter language name
    package TEXT,               -- package/module name; NULL when unknown
    hash TEXT NOT NULL,         -- sha256 of contents, for incremental indexing
    indexed_at DATETIME DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE symbols (
    id INTEGER PRIMARY KEY,
    -- Deleting a file removes its symbols (requires foreign_keys pragma on).
    file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
    name TEXT NOT NULL,
    kind TEXT NOT NULL CHECK(kind IN (
        'function', 'method', 'class', 'type',
        'interface', 'constant', 'variable', 'constructor'
    )),
    line INTEGER NOT NULL,  -- 1-indexed line of the symbol name
    line_end INTEGER,       -- last line of the definition body
    col INTEGER,
    col_end INTEGER,
    exported BOOLEAN,       -- language-specific visibility; NULL when unknown
    parent_id INTEGER REFERENCES symbols(id),  -- enclosing definition (e.g. method -> class)
    UNIQUE(file_id, name, kind, line)
);

-- Lookup paths used by the query tool.
CREATE INDEX idx_symbols_name ON symbols(name);
CREATE INDEX idx_symbols_kind ON symbols(kind);
CREATE INDEX idx_symbols_file_line ON symbols(file_id, line);
CREATE INDEX idx_symbols_parent ON symbols(parent_id);
CREATE INDEX idx_symbols_exported ON symbols(exported, kind);
CREATE INDEX idx_files_path ON files(path);
CREATE INDEX idx_files_language ON files(language);
CREATE INDEX idx_files_package ON files(package);

9
db/sqlc.yaml Normal file
View File

@@ -0,0 +1,9 @@
# sqlc configuration: generate Go bindings for the SQLite schema and queries
# in this directory. Run `sqlc generate` here after editing the .sql files.
version: "2"
sql:
  - engine: "sqlite"
    queries: "queries.sql"
    schema: "schema.sql"
    gen:
      go:
        package: "db"
        out: "."

3
extension/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
node_modules
package.json
package-lock.json

161
extension/codexis.ts Normal file
View File

@@ -0,0 +1,161 @@
/**
* Codexis - Code index query tool for pi
*
* Provides a single tool that queries the .codexis/index.db SQLite database
* containing symbols, files, and line numbers for the codebase.
*/
import { Type } from "@mariozechner/pi-ai";
import { defineTool, type ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { execSync } from "node:child_process";
import { existsSync } from "node:fs";
import { join } from "node:path";
import Database from "better-sqlite3";
// Schema summary embedded in the tool description so the model knows the
// tables, columns, and valid enum values without probing the database.
const SCHEMA = `-- .codexis/index.db schema:
--
-- files: indexed source files
-- id INTEGER PRIMARY KEY
-- path TEXT NOT NULL UNIQUE -- relative to repo root
-- language TEXT NOT NULL -- e.g. 'go', 'typescript', 'python', 'tsx', 'proto'
-- package TEXT -- package/module name (from AST or directory)
-- hash TEXT NOT NULL -- sha256, for incremental indexing
-- indexed_at DATETIME
--
-- symbols: definitions extracted via tree-sitter
-- id INTEGER PRIMARY KEY
-- file_id INTEGER NOT NULL REFERENCES files(id)
-- name TEXT NOT NULL
-- kind TEXT NOT NULL -- one of: 'function','method','class','type','interface','constant','variable','constructor'
-- line INTEGER NOT NULL -- 1-indexed
-- line_end INTEGER -- end of definition body
-- col INTEGER
-- col_end INTEGER
-- exported BOOLEAN -- language-specific visibility
-- parent_id INTEGER REFERENCES symbols(id) -- e.g. method→class, field→struct`;

// Full tool description: schema plus worked example queries the LLM can adapt.
const DESCRIPTION = `Query the code index database (.codexis/index.db). Run read-only SQL to find symbols, files, and line numbers across the codebase.
${SCHEMA}
Example queries:
-- Find where a function is defined
SELECT f.path, s.line FROM symbols s JOIN files f ON s.file_id=f.id WHERE s.name='HandleRequest'
-- Public API of a package
SELECT s.name, s.kind, s.line, f.path FROM symbols s JOIN files f ON s.file_id=f.id WHERE f.package='server' AND s.exported=1
-- All types in a directory
SELECT s.name, s.line, f.path FROM symbols s JOIN files f ON s.file_id=f.id WHERE f.path LIKE 'backend/api/%' AND s.kind='type'
-- Methods on a class/type (via parent_id)
SELECT c.name as parent, s.name, s.kind, s.line FROM symbols s JOIN symbols c ON s.parent_id=c.id WHERE c.name='AuthService'
-- Overview: symbols per area
SELECT CASE WHEN f.path LIKE 'backend/%' THEN 'backend' WHEN f.path LIKE 'frontend/%' THEN 'frontend' ELSE 'other' END as area, COUNT(*) FROM symbols s JOIN files f ON s.file_id=f.id GROUP BY area`;
/**
 * Resolve the top-level directory of the git repository containing `cwd`.
 * Returns null when `cwd` is not inside a git work tree (or git fails).
 */
function findGitRoot(cwd: string): string | null {
	try {
		const topLevel = execSync("git rev-parse --show-toplevel", {
			cwd,
			encoding: "utf-8",
			stdio: ["pipe", "pipe", "pipe"],
		});
		return topLevel.trim();
	} catch {
		return null;
	}
}
/**
 * Locate `<git-root>/.codexis/index.db` for the repository containing `cwd`.
 * Returns null when there is no enclosing git repo or no index has been built.
 */
function findDatabase(cwd: string): string | null {
	const gitRoot = findGitRoot(cwd);
	if (gitRoot === null) {
		return null;
	}
	const candidate = join(gitRoot, ".codexis", "index.db");
	return existsSync(candidate) ? candidate : null;
}
/**
 * The single `codexis` tool: takes a SQL string, runs it read-only against
 * the repo's .codexis/index.db, and returns an aligned plain-text table.
 */
const codexisTool = defineTool({
	name: "codexis",
	label: "Codexis",
	description: DESCRIPTION,
	parameters: Type.Object({
		sql: Type.String({
			description: "SQL query to run against the code index database",
		}),
	}),
	async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
		const dbPath = findDatabase(ctx.cwd);
		if (!dbPath) {
			throw new Error(
				"No code index found. Run `codexis` in the repo root to generate .codexis/index.db"
			);
		}
		// readonly: SQLite itself rejects writes even if the prefix check
		// below is sidestepped (e.g. via comments before the first keyword).
		const db = new Database(dbPath, { readonly: true });
		try {
			// Block writes — a coarse prefix allowlist, friendlier than the
			// SQLite readonly error the LLM would otherwise get.
			const normalized = params.sql.trim().toUpperCase();
			if (
				!normalized.startsWith("SELECT") &&
				!normalized.startsWith("WITH") &&
				!normalized.startsWith("EXPLAIN") &&
				!normalized.startsWith("PRAGMA")
			) {
				throw new Error("Only SELECT, WITH, EXPLAIN, and PRAGMA queries are allowed");
			}
			const stmt = db.prepare(params.sql);
			const rows = stmt.all();
			if (rows.length === 0) {
				return {
					content: [{ type: "text", text: "No results." }],
					details: { rowCount: 0 },
				};
			}
			// Format as aligned text table; column set comes from the first row.
			const columns = Object.keys(rows[0] as Record<string, unknown>);
			const data = rows.map((row) => {
				const r = row as Record<string, unknown>;
				return columns.map((col) => String(r[col] ?? "NULL"));
			});
			// Each column is padded to the widest of its header and values.
			const widths = columns.map((col, i) =>
				Math.max(col.length, ...data.map((row) => row[i].length))
			);
			const header = columns
				.map((col, i) => col.padEnd(widths[i]))
				.join(" ");
			const separator = widths.map((w) => "-".repeat(w)).join(" ");
			const body = data
				.map((row) =>
					row.map((val, i) => val.padEnd(widths[i])).join(" ")
				)
				.join("\n");
			const result = `${header}\n${separator}\n${body}`;
			// Truncate if huge (character budget, not a row count).
			const maxLen = 48000;
			const truncated =
				result.length > maxLen
					? result.slice(0, maxLen) +
					`\n\n[Truncated: ${rows.length} rows total, showing partial results. Narrow your query.]`
					: result;
			return {
				content: [{ type: "text", text: truncated }],
				details: { rowCount: rows.length },
			};
		} finally {
			db.close();
		}
	},
});
/** Extension entry point: registers the single `codexis` query tool with pi. */
export default function (pi: ExtensionAPI) {
	pi.registerTool(codexisTool);
}

61
flake.lock generated Normal file
View File

@@ -0,0 +1,61 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1771208521,
"narHash": "sha256-X01Q3DgSpjeBpapoGA4rzKOn25qdKxbPnxHeMLNoHTU=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "fa56d7d6de78f5a7f997b0ea2bc6efd5868ad9e8",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-25.11",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

28
flake.nix Normal file
View File

@@ -0,0 +1,28 @@
{
  # Minimal dev-shell flake: pins nixpkgs 25.11 and exposes a shell with Go.
  description = "Dev Shell";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.11";
    flake-utils.url = "github:numtide/flake-utils";
  };

  outputs =
    { self
    , nixpkgs
    , flake-utils
    ,
    }:
    flake-utils.lib.eachDefaultSystem (
      system:
      let
        pkgs = nixpkgs.legacyPackages.${system};
      in
      {
        # `nix develop` drops into a shell with the Go toolchain available.
        devShells.default = pkgs.mkShell {
          packages = with pkgs; [
            go
          ];
        };
      }
    );
}

8
go.mod Normal file
View File

@@ -0,0 +1,8 @@
module codexis
go 1.25.0
require (
github.com/mattn/go-sqlite3 v1.14.42
github.com/odvcencio/gotreesitter v0.13.4
)

4
go.sum Normal file
View File

@@ -0,0 +1,4 @@
github.com/mattn/go-sqlite3 v1.14.42 h1:MigqEP4ZmHw3aIdIT7T+9TLa90Z6smwcthx+Azv4Cgo=
github.com/mattn/go-sqlite3 v1.14.42/go.mod h1:pjEuOr8IwzLJP2MfGeTb0A35jauH+C2kbHKBr7yXKVQ=
github.com/odvcencio/gotreesitter v0.13.4 h1:O/FqOlabRz1Neg6UISx0URtwuN1FQ2eGCc846KHcBbQ=
github.com/odvcencio/gotreesitter v0.13.4/go.mod h1:Sx+iYJBfw5xSWkSttLSuFvguJctlH+ma1BTxZ0MPCqo=

218
indexer/indexer.go Normal file
View File

@@ -0,0 +1,218 @@
package indexer
import (
"context"
"crypto/sha256"
"database/sql"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/odvcencio/gotreesitter"
"github.com/odvcencio/gotreesitter/grammars"
"codexis/db"
)
// Indexer walks a codebase, extracts symbols via tree-sitter, and stores them in SQLite.
type Indexer struct {
	queries *db.Queries // generated query layer over the index database
	root    string      // root directory of the codebase being indexed
	force   bool        // when true, re-index every file regardless of hash
}

// New creates a new Indexer.
func New(queries *db.Queries, root string, force bool) *Indexer {
	idx := Indexer{queries: queries, root: root, force: force}
	return &idx
}
// Stats holds indexing statistics.
type Stats struct {
	FilesTotal   int // files discovered by the walker
	FilesIndexed int // files (re)indexed this run
	FilesSkipped int // files skipped because their hash was unchanged
	SymbolsTotal int // symbols stored across all indexed files
}
// Index walks the codebase, indexes every file tree-sitter recognizes, and
// then prunes database rows for files that no longer exist on disk.
func (idx *Indexer) Index(ctx context.Context) (*Stats, error) {
	files, err := WalkFiles(idx.root)
	if err != nil {
		return nil, fmt.Errorf("walking files: %w", err)
	}

	stats := Stats{FilesTotal: len(files)}
	for _, relPath := range files {
		indexed, symbolCount, err := idx.indexFile(ctx, relPath)
		switch {
		case err != nil:
			// Per-file failures are warnings: keep indexing the rest.
			fmt.Fprintf(os.Stderr, "warn: %s: %v\n", relPath, err)
		case indexed:
			stats.FilesIndexed++
			stats.SymbolsTotal += symbolCount
		default:
			stats.FilesSkipped++
		}
	}

	// Drop rows for files that vanished since the last run.
	if err := idx.queries.DeleteStaleFiles(ctx, files); err != nil {
		return nil, fmt.Errorf("cleaning stale files: %w", err)
	}
	return &stats, nil
}
// indexFile indexes a single file, skipping it when its stored content hash
// matches the current one (unless the indexer was built with force=true).
// It returns indexed=false for both unchanged and unrecognized-language
// files; symbolCount is the number of symbols stored for the file.
func (idx *Indexer) indexFile(ctx context.Context, relPath string) (indexed bool, symbolCount int, err error) {
	absPath := filepath.Join(idx.root, relPath)
	src, err := os.ReadFile(absPath)
	if err != nil {
		return false, 0, fmt.Errorf("reading file: %w", err)
	}
	hash := fmt.Sprintf("%x", sha256.Sum256(src))
	// Check if file has changed
	if !idx.force {
		existing, err := idx.queries.GetFileByPath(ctx, relPath)
		// A lookup error just means "not indexed yet" — fall through and index.
		if err == nil && existing.Hash == hash {
			return false, 0, nil // unchanged
		}
	}
	// Detect language
	entry := grammars.DetectLanguage(filepath.Base(relPath))
	if entry == nil {
		return false, 0, nil
	}
	// Extract package
	pkg := ExtractPackage(src, relPath, entry)
	// Upsert file record
	file, err := idx.queries.UpsertFile(ctx, db.UpsertFileParams{
		Path:     relPath,
		Language: entry.Name,
		Package:  sql.NullString{String: pkg, Valid: pkg != ""},
		Hash:     hash,
	})
	if err != nil {
		return false, 0, fmt.Errorf("upserting file: %w", err)
	}
	// Clear old symbols so re-indexing replaces rather than accumulates rows.
	if err := idx.queries.DeleteSymbolsByFileID(ctx, file.ID); err != nil {
		return false, 0, fmt.Errorf("deleting old symbols: %w", err)
	}
	// Extract and store symbols
	tags := extractTags(src, entry)
	defs := buildSymbolDefs(tags, file.ID, entry.Name)
	// Insert symbols in order, tracking DB IDs for parent resolution
	dbIDs := make([]int64, len(defs))
	for i, def := range defs {
		// Resolve parent_id from local index to actual DB ID. A parent always
		// precedes its children in defs, so dbIDs[parentIdx] is already set.
		params := def.params
		if params.ParentID.Valid {
			parentIdx := params.ParentID.Int64
			params.ParentID = sql.NullInt64{Int64: dbIDs[parentIdx], Valid: true}
		}
		id, err := idx.queries.InsertSymbol(ctx, params)
		if err != nil {
			return false, 0, fmt.Errorf("inserting symbol %q: %w", params.Name, err)
		}
		dbIDs[i] = id
	}
	return true, len(defs), nil
}
// extractTags runs the grammar's tags query over src and returns the raw
// tree-sitter tags. Any failure (no tags query available, tagger construction
// error) is treated as "no symbols" rather than surfaced as an error.
func extractTags(src []byte, entry *grammars.LangEntry) []gotreesitter.Tag {
	lang := entry.Language()
	// ResolveTagsQuery returns the explicit TagsQuery if set, otherwise
	// infers one from the grammar's symbol table.
	query := grammars.ResolveTagsQuery(*entry)
	if query == "" {
		return nil
	}
	if tagger, err := gotreesitter.NewTagger(lang, query); err == nil {
		return tagger.Tag(src)
	}
	return nil
}
// symbolDef pairs a raw tree-sitter tag with the insert parameters derived
// from it; the tag's full range is kept for parent-containment checks.
type symbolDef struct {
	tag    gotreesitter.Tag
	params db.InsertSymbolParams
}

// buildSymbolDefs converts definition tags into insert-ready symbol rows for
// fileID, then links each symbol to its nearest enclosing definition via
// ParentID. ParentID holds a local index into the returned slice — the caller
// resolves it to a database id while inserting.
func buildSymbolDefs(tags []gotreesitter.Tag, fileID int64, langName string) []symbolDef {
	// First pass: collect all definition tags
	var defs []symbolDef
	for _, tag := range tags {
		kind := tagKind(tag.Kind)
		if kind == "" {
			continue // skip references and unknown kinds
		}
		exported := IsExported(tag.Name, langName)
		params := db.InsertSymbolParams{
			FileID: fileID,
			Name:   tag.Name,
			Kind:   kind,
			// Line/Col come from the name's range; LineEnd from the whole
			// definition's range, so it spans the body.
			Line:     int64(tag.NameRange.StartPoint.Row) + 1, // 1-indexed
			LineEnd:  sql.NullInt64{Int64: int64(tag.Range.EndPoint.Row) + 1, Valid: true},
			Col:      sql.NullInt64{Int64: int64(tag.NameRange.StartPoint.Column), Valid: true},
			ColEnd:   sql.NullInt64{Int64: int64(tag.NameRange.EndPoint.Column), Valid: true},
			Exported: sql.NullBool{Bool: exported, Valid: true},
			ParentID: sql.NullInt64{Valid: false},
		}
		defs = append(defs, symbolDef{tag: tag, params: params})
	}
	// Second pass: determine parent relationships based on range containment.
	// ParentID stores the local index — resolved to DB ID during insert.
	// Tree-sitter returns tags in document order (outer before inner),
	// so scanning backwards finds the nearest enclosing definition.
	for i := range defs {
		for j := i - 1; j >= 0; j-- {
			if containsRange(defs[j].tag.Range, defs[i].tag.Range) {
				defs[i].params.ParentID = sql.NullInt64{Int64: int64(j), Valid: true}
				break
			}
		}
	}
	return defs
}
// containsRange reports whether inner lies entirely within outer,
// by byte offsets with inclusive boundaries.
func containsRange(outer, inner gotreesitter.Range) bool {
	if inner.StartByte < outer.StartByte {
		return false
	}
	return inner.EndByte <= outer.EndByte
}
// tagKind converts a tree-sitter tag kind like "definition.function" to the
// bare symbol kind ("function").
//
// Only the kinds accepted by the symbols table's CHECK constraint are
// returned; references and unrecognized definition kinds (e.g.
// "definition.module") yield "" so callers skip them instead of failing
// the INSERT with a constraint violation.
func tagKind(kind string) string {
	const prefix = "definition."
	k, ok := strings.CutPrefix(kind, prefix)
	if !ok {
		return ""
	}
	switch k {
	case "function", "method", "class", "type",
		"interface", "constant", "variable", "constructor":
		return k
	}
	return ""
}

92
indexer/scope.go Normal file
View File

@@ -0,0 +1,92 @@
package indexer
import (
	"path/filepath"
	"strings"
	"unicode"
	"unicode/utf8"

	"github.com/odvcencio/gotreesitter"
	"github.com/odvcencio/gotreesitter/grammars"
)
// packageQueries maps language names to tree-sitter queries that extract the
// package/module declaration. The query must capture the package name as @name.
// Languages missing from this map fall back to directory-name derivation in
// ExtractPackage.
var packageQueries = map[string]string{
	"go":     `(package_clause (package_identifier) @name)`,
	"proto":  `(package (full_ident) @name)`,
	"java":   `(package_declaration (scoped_identifier) @name)`,
	"kotlin": `(package_header (identifier) @name)`,
	"scala":  `(package_clause (identifier) @name)`,
	"rust":   `(mod_item name: (identifier) @name)`,
	"elixir": `(call target: (dot left: (alias) @name))`, // defmodule
	"erlang": `(module_attribute name: (atom) @name)`,
}
// ExtractPackage extracts the package/module name from source code.
// It first tries the language-specific AST query when one exists; if that
// yields nothing, it derives the name from the file's parent directory,
// returning "" for files at the repository root.
func ExtractPackage(src []byte, filePath string, entry *grammars.LangEntry) string {
	queryStr, ok := packageQueries[entry.Name]
	if ok {
		if pkg := runPackageQuery(src, entry.Language(), queryStr); pkg != "" {
			return pkg
		}
	}
	// Fallback: derive from directory name
	switch dir := filepath.Dir(filePath); dir {
	case ".", "":
		return ""
	default:
		return filepath.Base(dir)
	}
}
// runPackageQuery parses src with lang and executes queryStr against the
// root node, returning the text of the first capture named "name" (the
// package identifier). Parse failures, query errors, and empty results all
// return "" so the caller falls back to path-based derivation.
func runPackageQuery(src []byte, lang *gotreesitter.Language, queryStr string) string {
	parser := gotreesitter.NewParser(lang)
	tree, err := parser.Parse(src)
	if err != nil || tree == nil || tree.RootNode() == nil {
		return ""
	}
	defer tree.Release()
	query, err := gotreesitter.NewQuery(queryStr, lang)
	if err != nil {
		// A query that doesn't compile for this grammar means "no package".
		return ""
	}
	cursor := query.Exec(tree.RootNode(), lang, src)
	for {
		match, ok := cursor.NextMatch()
		if !ok {
			break
		}
		// Queries in packageQueries capture the package identifier as @name.
		for _, cap := range match.Captures {
			if cap.Name == "name" {
				return cap.Node.Text(src)
			}
		}
	}
	return ""
}
// IsExported reports whether a symbol name is exported/public by the naming
// conventions of the given language.
//
// Only Go and Python encode visibility in the name itself. For every other
// language visibility comes from an AST modifier (pub/export/public) that is
// not available here, so we default to true (treat as exported).
func IsExported(name string, langName string) bool {
	if name == "" {
		return false
	}
	switch langName {
	case "go":
		// Go: exported iff the first rune is an uppercase letter. Decode a
		// rune (not a byte) so non-ASCII identifiers like "Über" are handled
		// per the Go spec.
		r, _ := utf8.DecodeRuneInString(name)
		return unicode.IsUpper(r)
	case "python":
		// Python: a leading underscore marks a private name by convention.
		return !strings.HasPrefix(name, "_")
	case "rust":
		// Rust visibility comes from the `pub` keyword, not the name —
		// without the modifier we default to exported.
		return true
	default:
		// Most languages (JS/TS/Java/etc): export/public is an AST modifier
		// we cannot recover from the name alone — default to exported.
		return true
	}
}

38
indexer/walker.go Normal file
View File

@@ -0,0 +1,38 @@
package indexer
import (
	"bytes"
	"errors"
	"fmt"
	"os/exec"
	"path"
	"strings"

	"github.com/odvcencio/gotreesitter/grammars"
)
// WalkFiles returns all git-tracked (plus untracked-but-not-ignored) files
// under root that tree-sitter has a grammar for. It shells out to
// `git ls-files` so .gitignore rules are respected exactly as git sees them.
//
// Returned paths are slash-separated and relative to root, in the order git
// prints them.
func WalkFiles(root string) ([]string, error) {
	cmd := exec.Command("git", "ls-files", "--cached", "--others", "--exclude-standard")
	cmd.Dir = root
	out, err := cmd.Output()
	if err != nil {
		// Surface git's stderr when available (e.g. "not a git repository")
		// instead of a bare exit-status error.
		var exitErr *exec.ExitError
		if errors.As(err, &exitErr) && len(exitErr.Stderr) > 0 {
			return nil, fmt.Errorf("git ls-files: %s: %w", bytes.TrimSpace(exitErr.Stderr), err)
		}
		return nil, fmt.Errorf("git ls-files: %w", err)
	}
	var files []string
	for _, line := range bytes.Split(out, []byte("\n")) {
		relPath := strings.TrimSpace(string(line))
		if relPath == "" {
			continue
		}
		// DetectLanguage matches on the bare filename. Git always emits
		// slash-separated paths, so path.Base (not filepath.Base) is correct
		// on every platform.
		if entry := grammars.DetectLanguage(path.Base(relPath)); entry != nil {
			files = append(files, relPath)
		}
	}
	return files, nil
}

127
main.go Normal file
View File

@@ -0,0 +1,127 @@
package main
import (
"context"
"database/sql"
"flag"
"fmt"
"os"
"path/filepath"
"time"
_ "github.com/mattn/go-sqlite3"
"codexis/db"
"codexis/indexer"
)
const dbDir = ".codexis"
const dbFileName = "index.db"
// main parses flags, resolves the index root and output path, and runs the
// indexer. The database defaults to <root>/.codexis/index.db; -o overrides it.
func main() {
	force := flag.Bool("force", false, "Force full re-index (ignore file hashes)")
	output := flag.String("o", "", "Output database path (default: <root>/.codexis/index.db)")
	flag.Parse()

	// Optional positional argument overrides the default root of ".".
	root := "."
	if flag.NArg() > 0 {
		root = flag.Arg(0)
	}
	absRoot, err := filepath.Abs(root)
	if err != nil {
		fmt.Fprintf(os.Stderr, "error: %v\n", err)
		os.Exit(1)
	}

	// Resolve the output path before creating directories so that -o does not
	// leave a spurious <root>/.codexis behind, and so -o's parent directory
	// is created when missing.
	dbPath := *output
	if dbPath == "" {
		dbPath = filepath.Join(absRoot, dbDir, dbFileName)
	}
	if err := os.MkdirAll(filepath.Dir(dbPath), 0755); err != nil {
		fmt.Fprintf(os.Stderr, "error: %v\n", err)
		os.Exit(1)
	}

	if err := run(absRoot, dbPath, *force); err != nil {
		fmt.Fprintf(os.Stderr, "error: %v\n", err)
		os.Exit(1)
	}
}
// run opens (or creates) the SQLite database at dbPath, ensures the schema
// exists, indexes the tree rooted at root, and prints a summary to stderr.
func run(root, dbPath string, force bool) error {
	ctx := context.Background()
	// go-sqlite3 DSN options: WAL journal mode and foreign-key enforcement
	// (needed for symbols.file_id ON DELETE CASCADE to take effect).
	sqlDB, err := sql.Open("sqlite3", dbPath+"?_journal_mode=WAL&_foreign_keys=on")
	if err != nil {
		return fmt.Errorf("opening database: %w", err)
	}
	defer sqlDB.Close()
	// Create schema
	if err := createSchema(ctx, sqlDB); err != nil {
		return fmt.Errorf("creating schema: %w", err)
	}
	queries := db.New(sqlDB)
	idx := indexer.New(queries, root, force)
	start := time.Now()
	fmt.Fprintf(os.Stderr, "Indexing %s...\n", root)
	stats, err := idx.Index(ctx)
	if err != nil {
		return fmt.Errorf("indexing: %w", err)
	}
	elapsed := time.Since(start)
	fmt.Fprintf(os.Stderr, "Done in %s\n", elapsed.Round(time.Millisecond))
	fmt.Fprintf(os.Stderr, " Files: %d total, %d indexed, %d unchanged\n",
		stats.FilesTotal, stats.FilesIndexed, stats.FilesSkipped)
	fmt.Fprintf(os.Stderr, " Symbols: %d\n", stats.SymbolsTotal)
	fmt.Fprintf(os.Stderr, " Output: %s\n", dbPath)
	return nil
}
// createSchema creates the tables and indexes if they do not already exist,
// so repeated runs against an existing database are no-ops.
//
// NOTE(review): this DDL duplicates db/schema.sql (with IF NOT EXISTS added);
// the two must be kept in sync when the schema changes.
func createSchema(ctx context.Context, sqlDB *sql.DB) error {
	schema := `
CREATE TABLE IF NOT EXISTS files (
id INTEGER PRIMARY KEY,
path TEXT NOT NULL UNIQUE,
language TEXT NOT NULL,
package TEXT,
hash TEXT NOT NULL,
indexed_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS symbols (
id INTEGER PRIMARY KEY,
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
name TEXT NOT NULL,
kind TEXT NOT NULL CHECK(kind IN (
'function', 'method', 'class', 'type',
'interface', 'constant', 'variable', 'constructor'
)),
line INTEGER NOT NULL,
line_end INTEGER,
col INTEGER,
col_end INTEGER,
exported BOOLEAN,
parent_id INTEGER REFERENCES symbols(id),
UNIQUE(file_id, name, kind, line)
);
CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
CREATE INDEX IF NOT EXISTS idx_symbols_kind ON symbols(kind);
CREATE INDEX IF NOT EXISTS idx_symbols_file_line ON symbols(file_id, line);
CREATE INDEX IF NOT EXISTS idx_symbols_parent ON symbols(parent_id);
CREATE INDEX IF NOT EXISTS idx_symbols_exported ON symbols(exported, kind);
CREATE INDEX IF NOT EXISTS idx_files_path ON files(path);
CREATE INDEX IF NOT EXISTS idx_files_language ON files(language);
CREATE INDEX IF NOT EXISTS idx_files_package ON files(package);
`
	_, err := sqlDB.ExecContext(ctx, schema)
	return err
}