package index import ( "database/sql" "os" "path/filepath" sqlite_vec "github.com/asg017/sqlite-vec-go-bindings/cgo" _ "github.com/mattn/go-sqlite3" "code.northwest.io/codevec/internal/chunker" ) // Index stores chunks and embeddings in SQLite with sqlite-vec type Index struct { db *sql.DB dims int } // Open opens or creates an index at the given path func Open(path string, dims int) (*Index, error) { // Register sqlite-vec extension sqlite_vec.Auto() // Ensure directory exists dir := filepath.Dir(path) if err := os.MkdirAll(dir, 0755); err != nil { return nil, err } db, err := sql.Open("sqlite3", path) if err != nil { return nil, err } idx := &Index{db: db, dims: dims} if err := idx.init(); err != nil { db.Close() return nil, err } return idx, nil } func (idx *Index) init() error { // Create chunks table _, err := idx.db.Exec(` CREATE TABLE IF NOT EXISTS chunks ( id INTEGER PRIMARY KEY, file TEXT NOT NULL, start_line INTEGER NOT NULL, end_line INTEGER NOT NULL, chunk_type TEXT, name TEXT, content TEXT NOT NULL, hash TEXT NOT NULL, created_at INTEGER DEFAULT (unixepoch()) ) `) if err != nil { return err } // Create files table for tracking indexed files _, err = idx.db.Exec(` CREATE TABLE IF NOT EXISTS files ( path TEXT PRIMARY KEY, hash TEXT NOT NULL, indexed_at INTEGER DEFAULT (unixepoch()) ) `) if err != nil { return err } // Create metadata table _, err = idx.db.Exec(` CREATE TABLE IF NOT EXISTS metadata ( key TEXT PRIMARY KEY, value TEXT ) `) if err != nil { return err } // Create vec0 virtual table for vectors _, err = idx.db.Exec(` CREATE VIRTUAL TABLE IF NOT EXISTS vectors USING vec0( chunk_id INTEGER PRIMARY KEY, embedding FLOAT[768] distance_metric=cosine ) `) if err != nil { return err } // Index on file for faster deletion _, err = idx.db.Exec(`CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file)`) return err } // Close closes the index func (idx *Index) Close() error { return idx.db.Close() } // InsertChunk inserts a chunk with its embedding func (idx *Index) InsertChunk(chunk chunker.Chunk, embedding []float32) error { // Insert chunk result, err := idx.db.Exec(` INSERT INTO chunks (file, start_line, end_line, chunk_type, name, content, hash) VALUES (?, ?, ?, ?, ?, ?, ?) `, chunk.File, chunk.StartLine, chunk.EndLine, chunk.Type, chunk.Name, chunk.Content, chunk.Hash) if err != nil { return err } chunkID, err := result.LastInsertId() if err != nil { return err } // Insert vector vecBlob, err := sqlite_vec.SerializeFloat32(embedding) if err != nil { return err } _, err = idx.db.Exec(`INSERT INTO vectors (chunk_id, embedding) VALUES (?, ?)`, chunkID, vecBlob) return err } // SearchResult represents a search result type SearchResult struct { Chunk chunker.Chunk Distance float64 } // Search finds chunks similar to the query embedding using sqlite-vec func (idx *Index) Search(queryEmb []float32, limit int) ([]SearchResult, error) { vecBlob, err := sqlite_vec.SerializeFloat32(queryEmb) if err != nil { return nil, err } // Query similar vectors rows, err := idx.db.Query(` SELECT v.chunk_id, v.distance, c.file, c.start_line, c.end_line, c.chunk_type, c.name, c.content, c.hash FROM vectors v JOIN chunks c ON c.id = v.chunk_id WHERE v.embedding MATCH ? AND k = ? ORDER BY v.distance `, vecBlob, limit) if err != nil { return nil, err } defer rows.Close() var results []SearchResult for rows.Next() { var chunkID int64 var distance float64 var c chunker.Chunk err := rows.Scan(&chunkID, &distance, &c.File, &c.StartLine, &c.EndLine, &c.Type, &c.Name, &c.Content, &c.Hash) if err != nil { return nil, err } results = append(results, SearchResult{Chunk: c, Distance: distance}) } return results, rows.Err() } // GetFileHash returns the stored hash for a file, or empty string if not indexed func (idx *Index) GetFileHash(path string) (string, error) { var hash string err := idx.db.QueryRow(`SELECT hash FROM files WHERE path = ?`, path).Scan(&hash) if err == sql.ErrNoRows { return "", nil } return hash, err } // SetFileHash updates the hash for a file func (idx *Index) SetFileHash(path, hash string) error { _, err := idx.db.Exec(` INSERT OR REPLACE INTO files (path, hash, indexed_at) VALUES (?, ?, unixepoch()) `, path, hash) return err } // DeleteChunksForFile removes all chunks for a file func (idx *Index) DeleteChunksForFile(path string) error { // Delete vectors for chunks in this file _, err := idx.db.Exec(`DELETE FROM vectors WHERE chunk_id IN (SELECT id FROM chunks WHERE file = ?)`, path) if err != nil { return err } _, err = idx.db.Exec(`DELETE FROM chunks WHERE file = ?`, path) if err != nil { return err } _, err = idx.db.Exec(`DELETE FROM files WHERE path = ?`, path) return err } // Stats returns index statistics type Stats struct { Files int Chunks int } func (idx *Index) Stats() (Stats, error) { var s Stats err := idx.db.QueryRow(`SELECT COUNT(*) FROM files`).Scan(&s.Files) if err != nil { return s, err } err = idx.db.QueryRow(`SELECT COUNT(*) FROM chunks`).Scan(&s.Chunks) return s, err } // SetMetadata stores metadata func (idx *Index) SetMetadata(key, value string) error { _, err := idx.db.Exec(`INSERT OR REPLACE INTO metadata (key, value) VALUES (?, ?)`, key, value) return err } // GetMetadata retrieves metadata func (idx *Index) GetMetadata(key string) (string, error) { var value string err := idx.db.QueryRow(`SELECT value FROM metadata WHERE key = ?`, key).Scan(&value) if err == sql.ErrNoRows { return "", nil } return value, err }