From 9b4e7b8ba88f65c9c4a77b461f8353cf706e2206 Mon Sep 17 00:00:00 2001 From: bndw Date: Fri, 6 Mar 2026 07:47:10 -0800 Subject: cgo --- go.mod | 14 +---- go.sum | 57 ++--------------- internal/index/index.go | 163 ++++++++++++++++++++---------------------------- 3 files changed, 72 insertions(+), 162 deletions(-) diff --git a/go.mod b/go.mod index 914896e..d641b8b 100644 --- a/go.mod +++ b/go.mod @@ -3,24 +3,14 @@ module code.northwest.io/codevec go 1.24.0 require ( + github.com/asg017/sqlite-vec-go-bindings v0.1.6 + github.com/mattn/go-sqlite3 v1.14.33 github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 github.com/spf13/cobra v1.10.2 - modernc.org/sqlite v1.46.1 ) require ( - github.com/dustin/go-humanize v1.0.1 // indirect - github.com/google/uuid v1.6.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/mattn/go-isatty v0.0.20 // indirect - github.com/ncruces/go-strftime v1.0.0 // indirect - github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/spf13/pflag v1.0.9 // indirect - golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect - golang.org/x/sync v0.19.0 // indirect - golang.org/x/sys v0.40.0 // indirect - modernc.org/libc v1.67.6 // indirect - modernc.org/mathutil v1.7.1 // indirect - modernc.org/memory v1.11.0 // indirect ) diff --git a/go.sum b/go.sum index 0d5edbc..87113e7 100644 --- a/go.sum +++ b/go.sum @@ -1,25 +1,15 @@ +github.com/asg017/sqlite-vec-go-bindings v0.1.6 h1:Nx0jAzyS38XpkKznJ9xQjFXz2X9tI7KqjwVxV8RNoww= +github.com/asg017/sqlite-vec-go-bindings v0.1.6/go.mod h1:A8+cTt/nKFsYCQF6OgzSNpKZrzNo5gQsXBTfsXHXY0Q= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= -github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= -github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs= -github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= -github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= -github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= -github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= -github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= -github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= +github.com/mattn/go-sqlite3 v1.14.33 h1:A5blZ5ulQo2AtayQ9/limgHEkFreKj1Dv226a1K73s0= +github.com/mattn/go-sqlite3 v1.14.33/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= -github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI= github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs= @@ -34,46 +24,7 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= -golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 h1:mgKeJMpvi0yx/sU5GsxQ7p6s2wtOnGAHZWCHUM4KGzY= -golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546/go.mod h1:j/pmGrbnkbPtQfxEe5D0VQhZC6qKbfKifgD0oM7sR70= -golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA= -golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= -golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= -golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= -golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= -golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= -golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -modernc.org/cc/v4 v4.27.1 h1:9W30zRlYrefrDV2JE2O8VDtJ1yPGownxciz5rrbQZis= -modernc.org/cc/v4 v4.27.1/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0= -modernc.org/ccgo/v4 v4.30.1 h1:4r4U1J6Fhj98NKfSjnPUN7Ze2c6MnAdL0hWw6+LrJpc= -modernc.org/ccgo/v4 v4.30.1/go.mod h1:bIOeI1JL54Utlxn+LwrFyjCx2n2RDiYEaJVSrgdrRfM= -modernc.org/fileutil v1.3.40 h1:ZGMswMNc9JOCrcrakF1HrvmergNLAmxOPjizirpfqBA= -modernc.org/fileutil v1.3.40/go.mod h1:HxmghZSZVAz/LXcMNwZPA/DRrQZEVP9VX0V4LQGQFOc= -modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI= -modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito= -modernc.org/gc/v3 v3.1.1 h1:k8T3gkXWY9sEiytKhcgyiZ2L0DTyCQ/nvX+LoCljoRE= -modernc.org/gc/v3 v3.1.1/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY= -modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks= -modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI= -modernc.org/libc v1.67.6 h1:eVOQvpModVLKOdT+LvBPjdQqfrZq+pC39BygcT+E7OI= -modernc.org/libc v1.67.6/go.mod h1:JAhxUVlolfYDErnwiqaLvUqc8nfb2r6S6slAgZOnaiE= -modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU= -modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg= -modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI= -modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw= -modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8= -modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns= -modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w= -modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE= -modernc.org/sqlite v1.46.1 h1:eFJ2ShBLIEnUWlLy12raN0Z1plqmFX9Qe3rjQTKt6sU= -modernc.org/sqlite v1.46.1/go.mod h1:CzbrU2lSB1DKUusvwGz7rqEKIq+NUd8GWuBBZDs9/nA= -modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0= -modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A= -modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= -modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= diff --git a/internal/index/index.go b/internal/index/index.go index 008e487..5ce9f4f 100644 --- a/internal/index/index.go +++ b/internal/index/index.go @@ -2,18 +2,16 @@ package index import ( "database/sql" - "encoding/binary" - "math" "os" "path/filepath" - "sort" - _ "modernc.org/sqlite" + sqlite_vec "github.com/asg017/sqlite-vec-go-bindings/cgo" + _ "github.com/mattn/go-sqlite3" "code.northwest.io/codevec/internal/chunker" ) -// Index stores chunks and embeddings in SQLite +// Index stores chunks and embeddings in SQLite with sqlite-vec type Index struct { db *sql.DB dims int @@ -21,13 +19,16 @@ type Index struct { // Open opens or creates an index at the given path func Open(path string, dims int) (*Index, error) { + // Register sqlite-vec extension + sqlite_vec.Auto() + // Ensure directory exists dir := filepath.Dir(path) if err := os.MkdirAll(dir, 0755); err != nil { return nil, err } - db, err := sql.Open("sqlite", path) + db, err := sql.Open("sqlite3", path) if err != nil { return nil, err } @@ -42,7 +43,7 @@ func Open(path string, dims int) (*Index, error) { } func (idx *Index) init() error { - // Create chunks table with embedding column + // Create chunks table _, err := idx.db.Exec(` CREATE TABLE IF NOT EXISTS chunks ( id INTEGER PRIMARY KEY, @@ -53,7 +54,6 @@ func (idx *Index) init() error { name TEXT, content TEXT NOT NULL, hash TEXT NOT NULL, - embedding BLOB, created_at INTEGER DEFAULT (unixepoch()) ) `) @@ -84,6 +84,17 @@ func (idx *Index) init() error { return err } + // Create vec0 virtual table for vectors + _, err = idx.db.Exec(` + CREATE VIRTUAL TABLE IF NOT EXISTS vectors USING vec0( + chunk_id INTEGER PRIMARY KEY, + embedding FLOAT[768] distance_metric=cosine + ) + `) + if err != nil { + return err + } + // Index on file for faster deletion _, err = idx.db.Exec(`CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file)`) return err @@ -96,11 +107,27 @@ func (idx *Index) Close() error { // InsertChunk inserts a chunk with its embedding func (idx *Index) InsertChunk(chunk chunker.Chunk, embedding []float32) error { - embeddingBlob := serializeEmbedding(embedding) - _, err := idx.db.Exec(` - INSERT INTO chunks (file, start_line, end_line, chunk_type, name, content, hash, embedding) - VALUES (?, ?, ?, ?, ?, ?, ?, ?) - `, chunk.File, chunk.StartLine, chunk.EndLine, chunk.Type, chunk.Name, chunk.Content, chunk.Hash, embeddingBlob) + // Insert chunk + result, err := idx.db.Exec(` + INSERT INTO chunks (file, start_line, end_line, chunk_type, name, content, hash) + VALUES (?, ?, ?, ?, ?, ?, ?) + `, chunk.File, chunk.StartLine, chunk.EndLine, chunk.Type, chunk.Name, chunk.Content, chunk.Hash) + if err != nil { + return err + } + + chunkID, err := result.LastInsertId() + if err != nil { + return err + } + + // Insert vector + vecBlob, err := sqlite_vec.SerializeFloat32(embedding) + if err != nil { + return err + } + + _, err = idx.db.Exec(`INSERT INTO vectors (chunk_id, embedding) VALUES (?, ?)`, chunkID, vecBlob) return err } @@ -110,62 +137,39 @@ type SearchResult struct { Distance float64 } -// Search finds chunks similar to the query embedding using cosine similarity +// Search finds chunks similar to the query embedding using sqlite-vec func (idx *Index) Search(queryEmb []float32, limit int) ([]SearchResult, error) { - // Load all embeddings - rows, err := idx.db.Query(` - SELECT id, file, start_line, end_line, chunk_type, name, content, hash, embedding - FROM chunks - WHERE embedding IS NOT NULL - `) + vecBlob, err := sqlite_vec.SerializeFloat32(queryEmb) if err != nil { return nil, err } - defer rows.Close() - type candidate struct { - chunk chunker.Chunk - distance float64 + // Query similar vectors + rows, err := idx.db.Query(` + SELECT v.chunk_id, v.distance, c.file, c.start_line, c.end_line, c.chunk_type, c.name, c.content, c.hash + FROM vectors v + JOIN chunks c ON c.id = v.chunk_id + WHERE v.embedding MATCH ? AND k = ? + ORDER BY v.distance + `, vecBlob, limit) + if err != nil { + return nil, err } - var candidates []candidate + defer rows.Close() + var results []SearchResult for rows.Next() { - var id int64 + var chunkID int64 + var distance float64 var c chunker.Chunk - var embBlob []byte - err := rows.Scan(&id, &c.File, &c.StartLine, &c.EndLine, &c.Type, &c.Name, &c.Content, &c.Hash, &embBlob) + err := rows.Scan(&chunkID, &distance, &c.File, &c.StartLine, &c.EndLine, &c.Type, &c.Name, &c.Content, &c.Hash) if err != nil { return nil, err } - - emb := deserializeEmbedding(embBlob) - dist := cosineDistance(queryEmb, emb) - candidates = append(candidates, candidate{chunk: c, distance: dist}) - } - - if err := rows.Err(); err != nil { - return nil, err - } - - // Sort by distance (lower is better) - sort.Slice(candidates, func(i, j int) bool { - return candidates[i].distance < candidates[j].distance - }) - - // Return top-k - if limit > len(candidates) { - limit = len(candidates) + results = append(results, SearchResult{Chunk: c, Distance: distance}) } - results := make([]SearchResult, limit) - for i := 0; i < limit; i++ { - results[i] = SearchResult{ - Chunk: candidates[i].chunk, - Distance: candidates[i].distance, - } - } - - return results, nil + return results, rows.Err() } // GetFileHash returns the stored hash for a file, or empty string if not indexed @@ -189,7 +193,13 @@ func (idx *Index) SetFileHash(path, hash string) error { // DeleteChunksForFile removes all chunks for a file func (idx *Index) DeleteChunksForFile(path string) error { - _, err := idx.db.Exec(`DELETE FROM chunks WHERE file = ?`, path) + // Delete vectors for chunks in this file + _, err := idx.db.Exec(`DELETE FROM vectors WHERE chunk_id IN (SELECT id FROM chunks WHERE file = ?)`, path) + if err != nil { + return err + } + + _, err = idx.db.Exec(`DELETE FROM chunks WHERE file = ?`, path) if err != nil { return err } @@ -228,44 +238,3 @@ func (idx *Index) GetMetadata(key string) (string, error) { } return value, err } - -// serializeEmbedding converts float32 slice to bytes -func serializeEmbedding(embedding []float32) []byte { - buf := make([]byte, len(embedding)*4) - for i, v := range embedding { - binary.LittleEndian.PutUint32(buf[i*4:], math.Float32bits(v)) - } - return buf -} - -// deserializeEmbedding converts bytes back to float32 slice -func deserializeEmbedding(data []byte) []float32 { - n := len(data) / 4 - result := make([]float32, n) - for i := 0; i < n; i++ { - bits := binary.LittleEndian.Uint32(data[i*4:]) - result[i] = math.Float32frombits(bits) - } - return result -} - -// cosineDistance computes 1 - cosine_similarity (so lower is more similar) -func cosineDistance(a, b []float32) float64 { - if len(a) != len(b) { - return 1.0 - } - - var dotProduct, normA, normB float64 - for i := range a { - dotProduct += float64(a[i]) * float64(b[i]) - normA += float64(a[i]) * float64(a[i]) - normB += float64(b[i]) * float64(b[i]) - } - - if normA == 0 || normB == 0 { - return 1.0 - } - - similarity := dotProduct / (math.Sqrt(normA) * math.Sqrt(normB)) - return 1.0 - similarity -} -- cgit v1.2.3