aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--go.mod14
-rw-r--r--go.sum57
-rw-r--r--internal/index/index.go163
3 files changed, 72 insertions, 162 deletions
diff --git a/go.mod b/go.mod
index 914896e..d641b8b 100644
--- a/go.mod
+++ b/go.mod
@@ -3,24 +3,14 @@ module code.northwest.io/codevec
3go 1.24.0 3go 1.24.0
4 4
5require ( 5require (
6 github.com/asg017/sqlite-vec-go-bindings v0.1.6
7 github.com/mattn/go-sqlite3 v1.14.33
6 github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 8 github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06
7 github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 9 github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82
8 github.com/spf13/cobra v1.10.2 10 github.com/spf13/cobra v1.10.2
9 modernc.org/sqlite v1.46.1
10) 11)
11 12
12require ( 13require (
13 github.com/dustin/go-humanize v1.0.1 // indirect
14 github.com/google/uuid v1.6.0 // indirect
15 github.com/inconshreveable/mousetrap v1.1.0 // indirect 14 github.com/inconshreveable/mousetrap v1.1.0 // indirect
16 github.com/mattn/go-isatty v0.0.20 // indirect
17 github.com/ncruces/go-strftime v1.0.0 // indirect
18 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
19 github.com/spf13/pflag v1.0.9 // indirect 15 github.com/spf13/pflag v1.0.9 // indirect
20 golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect
21 golang.org/x/sync v0.19.0 // indirect
22 golang.org/x/sys v0.40.0 // indirect
23 modernc.org/libc v1.67.6 // indirect
24 modernc.org/mathutil v1.7.1 // indirect
25 modernc.org/memory v1.11.0 // indirect
26) 16)
diff --git a/go.sum b/go.sum
index 0d5edbc..87113e7 100644
--- a/go.sum
+++ b/go.sum
@@ -1,25 +1,15 @@
1github.com/asg017/sqlite-vec-go-bindings v0.1.6 h1:Nx0jAzyS38XpkKznJ9xQjFXz2X9tI7KqjwVxV8RNoww=
2github.com/asg017/sqlite-vec-go-bindings v0.1.6/go.mod h1:A8+cTt/nKFsYCQF6OgzSNpKZrzNo5gQsXBTfsXHXY0Q=
1github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= 3github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
2github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 5github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
4github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 6github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
5github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
6github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
7github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
8github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
9github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
10github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
11github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
12github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
13github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= 7github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
14github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= 8github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
15github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= 9github.com/mattn/go-sqlite3 v1.14.33 h1:A5blZ5ulQo2AtayQ9/limgHEkFreKj1Dv226a1K73s0=
16github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 10github.com/mattn/go-sqlite3 v1.14.33/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
17github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
18github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
19github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 11github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
20github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 12github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
21github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
22github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
23github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 13github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
24github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI= 14github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI=
25github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs= 15github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs=
@@ -34,46 +24,7 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
34github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 24github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
35github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 25github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
36go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= 26go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
37golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 h1:mgKeJMpvi0yx/sU5GsxQ7p6s2wtOnGAHZWCHUM4KGzY=
38golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546/go.mod h1:j/pmGrbnkbPtQfxEe5D0VQhZC6qKbfKifgD0oM7sR70=
39golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
40golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
41golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
42golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
43golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
44golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ=
45golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
46golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
47golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
48gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 27gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
49gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 28gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
50gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 29gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
51gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 30gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
52modernc.org/cc/v4 v4.27.1 h1:9W30zRlYrefrDV2JE2O8VDtJ1yPGownxciz5rrbQZis=
53modernc.org/cc/v4 v4.27.1/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0=
54modernc.org/ccgo/v4 v4.30.1 h1:4r4U1J6Fhj98NKfSjnPUN7Ze2c6MnAdL0hWw6+LrJpc=
55modernc.org/ccgo/v4 v4.30.1/go.mod h1:bIOeI1JL54Utlxn+LwrFyjCx2n2RDiYEaJVSrgdrRfM=
56modernc.org/fileutil v1.3.40 h1:ZGMswMNc9JOCrcrakF1HrvmergNLAmxOPjizirpfqBA=
57modernc.org/fileutil v1.3.40/go.mod h1:HxmghZSZVAz/LXcMNwZPA/DRrQZEVP9VX0V4LQGQFOc=
58modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
59modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
60modernc.org/gc/v3 v3.1.1 h1:k8T3gkXWY9sEiytKhcgyiZ2L0DTyCQ/nvX+LoCljoRE=
61modernc.org/gc/v3 v3.1.1/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
62modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
63modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
64modernc.org/libc v1.67.6 h1:eVOQvpModVLKOdT+LvBPjdQqfrZq+pC39BygcT+E7OI=
65modernc.org/libc v1.67.6/go.mod h1:JAhxUVlolfYDErnwiqaLvUqc8nfb2r6S6slAgZOnaiE=
66modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
67modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
68modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
69modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
70modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
71modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
72modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
73modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
74modernc.org/sqlite v1.46.1 h1:eFJ2ShBLIEnUWlLy12raN0Z1plqmFX9Qe3rjQTKt6sU=
75modernc.org/sqlite v1.46.1/go.mod h1:CzbrU2lSB1DKUusvwGz7rqEKIq+NUd8GWuBBZDs9/nA=
76modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
77modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
78modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
79modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
diff --git a/internal/index/index.go b/internal/index/index.go
index 008e487..5ce9f4f 100644
--- a/internal/index/index.go
+++ b/internal/index/index.go
@@ -2,18 +2,16 @@ package index
2 2
3import ( 3import (
4 "database/sql" 4 "database/sql"
5 "encoding/binary"
6 "math"
7 "os" 5 "os"
8 "path/filepath" 6 "path/filepath"
9 "sort"
10 7
11 _ "modernc.org/sqlite" 8 sqlite_vec "github.com/asg017/sqlite-vec-go-bindings/cgo"
9 _ "github.com/mattn/go-sqlite3"
12 10
13 "code.northwest.io/codevec/internal/chunker" 11 "code.northwest.io/codevec/internal/chunker"
14) 12)
15 13
16// Index stores chunks and embeddings in SQLite 14// Index stores chunks and embeddings in SQLite with sqlite-vec
17type Index struct { 15type Index struct {
18 db *sql.DB 16 db *sql.DB
19 dims int 17 dims int
@@ -21,13 +19,16 @@ type Index struct {
21 19
22// Open opens or creates an index at the given path 20// Open opens or creates an index at the given path
23func Open(path string, dims int) (*Index, error) { 21func Open(path string, dims int) (*Index, error) {
22 // Register sqlite-vec extension
23 sqlite_vec.Auto()
24
24 // Ensure directory exists 25 // Ensure directory exists
25 dir := filepath.Dir(path) 26 dir := filepath.Dir(path)
26 if err := os.MkdirAll(dir, 0755); err != nil { 27 if err := os.MkdirAll(dir, 0755); err != nil {
27 return nil, err 28 return nil, err
28 } 29 }
29 30
30 db, err := sql.Open("sqlite", path) 31 db, err := sql.Open("sqlite3", path)
31 if err != nil { 32 if err != nil {
32 return nil, err 33 return nil, err
33 } 34 }
@@ -42,7 +43,7 @@ func Open(path string, dims int) (*Index, error) {
42} 43}
43 44
44func (idx *Index) init() error { 45func (idx *Index) init() error {
45 // Create chunks table with embedding column 46 // Create chunks table
46 _, err := idx.db.Exec(` 47 _, err := idx.db.Exec(`
47 CREATE TABLE IF NOT EXISTS chunks ( 48 CREATE TABLE IF NOT EXISTS chunks (
48 id INTEGER PRIMARY KEY, 49 id INTEGER PRIMARY KEY,
@@ -53,7 +54,6 @@ func (idx *Index) init() error {
53 name TEXT, 54 name TEXT,
54 content TEXT NOT NULL, 55 content TEXT NOT NULL,
55 hash TEXT NOT NULL, 56 hash TEXT NOT NULL,
56 embedding BLOB,
57 created_at INTEGER DEFAULT (unixepoch()) 57 created_at INTEGER DEFAULT (unixepoch())
58 ) 58 )
59 `) 59 `)
@@ -84,6 +84,17 @@ func (idx *Index) init() error {
84 return err 84 return err
85 } 85 }
86 86
87 // Create vec0 virtual table for vectors
88 _, err = idx.db.Exec(`
89 CREATE VIRTUAL TABLE IF NOT EXISTS vectors USING vec0(
90 chunk_id INTEGER PRIMARY KEY,
91 embedding FLOAT[768] distance_metric=cosine
92 )
93 `)
94 if err != nil {
95 return err
96 }
97
87 // Index on file for faster deletion 98 // Index on file for faster deletion
88 _, err = idx.db.Exec(`CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file)`) 99 _, err = idx.db.Exec(`CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file)`)
89 return err 100 return err
@@ -96,11 +107,27 @@ func (idx *Index) Close() error {
96 107
97// InsertChunk inserts a chunk with its embedding 108// InsertChunk inserts a chunk with its embedding
98func (idx *Index) InsertChunk(chunk chunker.Chunk, embedding []float32) error { 109func (idx *Index) InsertChunk(chunk chunker.Chunk, embedding []float32) error {
99 embeddingBlob := serializeEmbedding(embedding) 110 // Insert chunk
100 _, err := idx.db.Exec(` 111 result, err := idx.db.Exec(`
101 INSERT INTO chunks (file, start_line, end_line, chunk_type, name, content, hash, embedding) 112 INSERT INTO chunks (file, start_line, end_line, chunk_type, name, content, hash)
102 VALUES (?, ?, ?, ?, ?, ?, ?, ?) 113 VALUES (?, ?, ?, ?, ?, ?, ?)
103 `, chunk.File, chunk.StartLine, chunk.EndLine, chunk.Type, chunk.Name, chunk.Content, chunk.Hash, embeddingBlob) 114 `, chunk.File, chunk.StartLine, chunk.EndLine, chunk.Type, chunk.Name, chunk.Content, chunk.Hash)
115 if err != nil {
116 return err
117 }
118
119 chunkID, err := result.LastInsertId()
120 if err != nil {
121 return err
122 }
123
124 // Insert vector
125 vecBlob, err := sqlite_vec.SerializeFloat32(embedding)
126 if err != nil {
127 return err
128 }
129
130 _, err = idx.db.Exec(`INSERT INTO vectors (chunk_id, embedding) VALUES (?, ?)`, chunkID, vecBlob)
104 return err 131 return err
105} 132}
106 133
@@ -110,62 +137,39 @@ type SearchResult struct {
110 Distance float64 137 Distance float64
111} 138}
112 139
113// Search finds chunks similar to the query embedding using cosine similarity 140// Search finds chunks similar to the query embedding using sqlite-vec
114func (idx *Index) Search(queryEmb []float32, limit int) ([]SearchResult, error) { 141func (idx *Index) Search(queryEmb []float32, limit int) ([]SearchResult, error) {
115 // Load all embeddings 142 vecBlob, err := sqlite_vec.SerializeFloat32(queryEmb)
116 rows, err := idx.db.Query(`
117 SELECT id, file, start_line, end_line, chunk_type, name, content, hash, embedding
118 FROM chunks
119 WHERE embedding IS NOT NULL
120 `)
121 if err != nil { 143 if err != nil {
122 return nil, err 144 return nil, err
123 } 145 }
124 defer rows.Close()
125 146
126 type candidate struct { 147 // Query similar vectors
127 chunk chunker.Chunk 148 rows, err := idx.db.Query(`
128 distance float64 149 SELECT v.chunk_id, v.distance, c.file, c.start_line, c.end_line, c.chunk_type, c.name, c.content, c.hash
150 FROM vectors v
151 JOIN chunks c ON c.id = v.chunk_id
152 WHERE v.embedding MATCH ? AND k = ?
153 ORDER BY v.distance
154 `, vecBlob, limit)
155 if err != nil {
156 return nil, err
129 } 157 }
130 var candidates []candidate 158 defer rows.Close()
131 159
160 var results []SearchResult
132 for rows.Next() { 161 for rows.Next() {
133 var id int64 162 var chunkID int64
163 var distance float64
134 var c chunker.Chunk 164 var c chunker.Chunk
135 var embBlob []byte 165 err := rows.Scan(&chunkID, &distance, &c.File, &c.StartLine, &c.EndLine, &c.Type, &c.Name, &c.Content, &c.Hash)
136 err := rows.Scan(&id, &c.File, &c.StartLine, &c.EndLine, &c.Type, &c.Name, &c.Content, &c.Hash, &embBlob)
137 if err != nil { 166 if err != nil {
138 return nil, err 167 return nil, err
139 } 168 }
140 169 results = append(results, SearchResult{Chunk: c, Distance: distance})
141 emb := deserializeEmbedding(embBlob)
142 dist := cosineDistance(queryEmb, emb)
143 candidates = append(candidates, candidate{chunk: c, distance: dist})
144 }
145
146 if err := rows.Err(); err != nil {
147 return nil, err
148 }
149
150 // Sort by distance (lower is better)
151 sort.Slice(candidates, func(i, j int) bool {
152 return candidates[i].distance < candidates[j].distance
153 })
154
155 // Return top-k
156 if limit > len(candidates) {
157 limit = len(candidates)
158 } 170 }
159 171
160 results := make([]SearchResult, limit) 172 return results, rows.Err()
161 for i := 0; i < limit; i++ {
162 results[i] = SearchResult{
163 Chunk: candidates[i].chunk,
164 Distance: candidates[i].distance,
165 }
166 }
167
168 return results, nil
169} 173}
170 174
171// GetFileHash returns the stored hash for a file, or empty string if not indexed 175// GetFileHash returns the stored hash for a file, or empty string if not indexed
@@ -189,7 +193,13 @@ func (idx *Index) SetFileHash(path, hash string) error {
189 193
190// DeleteChunksForFile removes all chunks for a file 194// DeleteChunksForFile removes all chunks for a file
191func (idx *Index) DeleteChunksForFile(path string) error { 195func (idx *Index) DeleteChunksForFile(path string) error {
192 _, err := idx.db.Exec(`DELETE FROM chunks WHERE file = ?`, path) 196 // Delete vectors for chunks in this file
197 _, err := idx.db.Exec(`DELETE FROM vectors WHERE chunk_id IN (SELECT id FROM chunks WHERE file = ?)`, path)
198 if err != nil {
199 return err
200 }
201
202 _, err = idx.db.Exec(`DELETE FROM chunks WHERE file = ?`, path)
193 if err != nil { 203 if err != nil {
194 return err 204 return err
195 } 205 }
@@ -228,44 +238,3 @@ func (idx *Index) GetMetadata(key string) (string, error) {
228 } 238 }
229 return value, err 239 return value, err
230} 240}
231
232// serializeEmbedding converts float32 slice to bytes
233func serializeEmbedding(embedding []float32) []byte {
234 buf := make([]byte, len(embedding)*4)
235 for i, v := range embedding {
236 binary.LittleEndian.PutUint32(buf[i*4:], math.Float32bits(v))
237 }
238 return buf
239}
240
241// deserializeEmbedding converts bytes back to float32 slice
242func deserializeEmbedding(data []byte) []float32 {
243 n := len(data) / 4
244 result := make([]float32, n)
245 for i := 0; i < n; i++ {
246 bits := binary.LittleEndian.Uint32(data[i*4:])
247 result[i] = math.Float32frombits(bits)
248 }
249 return result
250}
251
252// cosineDistance computes 1 - cosine_similarity (so lower is more similar)
253func cosineDistance(a, b []float32) float64 {
254 if len(a) != len(b) {
255 return 1.0
256 }
257
258 var dotProduct, normA, normB float64
259 for i := range a {
260 dotProduct += float64(a[i]) * float64(b[i])
261 normA += float64(a[i]) * float64(a[i])
262 normB += float64(b[i]) * float64(b[i])
263 }
264
265 if normA == 0 || normB == 0 {
266 return 1.0
267 }
268
269 similarity := dotProduct / (math.Sqrt(normA) * math.Sqrt(normB))
270 return 1.0 - similarity
271}