diff options
| author | bndw <ben@bdw.to> | 2026-03-06 07:47:10 -0800 |
|---|---|---|
| committer | bndw <ben@bdw.to> | 2026-03-06 07:47:10 -0800 |
| commit | 9b4e7b8ba88f65c9c4a77b461f8353cf706e2206 (patch) | |
| tree | 9c445bde32662f03cc569c1c25c091adb1aa7bab | |
| parent | f1ff85c7acad6b2ae7ec10720619ef2023cb7dc9 (diff) | |
| -rw-r--r-- | go.mod | 14 | ||||
| -rw-r--r-- | go.sum | 57 | ||||
| -rw-r--r-- | internal/index/index.go | 163 |
3 files changed, 72 insertions, 162 deletions
| @@ -3,24 +3,14 @@ module code.northwest.io/codevec | |||
| 3 | go 1.24.0 | 3 | go 1.24.0 |
| 4 | 4 | ||
| 5 | require ( | 5 | require ( |
| 6 | github.com/asg017/sqlite-vec-go-bindings v0.1.6 | ||
| 7 | github.com/mattn/go-sqlite3 v1.14.33 | ||
| 6 | github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 | 8 | github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 |
| 7 | github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 | 9 | github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 |
| 8 | github.com/spf13/cobra v1.10.2 | 10 | github.com/spf13/cobra v1.10.2 |
| 9 | modernc.org/sqlite v1.46.1 | ||
| 10 | ) | 11 | ) |
| 11 | 12 | ||
| 12 | require ( | 13 | require ( |
| 13 | github.com/dustin/go-humanize v1.0.1 // indirect | ||
| 14 | github.com/google/uuid v1.6.0 // indirect | ||
| 15 | github.com/inconshreveable/mousetrap v1.1.0 // indirect | 14 | github.com/inconshreveable/mousetrap v1.1.0 // indirect |
| 16 | github.com/mattn/go-isatty v0.0.20 // indirect | ||
| 17 | github.com/ncruces/go-strftime v1.0.0 // indirect | ||
| 18 | github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect | ||
| 19 | github.com/spf13/pflag v1.0.9 // indirect | 15 | github.com/spf13/pflag v1.0.9 // indirect |
| 20 | golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect | ||
| 21 | golang.org/x/sync v0.19.0 // indirect | ||
| 22 | golang.org/x/sys v0.40.0 // indirect | ||
| 23 | modernc.org/libc v1.67.6 // indirect | ||
| 24 | modernc.org/mathutil v1.7.1 // indirect | ||
| 25 | modernc.org/memory v1.11.0 // indirect | ||
| 26 | ) | 16 | ) |
| @@ -1,25 +1,15 @@ | |||
| 1 | github.com/asg017/sqlite-vec-go-bindings v0.1.6 h1:Nx0jAzyS38XpkKznJ9xQjFXz2X9tI7KqjwVxV8RNoww= | ||
| 2 | github.com/asg017/sqlite-vec-go-bindings v0.1.6/go.mod h1:A8+cTt/nKFsYCQF6OgzSNpKZrzNo5gQsXBTfsXHXY0Q= | ||
| 1 | github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= | 3 | github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= |
| 2 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | 4 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= |
| 3 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= | 5 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= |
| 4 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | 6 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= |
| 5 | github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= | ||
| 6 | github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= | ||
| 7 | github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs= | ||
| 8 | github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= | ||
| 9 | github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= | ||
| 10 | github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= | ||
| 11 | github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= | ||
| 12 | github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= | ||
| 13 | github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= | 7 | github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= |
| 14 | github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= | 8 | github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= |
| 15 | github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= | 9 | github.com/mattn/go-sqlite3 v1.14.33 h1:A5blZ5ulQo2AtayQ9/limgHEkFreKj1Dv226a1K73s0= |
| 16 | github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= | 10 | github.com/mattn/go-sqlite3 v1.14.33/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= |
| 17 | github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= | ||
| 18 | github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= | ||
| 19 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= | 11 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= |
| 20 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= | 12 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= |
| 21 | github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= | ||
| 22 | github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= | ||
| 23 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= | 13 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= |
| 24 | github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI= | 14 | github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI= |
| 25 | github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs= | 15 | github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs= |
| @@ -34,46 +24,7 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ | |||
| 34 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= | 24 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= |
| 35 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= | 25 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= |
| 36 | go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= | 26 | go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= |
| 37 | golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 h1:mgKeJMpvi0yx/sU5GsxQ7p6s2wtOnGAHZWCHUM4KGzY= | ||
| 38 | golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546/go.mod h1:j/pmGrbnkbPtQfxEe5D0VQhZC6qKbfKifgD0oM7sR70= | ||
| 39 | golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA= | ||
| 40 | golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w= | ||
| 41 | golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= | ||
| 42 | golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= | ||
| 43 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= | ||
| 44 | golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= | ||
| 45 | golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= | ||
| 46 | golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ= | ||
| 47 | golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs= | ||
| 48 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= | 27 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= |
| 49 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= | 28 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= |
| 50 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= | 29 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= |
| 51 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= | 30 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= |
| 52 | modernc.org/cc/v4 v4.27.1 h1:9W30zRlYrefrDV2JE2O8VDtJ1yPGownxciz5rrbQZis= | ||
| 53 | modernc.org/cc/v4 v4.27.1/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0= | ||
| 54 | modernc.org/ccgo/v4 v4.30.1 h1:4r4U1J6Fhj98NKfSjnPUN7Ze2c6MnAdL0hWw6+LrJpc= | ||
| 55 | modernc.org/ccgo/v4 v4.30.1/go.mod h1:bIOeI1JL54Utlxn+LwrFyjCx2n2RDiYEaJVSrgdrRfM= | ||
| 56 | modernc.org/fileutil v1.3.40 h1:ZGMswMNc9JOCrcrakF1HrvmergNLAmxOPjizirpfqBA= | ||
| 57 | modernc.org/fileutil v1.3.40/go.mod h1:HxmghZSZVAz/LXcMNwZPA/DRrQZEVP9VX0V4LQGQFOc= | ||
| 58 | modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI= | ||
| 59 | modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito= | ||
| 60 | modernc.org/gc/v3 v3.1.1 h1:k8T3gkXWY9sEiytKhcgyiZ2L0DTyCQ/nvX+LoCljoRE= | ||
| 61 | modernc.org/gc/v3 v3.1.1/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY= | ||
| 62 | modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks= | ||
| 63 | modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI= | ||
| 64 | modernc.org/libc v1.67.6 h1:eVOQvpModVLKOdT+LvBPjdQqfrZq+pC39BygcT+E7OI= | ||
| 65 | modernc.org/libc v1.67.6/go.mod h1:JAhxUVlolfYDErnwiqaLvUqc8nfb2r6S6slAgZOnaiE= | ||
| 66 | modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU= | ||
| 67 | modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg= | ||
| 68 | modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI= | ||
| 69 | modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw= | ||
| 70 | modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8= | ||
| 71 | modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns= | ||
| 72 | modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w= | ||
| 73 | modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE= | ||
| 74 | modernc.org/sqlite v1.46.1 h1:eFJ2ShBLIEnUWlLy12raN0Z1plqmFX9Qe3rjQTKt6sU= | ||
| 75 | modernc.org/sqlite v1.46.1/go.mod h1:CzbrU2lSB1DKUusvwGz7rqEKIq+NUd8GWuBBZDs9/nA= | ||
| 76 | modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0= | ||
| 77 | modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A= | ||
| 78 | modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= | ||
| 79 | modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= | ||
diff --git a/internal/index/index.go b/internal/index/index.go index 008e487..5ce9f4f 100644 --- a/internal/index/index.go +++ b/internal/index/index.go | |||
| @@ -2,18 +2,16 @@ package index | |||
| 2 | 2 | ||
| 3 | import ( | 3 | import ( |
| 4 | "database/sql" | 4 | "database/sql" |
| 5 | "encoding/binary" | ||
| 6 | "math" | ||
| 7 | "os" | 5 | "os" |
| 8 | "path/filepath" | 6 | "path/filepath" |
| 9 | "sort" | ||
| 10 | 7 | ||
| 11 | _ "modernc.org/sqlite" | 8 | sqlite_vec "github.com/asg017/sqlite-vec-go-bindings/cgo" |
| 9 | _ "github.com/mattn/go-sqlite3" | ||
| 12 | 10 | ||
| 13 | "code.northwest.io/codevec/internal/chunker" | 11 | "code.northwest.io/codevec/internal/chunker" |
| 14 | ) | 12 | ) |
| 15 | 13 | ||
| 16 | // Index stores chunks and embeddings in SQLite | 14 | // Index stores chunks and embeddings in SQLite with sqlite-vec |
| 17 | type Index struct { | 15 | type Index struct { |
| 18 | db *sql.DB | 16 | db *sql.DB |
| 19 | dims int | 17 | dims int |
| @@ -21,13 +19,16 @@ type Index struct { | |||
| 21 | 19 | ||
| 22 | // Open opens or creates an index at the given path | 20 | // Open opens or creates an index at the given path |
| 23 | func Open(path string, dims int) (*Index, error) { | 21 | func Open(path string, dims int) (*Index, error) { |
| 22 | // Register sqlite-vec extension | ||
| 23 | sqlite_vec.Auto() | ||
| 24 | |||
| 24 | // Ensure directory exists | 25 | // Ensure directory exists |
| 25 | dir := filepath.Dir(path) | 26 | dir := filepath.Dir(path) |
| 26 | if err := os.MkdirAll(dir, 0755); err != nil { | 27 | if err := os.MkdirAll(dir, 0755); err != nil { |
| 27 | return nil, err | 28 | return nil, err |
| 28 | } | 29 | } |
| 29 | 30 | ||
| 30 | db, err := sql.Open("sqlite", path) | 31 | db, err := sql.Open("sqlite3", path) |
| 31 | if err != nil { | 32 | if err != nil { |
| 32 | return nil, err | 33 | return nil, err |
| 33 | } | 34 | } |
| @@ -42,7 +43,7 @@ func Open(path string, dims int) (*Index, error) { | |||
| 42 | } | 43 | } |
| 43 | 44 | ||
| 44 | func (idx *Index) init() error { | 45 | func (idx *Index) init() error { |
| 45 | // Create chunks table with embedding column | 46 | // Create chunks table |
| 46 | _, err := idx.db.Exec(` | 47 | _, err := idx.db.Exec(` |
| 47 | CREATE TABLE IF NOT EXISTS chunks ( | 48 | CREATE TABLE IF NOT EXISTS chunks ( |
| 48 | id INTEGER PRIMARY KEY, | 49 | id INTEGER PRIMARY KEY, |
| @@ -53,7 +54,6 @@ func (idx *Index) init() error { | |||
| 53 | name TEXT, | 54 | name TEXT, |
| 54 | content TEXT NOT NULL, | 55 | content TEXT NOT NULL, |
| 55 | hash TEXT NOT NULL, | 56 | hash TEXT NOT NULL, |
| 56 | embedding BLOB, | ||
| 57 | created_at INTEGER DEFAULT (unixepoch()) | 57 | created_at INTEGER DEFAULT (unixepoch()) |
| 58 | ) | 58 | ) |
| 59 | `) | 59 | `) |
| @@ -84,6 +84,17 @@ func (idx *Index) init() error { | |||
| 84 | return err | 84 | return err |
| 85 | } | 85 | } |
| 86 | 86 | ||
| 87 | // Create vec0 virtual table for vectors | ||
| 88 | _, err = idx.db.Exec(` | ||
| 89 | CREATE VIRTUAL TABLE IF NOT EXISTS vectors USING vec0( | ||
| 90 | chunk_id INTEGER PRIMARY KEY, | ||
| 91 | embedding FLOAT[768] distance_metric=cosine | ||
| 92 | ) | ||
| 93 | `) | ||
| 94 | if err != nil { | ||
| 95 | return err | ||
| 96 | } | ||
| 97 | |||
| 87 | // Index on file for faster deletion | 98 | // Index on file for faster deletion |
| 88 | _, err = idx.db.Exec(`CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file)`) | 99 | _, err = idx.db.Exec(`CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file)`) |
| 89 | return err | 100 | return err |
| @@ -96,11 +107,27 @@ func (idx *Index) Close() error { | |||
| 96 | 107 | ||
| 97 | // InsertChunk inserts a chunk with its embedding | 108 | // InsertChunk inserts a chunk with its embedding |
| 98 | func (idx *Index) InsertChunk(chunk chunker.Chunk, embedding []float32) error { | 109 | func (idx *Index) InsertChunk(chunk chunker.Chunk, embedding []float32) error { |
| 99 | embeddingBlob := serializeEmbedding(embedding) | 110 | // Insert chunk |
| 100 | _, err := idx.db.Exec(` | 111 | result, err := idx.db.Exec(` |
| 101 | INSERT INTO chunks (file, start_line, end_line, chunk_type, name, content, hash, embedding) | 112 | INSERT INTO chunks (file, start_line, end_line, chunk_type, name, content, hash) |
| 102 | VALUES (?, ?, ?, ?, ?, ?, ?, ?) | 113 | VALUES (?, ?, ?, ?, ?, ?, ?) |
| 103 | `, chunk.File, chunk.StartLine, chunk.EndLine, chunk.Type, chunk.Name, chunk.Content, chunk.Hash, embeddingBlob) | 114 | `, chunk.File, chunk.StartLine, chunk.EndLine, chunk.Type, chunk.Name, chunk.Content, chunk.Hash) |
| 115 | if err != nil { | ||
| 116 | return err | ||
| 117 | } | ||
| 118 | |||
| 119 | chunkID, err := result.LastInsertId() | ||
| 120 | if err != nil { | ||
| 121 | return err | ||
| 122 | } | ||
| 123 | |||
| 124 | // Insert vector | ||
| 125 | vecBlob, err := sqlite_vec.SerializeFloat32(embedding) | ||
| 126 | if err != nil { | ||
| 127 | return err | ||
| 128 | } | ||
| 129 | |||
| 130 | _, err = idx.db.Exec(`INSERT INTO vectors (chunk_id, embedding) VALUES (?, ?)`, chunkID, vecBlob) | ||
| 104 | return err | 131 | return err |
| 105 | } | 132 | } |
| 106 | 133 | ||
| @@ -110,62 +137,39 @@ type SearchResult struct { | |||
| 110 | Distance float64 | 137 | Distance float64 |
| 111 | } | 138 | } |
| 112 | 139 | ||
| 113 | // Search finds chunks similar to the query embedding using cosine similarity | 140 | // Search finds chunks similar to the query embedding using sqlite-vec |
| 114 | func (idx *Index) Search(queryEmb []float32, limit int) ([]SearchResult, error) { | 141 | func (idx *Index) Search(queryEmb []float32, limit int) ([]SearchResult, error) { |
| 115 | // Load all embeddings | 142 | vecBlob, err := sqlite_vec.SerializeFloat32(queryEmb) |
| 116 | rows, err := idx.db.Query(` | ||
| 117 | SELECT id, file, start_line, end_line, chunk_type, name, content, hash, embedding | ||
| 118 | FROM chunks | ||
| 119 | WHERE embedding IS NOT NULL | ||
| 120 | `) | ||
| 121 | if err != nil { | 143 | if err != nil { |
| 122 | return nil, err | 144 | return nil, err |
| 123 | } | 145 | } |
| 124 | defer rows.Close() | ||
| 125 | 146 | ||
| 126 | type candidate struct { | 147 | // Query similar vectors |
| 127 | chunk chunker.Chunk | 148 | rows, err := idx.db.Query(` |
| 128 | distance float64 | 149 | SELECT v.chunk_id, v.distance, c.file, c.start_line, c.end_line, c.chunk_type, c.name, c.content, c.hash |
| 150 | FROM vectors v | ||
| 151 | JOIN chunks c ON c.id = v.chunk_id | ||
| 152 | WHERE v.embedding MATCH ? AND k = ? | ||
| 153 | ORDER BY v.distance | ||
| 154 | `, vecBlob, limit) | ||
| 155 | if err != nil { | ||
| 156 | return nil, err | ||
| 129 | } | 157 | } |
| 130 | var candidates []candidate | 158 | defer rows.Close() |
| 131 | 159 | ||
| 160 | var results []SearchResult | ||
| 132 | for rows.Next() { | 161 | for rows.Next() { |
| 133 | var id int64 | 162 | var chunkID int64 |
| 163 | var distance float64 | ||
| 134 | var c chunker.Chunk | 164 | var c chunker.Chunk |
| 135 | var embBlob []byte | 165 | err := rows.Scan(&chunkID, &distance, &c.File, &c.StartLine, &c.EndLine, &c.Type, &c.Name, &c.Content, &c.Hash) |
| 136 | err := rows.Scan(&id, &c.File, &c.StartLine, &c.EndLine, &c.Type, &c.Name, &c.Content, &c.Hash, &embBlob) | ||
| 137 | if err != nil { | 166 | if err != nil { |
| 138 | return nil, err | 167 | return nil, err |
| 139 | } | 168 | } |
| 140 | 169 | results = append(results, SearchResult{Chunk: c, Distance: distance}) | |
| 141 | emb := deserializeEmbedding(embBlob) | ||
| 142 | dist := cosineDistance(queryEmb, emb) | ||
| 143 | candidates = append(candidates, candidate{chunk: c, distance: dist}) | ||
| 144 | } | ||
| 145 | |||
| 146 | if err := rows.Err(); err != nil { | ||
| 147 | return nil, err | ||
| 148 | } | ||
| 149 | |||
| 150 | // Sort by distance (lower is better) | ||
| 151 | sort.Slice(candidates, func(i, j int) bool { | ||
| 152 | return candidates[i].distance < candidates[j].distance | ||
| 153 | }) | ||
| 154 | |||
| 155 | // Return top-k | ||
| 156 | if limit > len(candidates) { | ||
| 157 | limit = len(candidates) | ||
| 158 | } | 170 | } |
| 159 | 171 | ||
| 160 | results := make([]SearchResult, limit) | 172 | return results, rows.Err() |
| 161 | for i := 0; i < limit; i++ { | ||
| 162 | results[i] = SearchResult{ | ||
| 163 | Chunk: candidates[i].chunk, | ||
| 164 | Distance: candidates[i].distance, | ||
| 165 | } | ||
| 166 | } | ||
| 167 | |||
| 168 | return results, nil | ||
| 169 | } | 173 | } |
| 170 | 174 | ||
| 171 | // GetFileHash returns the stored hash for a file, or empty string if not indexed | 175 | // GetFileHash returns the stored hash for a file, or empty string if not indexed |
| @@ -189,7 +193,13 @@ func (idx *Index) SetFileHash(path, hash string) error { | |||
| 189 | 193 | ||
| 190 | // DeleteChunksForFile removes all chunks for a file | 194 | // DeleteChunksForFile removes all chunks for a file |
| 191 | func (idx *Index) DeleteChunksForFile(path string) error { | 195 | func (idx *Index) DeleteChunksForFile(path string) error { |
| 192 | _, err := idx.db.Exec(`DELETE FROM chunks WHERE file = ?`, path) | 196 | // Delete vectors for chunks in this file |
| 197 | _, err := idx.db.Exec(`DELETE FROM vectors WHERE chunk_id IN (SELECT id FROM chunks WHERE file = ?)`, path) | ||
| 198 | if err != nil { | ||
| 199 | return err | ||
| 200 | } | ||
| 201 | |||
| 202 | _, err = idx.db.Exec(`DELETE FROM chunks WHERE file = ?`, path) | ||
| 193 | if err != nil { | 203 | if err != nil { |
| 194 | return err | 204 | return err |
| 195 | } | 205 | } |
| @@ -228,44 +238,3 @@ func (idx *Index) GetMetadata(key string) (string, error) { | |||
| 228 | } | 238 | } |
| 229 | return value, err | 239 | return value, err |
| 230 | } | 240 | } |
| 231 | |||
| 232 | // serializeEmbedding converts float32 slice to bytes | ||
| 233 | func serializeEmbedding(embedding []float32) []byte { | ||
| 234 | buf := make([]byte, len(embedding)*4) | ||
| 235 | for i, v := range embedding { | ||
| 236 | binary.LittleEndian.PutUint32(buf[i*4:], math.Float32bits(v)) | ||
| 237 | } | ||
| 238 | return buf | ||
| 239 | } | ||
| 240 | |||
| 241 | // deserializeEmbedding converts bytes back to float32 slice | ||
| 242 | func deserializeEmbedding(data []byte) []float32 { | ||
| 243 | n := len(data) / 4 | ||
| 244 | result := make([]float32, n) | ||
| 245 | for i := 0; i < n; i++ { | ||
| 246 | bits := binary.LittleEndian.Uint32(data[i*4:]) | ||
| 247 | result[i] = math.Float32frombits(bits) | ||
| 248 | } | ||
| 249 | return result | ||
| 250 | } | ||
| 251 | |||
| 252 | // cosineDistance computes 1 - cosine_similarity (so lower is more similar) | ||
| 253 | func cosineDistance(a, b []float32) float64 { | ||
| 254 | if len(a) != len(b) { | ||
| 255 | return 1.0 | ||
| 256 | } | ||
| 257 | |||
| 258 | var dotProduct, normA, normB float64 | ||
| 259 | for i := range a { | ||
| 260 | dotProduct += float64(a[i]) * float64(b[i]) | ||
| 261 | normA += float64(a[i]) * float64(a[i]) | ||
| 262 | normB += float64(b[i]) * float64(b[i]) | ||
| 263 | } | ||
| 264 | |||
| 265 | if normA == 0 || normB == 0 { | ||
| 266 | return 1.0 | ||
| 267 | } | ||
| 268 | |||
| 269 | similarity := dotProduct / (math.Sqrt(normA) * math.Sqrt(normB)) | ||
| 270 | return 1.0 - similarity | ||
| 271 | } | ||
