aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore2
-rw-r--r--Makefile13
-rw-r--r--cmd/codevec/main.go369
-rw-r--r--go.mod26
-rw-r--r--go.sum79
-rw-r--r--internal/chunker/chunker.go185
-rw-r--r--internal/embedder/embedder.go222
-rw-r--r--internal/index/index.go271
-rw-r--r--internal/walker/walker.go109
9 files changed, 1276 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..96f1663
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
1bin/
2.codevec/
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..5f37491
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,13 @@
1.PHONY: build install clean
2
3BINARY := codevec
4BUILD_DIR := bin
5
6build:
7 go build -o $(BUILD_DIR)/$(BINARY) ./cmd/codevec
8
9install: build
10 cp $(BUILD_DIR)/$(BINARY) ~/.local/bin/
11
12clean:
13 rm -rf $(BUILD_DIR)
diff --git a/cmd/codevec/main.go b/cmd/codevec/main.go
new file mode 100644
index 0000000..8337367
--- /dev/null
+++ b/cmd/codevec/main.go
@@ -0,0 +1,369 @@
1package main
2
3import (
4 "context"
5 "crypto/sha256"
6 "encoding/json"
7 "fmt"
8 "os"
9 "path/filepath"
10 "strings"
11
12 "github.com/spf13/cobra"
13
14 "code.northwest.io/codevec/internal/chunker"
15 "code.northwest.io/codevec/internal/embedder"
16 "code.northwest.io/codevec/internal/index"
17 "code.northwest.io/codevec/internal/walker"
18)
19
// codevecDir is the per-project directory (inside the indexed root)
// that holds the index database.
const codevecDir = ".codevec"

// indexFile is the SQLite database file name inside codevecDir.
const indexFile = "index.db"

// rootCmd is the top-level CLI command; subcommands attach in init.
var rootCmd = &cobra.Command{
	Use:   "codevec",
	Short: "Semantic code search via embeddings",
	Long:  `Index your codebase and query by concept. Get relevant code chunks with file paths and line numbers.`,
}

// indexCmd indexes a directory (default ".") for semantic search.
var indexCmd = &cobra.Command{
	Use:   "index [path]",
	Short: "Index a directory for semantic search",
	Args:  cobra.MaximumNArgs(1),
	RunE:  runIndex,
}

// queryCmd searches the index for chunks similar to the query text.
var queryCmd = &cobra.Command{
	Use:   "query <text>",
	Short: "Search for relevant code",
	Args:  cobra.ExactArgs(1),
	RunE:  runQuery,
}

// statusCmd prints statistics about the current index.
var statusCmd = &cobra.Command{
	Use:   "status",
	Short: "Show index statistics",
	RunE:  runStatus,
}
48
49func init() {
50 // index flags
51 indexCmd.Flags().BoolP("force", "f", false, "Re-index everything")
52 indexCmd.Flags().BoolP("verbose", "v", false, "Show progress")
53 indexCmd.Flags().StringP("provider", "p", "ollama", "Embedding provider (ollama, openai)")
54 indexCmd.Flags().StringP("model", "m", "", "Embedding model (default: provider-specific)")
55
56 // query flags
57 queryCmd.Flags().IntP("limit", "l", 10, "Max results")
58 queryCmd.Flags().Float64P("threshold", "t", 0.0, "Min similarity score (0-1)")
59 queryCmd.Flags().BoolP("show", "s", false, "Print chunk content")
60 queryCmd.Flags().Bool("json", false, "Output as JSON")
61
62 rootCmd.AddCommand(indexCmd)
63 rootCmd.AddCommand(queryCmd)
64 rootCmd.AddCommand(statusCmd)
65}
66
// main runs the CLI. Cobra prints the error message itself, so we
// only translate a failed command into a non-zero exit status here.
func main() {
	if err := rootCmd.Execute(); err != nil {
		os.Exit(1)
	}
}
72
73func runIndex(cmd *cobra.Command, args []string) error {
74 path := "."
75 if len(args) > 0 {
76 path = args[0]
77 }
78
79 force, _ := cmd.Flags().GetBool("force")
80 verbose, _ := cmd.Flags().GetBool("verbose")
81 provider, _ := cmd.Flags().GetString("provider")
82 model, _ := cmd.Flags().GetString("model")
83
84 // Resolve absolute path
85 absPath, err := filepath.Abs(path)
86 if err != nil {
87 return err
88 }
89
90 // Create embedder
91 emb, err := embedder.New(provider, model)
92 if err != nil {
93 return err
94 }
95
96 // Open index
97 indexPath := filepath.Join(absPath, codevecDir, indexFile)
98 idx, err := index.Open(indexPath, emb.Dimensions())
99 if err != nil {
100 return fmt.Errorf("failed to open index: %w", err)
101 }
102 defer idx.Close()
103
104 // Store metadata
105 idx.SetMetadata("provider", provider)
106 if model != "" {
107 idx.SetMetadata("model", model)
108 }
109 idx.SetMetadata("dimensions", fmt.Sprintf("%d", emb.Dimensions()))
110
111 // Walk directory
112 w, err := walker.New(absPath, []string{".go"})
113 if err != nil {
114 return err
115 }
116
117 files, err := w.Walk()
118 if err != nil {
119 return err
120 }
121
122 if verbose {
123 fmt.Printf("Found %d Go files\n", len(files))
124 }
125
126 // Create chunker
127 goChunker := chunker.NewGoChunker()
128
129 // Process files
130 var totalChunks int
131 var skipped int
132 ctx := context.Background()
133
134 for _, file := range files {
135 // Read file content
136 content, err := os.ReadFile(file)
137 if err != nil {
138 fmt.Fprintf(os.Stderr, "Warning: failed to read %s: %v\n", file, err)
139 continue
140 }
141
142 // Compute file hash
143 fileHash := fmt.Sprintf("%x", sha256.Sum256(content))
144
145 // Check if already indexed
146 if !force {
147 existingHash, _ := idx.GetFileHash(file)
148 if existingHash == fileHash {
149 skipped++
150 continue
151 }
152 }
153
154 // Delete old chunks for this file
155 idx.DeleteChunksForFile(file)
156
157 // Chunk file
158 chunks, err := goChunker.Chunk(file, content)
159 if err != nil {
160 fmt.Fprintf(os.Stderr, "Warning: failed to parse %s: %v\n", file, err)
161 continue
162 }
163
164 if len(chunks) == 0 {
165 continue
166 }
167
168 // Generate embeddings
169 texts := make([]string, len(chunks))
170 for i, c := range chunks {
171 // Include file path and name for context
172 relPath, _ := filepath.Rel(absPath, c.File)
173 texts[i] = fmt.Sprintf("File: %s\n%s %s\n\n%s", relPath, c.Type, c.Name, c.Content)
174 }
175
176 embeddings, err := emb.Embed(ctx, texts)
177 if err != nil {
178 return fmt.Errorf("embedding failed for %s: %w", file, err)
179 }
180
181 // Store chunks and embeddings
182 for i, chunk := range chunks {
183 if err := idx.InsertChunk(chunk, embeddings[i]); err != nil {
184 return fmt.Errorf("failed to insert chunk: %w", err)
185 }
186 }
187
188 // Update file hash
189 idx.SetFileHash(file, fileHash)
190
191 totalChunks += len(chunks)
192 if verbose {
193 relPath, _ := filepath.Rel(absPath, file)
194 fmt.Printf(" %s: %d chunks\n", relPath, len(chunks))
195 }
196 }
197
198 fmt.Printf("Indexed %d chunks from %d files", totalChunks, len(files)-skipped)
199 if skipped > 0 {
200 fmt.Printf(" (%d unchanged)", skipped)
201 }
202 fmt.Println()
203
204 return nil
205}
206
207func runQuery(cmd *cobra.Command, args []string) error {
208 query := args[0]
209 limit, _ := cmd.Flags().GetInt("limit")
210 threshold, _ := cmd.Flags().GetFloat64("threshold")
211 show, _ := cmd.Flags().GetBool("show")
212 jsonOutput, _ := cmd.Flags().GetBool("json")
213
214 // Find index
215 cwd, err := os.Getwd()
216 if err != nil {
217 return err
218 }
219 indexPath := filepath.Join(cwd, codevecDir, indexFile)
220
221 if _, err := os.Stat(indexPath); os.IsNotExist(err) {
222 return fmt.Errorf("no index found. Run 'codevec index' first")
223 }
224
225 // Get provider/model from metadata
226 idx, err := index.Open(indexPath, 768) // temp dims, we'll read from metadata
227 if err != nil {
228 return err
229 }
230
231 provider, _ := idx.GetMetadata("provider")
232 model, _ := idx.GetMetadata("model")
233 idx.Close()
234
235 if provider == "" {
236 provider = "ollama"
237 }
238
239 // Create embedder
240 emb, err := embedder.New(provider, model)
241 if err != nil {
242 return err
243 }
244
245 // Reopen with correct dimensions
246 idx, err = index.Open(indexPath, emb.Dimensions())
247 if err != nil {
248 return err
249 }
250 defer idx.Close()
251
252 // Generate query embedding
253 ctx := context.Background()
254 embeddings, err := emb.Embed(ctx, []string{query})
255 if err != nil {
256 return fmt.Errorf("failed to embed query: %w", err)
257 }
258
259 // Search
260 results, err := idx.Search(embeddings[0], limit)
261 if err != nil {
262 return fmt.Errorf("search failed: %w", err)
263 }
264
265 // Filter by threshold (distance is lower = more similar)
266 // Convert distance to similarity for threshold comparison
267 var filtered []index.SearchResult
268 for _, r := range results {
269 similarity := 1 - r.Distance
270 if similarity >= threshold {
271 filtered = append(filtered, r)
272 }
273 }
274 results = filtered
275
276 // Output
277 if jsonOutput {
278 type jsonResult struct {
279 File string `json:"file"`
280 StartLine int `json:"start_line"`
281 EndLine int `json:"end_line"`
282 Type string `json:"type"`
283 Name string `json:"name"`
284 Score float64 `json:"score"`
285 Content string `json:"content,omitempty"`
286 }
287
288 var output []jsonResult
289 for _, r := range results {
290 relPath, _ := filepath.Rel(cwd, r.Chunk.File)
291 jr := jsonResult{
292 File: relPath,
293 StartLine: r.Chunk.StartLine,
294 EndLine: r.Chunk.EndLine,
295 Type: r.Chunk.Type,
296 Name: r.Chunk.Name,
297 Score: 1 - r.Distance,
298 }
299 if show {
300 jr.Content = r.Chunk.Content
301 }
302 output = append(output, jr)
303 }
304
305 enc := json.NewEncoder(os.Stdout)
306 enc.SetIndent("", " ")
307 return enc.Encode(output)
308 }
309
310 // Text output
311 if len(results) == 0 {
312 fmt.Println("No results found")
313 return nil
314 }
315
316 for _, r := range results {
317 relPath, _ := filepath.Rel(cwd, r.Chunk.File)
318 similarity := 1 - r.Distance
319 fmt.Printf("%s:%d-%d %s (%.2f)\n", relPath, r.Chunk.StartLine, r.Chunk.EndLine, r.Chunk.Name, similarity)
320 if show {
321 fmt.Println(strings.Repeat("-", 40))
322 fmt.Println(r.Chunk.Content)
323 fmt.Println()
324 }
325 }
326
327 return nil
328}
329
// runStatus implements "codevec status": it prints the index
// location, file/chunk counts, and the stored embedding metadata for
// the index in the current working directory.
func runStatus(cmd *cobra.Command, args []string) error {
	cwd, err := os.Getwd()
	if err != nil {
		return err
	}
	indexPath := filepath.Join(cwd, codevecDir, indexFile)

	// A missing index is not an error for "status" — just report it.
	if _, err := os.Stat(indexPath); os.IsNotExist(err) {
		fmt.Println("No index found. Run 'codevec index' first.")
		return nil
	}

	// Dimensions are irrelevant for reading stats; 768 is a placeholder.
	idx, err := index.Open(indexPath, 768)
	if err != nil {
		return err
	}
	defer idx.Close()

	stats, err := idx.Stats()
	if err != nil {
		return err
	}

	// Metadata lookups are best-effort; missing keys print as empty.
	provider, _ := idx.GetMetadata("provider")
	model, _ := idx.GetMetadata("model")
	dims, _ := idx.GetMetadata("dimensions")

	fmt.Printf("Index: %s\n", indexPath)
	fmt.Printf("Files: %d\n", stats.Files)
	fmt.Printf("Chunks: %d\n", stats.Chunks)
	fmt.Printf("Provider: %s\n", provider)
	if model != "" {
		fmt.Printf("Model: %s\n", model)
	}
	if dims != "" {
		fmt.Printf("Dimensions: %s\n", dims)
	}

	return nil
}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..914896e
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,26 @@
1module code.northwest.io/codevec
2
3go 1.24.0
4
5require (
6 github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06
7 github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82
8 github.com/spf13/cobra v1.10.2
9 modernc.org/sqlite v1.46.1
10)
11
12require (
13 github.com/dustin/go-humanize v1.0.1 // indirect
14 github.com/google/uuid v1.6.0 // indirect
15 github.com/inconshreveable/mousetrap v1.1.0 // indirect
16 github.com/mattn/go-isatty v0.0.20 // indirect
17 github.com/ncruces/go-strftime v1.0.0 // indirect
18 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
19 github.com/spf13/pflag v1.0.9 // indirect
20 golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect
21 golang.org/x/sync v0.19.0 // indirect
22 golang.org/x/sys v0.40.0 // indirect
23 modernc.org/libc v1.67.6 // indirect
24 modernc.org/mathutil v1.7.1 // indirect
25 modernc.org/memory v1.11.0 // indirect
26)
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..0d5edbc
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,79 @@
1github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
2github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
3github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
4github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
5github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
6github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
7github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
8github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
9github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
10github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
11github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
12github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
13github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
14github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
15github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
16github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
17github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
18github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
19github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
20github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
21github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
22github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
23github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
24github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDjyw0ULyrTYWeN0UNCCkmCWfjPnIA2W6oviI=
25github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs=
26github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 h1:6C8qej6f1bStuePVkLSFxoU22XBS165D3klxlzRg8F4=
27github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82/go.mod h1:xe4pgH49k4SsmkQq5OT8abwhWmnzkhpgnXeekbx2efw=
28github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU=
29github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4=
30github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY=
31github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
32github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
33github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
34github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
35github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
36go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
37golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 h1:mgKeJMpvi0yx/sU5GsxQ7p6s2wtOnGAHZWCHUM4KGzY=
38golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546/go.mod h1:j/pmGrbnkbPtQfxEe5D0VQhZC6qKbfKifgD0oM7sR70=
39golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
40golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
41golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
42golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
43golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
44golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ=
45golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
46golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
47golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
48gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
49gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
50gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
51gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
52modernc.org/cc/v4 v4.27.1 h1:9W30zRlYrefrDV2JE2O8VDtJ1yPGownxciz5rrbQZis=
53modernc.org/cc/v4 v4.27.1/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0=
54modernc.org/ccgo/v4 v4.30.1 h1:4r4U1J6Fhj98NKfSjnPUN7Ze2c6MnAdL0hWw6+LrJpc=
55modernc.org/ccgo/v4 v4.30.1/go.mod h1:bIOeI1JL54Utlxn+LwrFyjCx2n2RDiYEaJVSrgdrRfM=
56modernc.org/fileutil v1.3.40 h1:ZGMswMNc9JOCrcrakF1HrvmergNLAmxOPjizirpfqBA=
57modernc.org/fileutil v1.3.40/go.mod h1:HxmghZSZVAz/LXcMNwZPA/DRrQZEVP9VX0V4LQGQFOc=
58modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
59modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
60modernc.org/gc/v3 v3.1.1 h1:k8T3gkXWY9sEiytKhcgyiZ2L0DTyCQ/nvX+LoCljoRE=
61modernc.org/gc/v3 v3.1.1/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
62modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
63modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
64modernc.org/libc v1.67.6 h1:eVOQvpModVLKOdT+LvBPjdQqfrZq+pC39BygcT+E7OI=
65modernc.org/libc v1.67.6/go.mod h1:JAhxUVlolfYDErnwiqaLvUqc8nfb2r6S6slAgZOnaiE=
66modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
67modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
68modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
69modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
70modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
71modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
72modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
73modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
74modernc.org/sqlite v1.46.1 h1:eFJ2ShBLIEnUWlLy12raN0Z1plqmFX9Qe3rjQTKt6sU=
75modernc.org/sqlite v1.46.1/go.mod h1:CzbrU2lSB1DKUusvwGz7rqEKIq+NUd8GWuBBZDs9/nA=
76modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
77modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
78modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
79modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
diff --git a/internal/chunker/chunker.go b/internal/chunker/chunker.go
new file mode 100644
index 0000000..f8de08d
--- /dev/null
+++ b/internal/chunker/chunker.go
@@ -0,0 +1,185 @@
1package chunker
2
3import (
4 "crypto/sha256"
5 "fmt"
6 "os"
7 "strings"
8
9 sitter "github.com/smacker/go-tree-sitter"
10 "github.com/smacker/go-tree-sitter/golang"
11)
12
// Chunk represents a semantically meaningful piece of code: one
// top-level function, method, or type declaration from a source file.
type Chunk struct {
	File      string // path of the source file as given to Chunk
	StartLine int    // 1-based first line of the declaration
	EndLine   int    // 1-based last line of the declaration
	Type      string // "function", "method", "type"
	Name      string // declaration name; methods include the receiver type
	Content   string // full source text of the declaration
	Hash      string // short content fingerprint (see hash())
}

// Chunker extracts semantic chunks from source code.
type Chunker interface {
	// Chunk parses content (read from path) and returns its chunks.
	Chunk(path string, content []byte) ([]Chunk, error)
}
28
// GoChunker extracts chunks from Go source files using tree-sitter.
// NOTE(review): tree-sitter parsers are typically not safe for
// concurrent use — confirm before sharing one GoChunker across
// goroutines.
type GoChunker struct {
	parser *sitter.Parser
}

// NewGoChunker creates a chunker whose parser is preconfigured with
// the tree-sitter Go grammar.
func NewGoChunker() *GoChunker {
	parser := sitter.NewParser()
	parser.SetLanguage(golang.GetLanguage())
	return &GoChunker{parser: parser}
}
40
41// ChunkFile reads and chunks a file
42func (c *GoChunker) ChunkFile(path string) ([]Chunk, error) {
43 content, err := os.ReadFile(path)
44 if err != nil {
45 return nil, err
46 }
47 return c.Chunk(path, content)
48}
49
50// Chunk extracts semantic chunks from Go source
51func (c *GoChunker) Chunk(path string, content []byte) ([]Chunk, error) {
52 tree := c.parser.Parse(nil, content)
53 if tree == nil {
54 return nil, fmt.Errorf("failed to parse %s", path)
55 }
56 defer tree.Close()
57
58 var chunks []Chunk
59 root := tree.RootNode()
60
61 // Walk top-level declarations
62 for i := 0; i < int(root.ChildCount()); i++ {
63 node := root.Child(i)
64 chunk := c.extractChunk(node, content, path)
65 if chunk != nil {
66 chunks = append(chunks, *chunk)
67 }
68 }
69
70 return chunks, nil
71}
72
73func (c *GoChunker) extractChunk(node *sitter.Node, content []byte, path string) *Chunk {
74 nodeType := node.Type()
75
76 switch nodeType {
77 case "function_declaration":
78 return c.extractFunction(node, content, path)
79 case "method_declaration":
80 return c.extractMethod(node, content, path)
81 case "type_declaration":
82 return c.extractType(node, content, path)
83 }
84
85 return nil
86}
87
88func (c *GoChunker) extractFunction(node *sitter.Node, content []byte, path string) *Chunk {
89 nameNode := node.ChildByFieldName("name")
90 if nameNode == nil {
91 return nil
92 }
93
94 name := string(content[nameNode.StartByte():nameNode.EndByte()])
95 text := string(content[node.StartByte():node.EndByte()])
96
97 return &Chunk{
98 File: path,
99 StartLine: int(node.StartPoint().Row) + 1,
100 EndLine: int(node.EndPoint().Row) + 1,
101 Type: "function",
102 Name: name,
103 Content: text,
104 Hash: hash(text),
105 }
106}
107
108func (c *GoChunker) extractMethod(node *sitter.Node, content []byte, path string) *Chunk {
109 nameNode := node.ChildByFieldName("name")
110 receiverNode := node.ChildByFieldName("receiver")
111 if nameNode == nil {
112 return nil
113 }
114
115 name := string(content[nameNode.StartByte():nameNode.EndByte()])
116
117 // Build receiver prefix like (*Server) or (s Server)
118 if receiverNode != nil {
119 recvText := string(content[receiverNode.StartByte():receiverNode.EndByte()])
120 // Extract type from receiver, e.g., "(s *Server)" -> "*Server"
121 recvType := extractReceiverType(recvText)
122 if recvType != "" {
123 name = fmt.Sprintf("(%s).%s", recvType, name)
124 }
125 }
126
127 text := string(content[node.StartByte():node.EndByte()])
128
129 return &Chunk{
130 File: path,
131 StartLine: int(node.StartPoint().Row) + 1,
132 EndLine: int(node.EndPoint().Row) + 1,
133 Type: "method",
134 Name: name,
135 Content: text,
136 Hash: hash(text),
137 }
138}
139
140func (c *GoChunker) extractType(node *sitter.Node, content []byte, path string) *Chunk {
141 // type_declaration contains type_spec children
142 for i := 0; i < int(node.ChildCount()); i++ {
143 child := node.Child(i)
144 if child.Type() == "type_spec" {
145 nameNode := child.ChildByFieldName("name")
146 if nameNode == nil {
147 continue
148 }
149
150 name := string(content[nameNode.StartByte():nameNode.EndByte()])
151 text := string(content[node.StartByte():node.EndByte()])
152
153 return &Chunk{
154 File: path,
155 StartLine: int(node.StartPoint().Row) + 1,
156 EndLine: int(node.EndPoint().Row) + 1,
157 Type: "type",
158 Name: name,
159 Content: text,
160 Hash: hash(text),
161 }
162 }
163 }
164 return nil
165}
166
// extractReceiverType returns the receiver's type from a receiver
// clause, e.g. "(s *Server)" -> "*Server" and "(*Server)" -> "*Server".
// It returns "" when the clause contains no type.
func extractReceiverType(recv string) string {
	// Strip the surrounding parens, then rely on Fields to discard
	// any interior whitespace; the type is always the final token.
	inner := strings.TrimSuffix(strings.TrimPrefix(recv, "("), ")")
	tokens := strings.Fields(inner)
	if len(tokens) == 0 {
		return ""
	}
	return tokens[len(tokens)-1]
}
181
// hash returns the first 16 hex characters (8 bytes) of the SHA-256
// digest of s, used as a compact content fingerprint for chunks.
func hash(s string) string {
	digest := sha256.Sum256([]byte(s))
	return fmt.Sprintf("%x", digest[:8])
}
diff --git a/internal/embedder/embedder.go b/internal/embedder/embedder.go
new file mode 100644
index 0000000..42f8518
--- /dev/null
+++ b/internal/embedder/embedder.go
@@ -0,0 +1,222 @@
1package embedder
2
3import (
4 "bytes"
5 "context"
6 "encoding/json"
7 "fmt"
8 "net/http"
9 "os"
10)
11
// Embedder generates vector embeddings for text.
type Embedder interface {
	// Embed returns one embedding per input text, in order.
	Embed(ctx context.Context, texts []string) ([][]float32, error)
	// Dimensions reports the width of the vectors Embed produces.
	Dimensions() int
}
17
// OllamaEmbedder uses Ollama's embedding API.
type OllamaEmbedder struct {
	baseURL string // e.g. http://localhost:11434
	model   string // e.g. nomic-embed-text
	dims    int    // output vector width for model
}
24
25// NewOllamaEmbedder creates an Ollama embedder
26func NewOllamaEmbedder(model string) *OllamaEmbedder {
27 baseURL := os.Getenv("CODEVEC_BASE_URL")
28 if baseURL == "" {
29 baseURL = "http://localhost:11434"
30 }
31 if model == "" {
32 model = "nomic-embed-text"
33 }
34
35 // Model dimensions
36 dims := 768 // nomic-embed-text default
37 switch model {
38 case "mxbai-embed-large":
39 dims = 1024
40 case "all-minilm":
41 dims = 384
42 }
43
44 return &OllamaEmbedder{
45 baseURL: baseURL,
46 model: model,
47 dims: dims,
48 }
49}
50
// Dimensions reports the embedding width of the configured model.
func (e *OllamaEmbedder) Dimensions() int {
	return e.dims
}

// ollamaRequest is the request body for POST /api/embeddings.
type ollamaRequest struct {
	Model  string `json:"model"`
	Prompt string `json:"prompt"`
}

// ollamaResponse is the embedding returned by /api/embeddings.
type ollamaResponse struct {
	Embedding []float32 `json:"embedding"`
}
63
64func (e *OllamaEmbedder) Embed(ctx context.Context, texts []string) ([][]float32, error) {
65 embeddings := make([][]float32, len(texts))
66
67 // Ollama's /api/embeddings takes one prompt at a time
68 for i, text := range texts {
69 req := ollamaRequest{
70 Model: e.model,
71 Prompt: text,
72 }
73
74 body, err := json.Marshal(req)
75 if err != nil {
76 return nil, err
77 }
78
79 httpReq, err := http.NewRequestWithContext(ctx, "POST", e.baseURL+"/api/embeddings", bytes.NewReader(body))
80 if err != nil {
81 return nil, err
82 }
83 httpReq.Header.Set("Content-Type", "application/json")
84
85 resp, err := http.DefaultClient.Do(httpReq)
86 if err != nil {
87 return nil, fmt.Errorf("ollama request failed: %w", err)
88 }
89 defer resp.Body.Close()
90
91 if resp.StatusCode != http.StatusOK {
92 return nil, fmt.Errorf("ollama returned status %d", resp.StatusCode)
93 }
94
95 var result ollamaResponse
96 if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
97 return nil, err
98 }
99
100 embeddings[i] = result.Embedding
101 }
102
103 return embeddings, nil
104}
105
// OpenAIEmbedder uses an OpenAI-compatible embedding API.
type OpenAIEmbedder struct {
	baseURL string // e.g. https://api.openai.com
	apiKey  string // bearer token; required by Embed
	model   string // e.g. text-embedding-3-small
	dims    int    // output vector width for model
}
113
114// NewOpenAIEmbedder creates an OpenAI-compatible embedder
115func NewOpenAIEmbedder(model string) *OpenAIEmbedder {
116 baseURL := os.Getenv("CODEVEC_BASE_URL")
117 if baseURL == "" {
118 baseURL = "https://api.openai.com"
119 }
120 apiKey := os.Getenv("CODEVEC_API_KEY")
121 if model == "" {
122 model = "text-embedding-3-small"
123 }
124
125 dims := 1536 // text-embedding-3-small default
126 switch model {
127 case "text-embedding-3-large":
128 dims = 3072
129 case "text-embedding-ada-002":
130 dims = 1536
131 }
132
133 return &OpenAIEmbedder{
134 baseURL: baseURL,
135 apiKey: apiKey,
136 model: model,
137 dims: dims,
138 }
139}
140
// Dimensions reports the embedding width of the configured model.
func (e *OpenAIEmbedder) Dimensions() int {
	return e.dims
}

// openaiRequest is the request body for POST /v1/embeddings.
type openaiRequest struct {
	Model string   `json:"model"`
	Input []string `json:"input"`
}

// openaiResponse holds the embeddings from /v1/embeddings; Data is
// ordered to match the request's Input.
type openaiResponse struct {
	Data []struct {
		Embedding []float32 `json:"embedding"`
	} `json:"data"`
}
155
156func (e *OpenAIEmbedder) Embed(ctx context.Context, texts []string) ([][]float32, error) {
157 if e.apiKey == "" {
158 return nil, fmt.Errorf("CODEVEC_API_KEY not set")
159 }
160
161 // Batch in groups of 100
162 const batchSize = 100
163 embeddings := make([][]float32, len(texts))
164
165 for start := 0; start < len(texts); start += batchSize {
166 end := start + batchSize
167 if end > len(texts) {
168 end = len(texts)
169 }
170 batch := texts[start:end]
171
172 req := openaiRequest{
173 Model: e.model,
174 Input: batch,
175 }
176
177 body, err := json.Marshal(req)
178 if err != nil {
179 return nil, err
180 }
181
182 httpReq, err := http.NewRequestWithContext(ctx, "POST", e.baseURL+"/v1/embeddings", bytes.NewReader(body))
183 if err != nil {
184 return nil, err
185 }
186 httpReq.Header.Set("Content-Type", "application/json")
187 httpReq.Header.Set("Authorization", "Bearer "+e.apiKey)
188
189 resp, err := http.DefaultClient.Do(httpReq)
190 if err != nil {
191 return nil, fmt.Errorf("openai request failed: %w", err)
192 }
193 defer resp.Body.Close()
194
195 if resp.StatusCode != http.StatusOK {
196 return nil, fmt.Errorf("openai returned status %d", resp.StatusCode)
197 }
198
199 var result openaiResponse
200 if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
201 return nil, err
202 }
203
204 for i, d := range result.Data {
205 embeddings[start+i] = d.Embedding
206 }
207 }
208
209 return embeddings, nil
210}
211
212// New creates an embedder based on provider name
213func New(provider, model string) (Embedder, error) {
214 switch provider {
215 case "ollama":
216 return NewOllamaEmbedder(model), nil
217 case "openai":
218 return NewOpenAIEmbedder(model), nil
219 default:
220 return nil, fmt.Errorf("unknown provider: %s", provider)
221 }
222}
diff --git a/internal/index/index.go b/internal/index/index.go
new file mode 100644
index 0000000..008e487
--- /dev/null
+++ b/internal/index/index.go
@@ -0,0 +1,271 @@
1package index
2
import (
	"database/sql"
	"encoding/binary"
	"errors"
	"math"
	"os"
	"path/filepath"
	"sort"

	_ "modernc.org/sqlite"

	"code.northwest.io/codevec/internal/chunker"
)
15
// Index stores chunks and embeddings in SQLite. All similarity search
// is done in-process (brute-force cosine scan); SQLite only provides
// durable storage.
type Index struct {
	db   *sql.DB // open SQLite handle; owned by this Index, closed by Close
	dims int     // expected embedding dimensionality — NOTE(review): stored but not yet enforced on insert/search; confirm intent
}
21
22// Open opens or creates an index at the given path
23func Open(path string, dims int) (*Index, error) {
24 // Ensure directory exists
25 dir := filepath.Dir(path)
26 if err := os.MkdirAll(dir, 0755); err != nil {
27 return nil, err
28 }
29
30 db, err := sql.Open("sqlite", path)
31 if err != nil {
32 return nil, err
33 }
34
35 idx := &Index{db: db, dims: dims}
36 if err := idx.init(); err != nil {
37 db.Close()
38 return nil, err
39 }
40
41 return idx, nil
42}
43
44func (idx *Index) init() error {
45 // Create chunks table with embedding column
46 _, err := idx.db.Exec(`
47 CREATE TABLE IF NOT EXISTS chunks (
48 id INTEGER PRIMARY KEY,
49 file TEXT NOT NULL,
50 start_line INTEGER NOT NULL,
51 end_line INTEGER NOT NULL,
52 chunk_type TEXT,
53 name TEXT,
54 content TEXT NOT NULL,
55 hash TEXT NOT NULL,
56 embedding BLOB,
57 created_at INTEGER DEFAULT (unixepoch())
58 )
59 `)
60 if err != nil {
61 return err
62 }
63
64 // Create files table for tracking indexed files
65 _, err = idx.db.Exec(`
66 CREATE TABLE IF NOT EXISTS files (
67 path TEXT PRIMARY KEY,
68 hash TEXT NOT NULL,
69 indexed_at INTEGER DEFAULT (unixepoch())
70 )
71 `)
72 if err != nil {
73 return err
74 }
75
76 // Create metadata table
77 _, err = idx.db.Exec(`
78 CREATE TABLE IF NOT EXISTS metadata (
79 key TEXT PRIMARY KEY,
80 value TEXT
81 )
82 `)
83 if err != nil {
84 return err
85 }
86
87 // Index on file for faster deletion
88 _, err = idx.db.Exec(`CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file)`)
89 return err
90}
91
// Close releases the underlying SQLite handle. The Index must not be
// used after Close returns.
func (idx *Index) Close() error {
	return idx.db.Close()
}
96
97// InsertChunk inserts a chunk with its embedding
98func (idx *Index) InsertChunk(chunk chunker.Chunk, embedding []float32) error {
99 embeddingBlob := serializeEmbedding(embedding)
100 _, err := idx.db.Exec(`
101 INSERT INTO chunks (file, start_line, end_line, chunk_type, name, content, hash, embedding)
102 VALUES (?, ?, ?, ?, ?, ?, ?, ?)
103 `, chunk.File, chunk.StartLine, chunk.EndLine, chunk.Type, chunk.Name, chunk.Content, chunk.Hash, embeddingBlob)
104 return err
105}
106
// SearchResult represents a search result: one indexed chunk together
// with its cosine distance from the query (lower is more similar).
type SearchResult struct {
	Chunk    chunker.Chunk // the matching chunk, including file and line range
	Distance float64       // 1 - cosine similarity; 0 means identical direction
}
112
113// Search finds chunks similar to the query embedding using cosine similarity
114func (idx *Index) Search(queryEmb []float32, limit int) ([]SearchResult, error) {
115 // Load all embeddings
116 rows, err := idx.db.Query(`
117 SELECT id, file, start_line, end_line, chunk_type, name, content, hash, embedding
118 FROM chunks
119 WHERE embedding IS NOT NULL
120 `)
121 if err != nil {
122 return nil, err
123 }
124 defer rows.Close()
125
126 type candidate struct {
127 chunk chunker.Chunk
128 distance float64
129 }
130 var candidates []candidate
131
132 for rows.Next() {
133 var id int64
134 var c chunker.Chunk
135 var embBlob []byte
136 err := rows.Scan(&id, &c.File, &c.StartLine, &c.EndLine, &c.Type, &c.Name, &c.Content, &c.Hash, &embBlob)
137 if err != nil {
138 return nil, err
139 }
140
141 emb := deserializeEmbedding(embBlob)
142 dist := cosineDistance(queryEmb, emb)
143 candidates = append(candidates, candidate{chunk: c, distance: dist})
144 }
145
146 if err := rows.Err(); err != nil {
147 return nil, err
148 }
149
150 // Sort by distance (lower is better)
151 sort.Slice(candidates, func(i, j int) bool {
152 return candidates[i].distance < candidates[j].distance
153 })
154
155 // Return top-k
156 if limit > len(candidates) {
157 limit = len(candidates)
158 }
159
160 results := make([]SearchResult, limit)
161 for i := 0; i < limit; i++ {
162 results[i] = SearchResult{
163 Chunk: candidates[i].chunk,
164 Distance: candidates[i].distance,
165 }
166 }
167
168 return results, nil
169}
170
171// GetFileHash returns the stored hash for a file, or empty string if not indexed
172func (idx *Index) GetFileHash(path string) (string, error) {
173 var hash string
174 err := idx.db.QueryRow(`SELECT hash FROM files WHERE path = ?`, path).Scan(&hash)
175 if err == sql.ErrNoRows {
176 return "", nil
177 }
178 return hash, err
179}
180
181// SetFileHash updates the hash for a file
182func (idx *Index) SetFileHash(path, hash string) error {
183 _, err := idx.db.Exec(`
184 INSERT OR REPLACE INTO files (path, hash, indexed_at)
185 VALUES (?, ?, unixepoch())
186 `, path, hash)
187 return err
188}
189
190// DeleteChunksForFile removes all chunks for a file
191func (idx *Index) DeleteChunksForFile(path string) error {
192 _, err := idx.db.Exec(`DELETE FROM chunks WHERE file = ?`, path)
193 if err != nil {
194 return err
195 }
196 _, err = idx.db.Exec(`DELETE FROM files WHERE path = ?`, path)
197 return err
198}
199
// Stats summarizes the contents of the index.
type Stats struct {
	Files  int // rows in the files table (indexed files)
	Chunks int // rows in the chunks table (indexed code fragments)
}
205
206func (idx *Index) Stats() (Stats, error) {
207 var s Stats
208 err := idx.db.QueryRow(`SELECT COUNT(*) FROM files`).Scan(&s.Files)
209 if err != nil {
210 return s, err
211 }
212 err = idx.db.QueryRow(`SELECT COUNT(*) FROM chunks`).Scan(&s.Chunks)
213 return s, err
214}
215
216// SetMetadata stores metadata
217func (idx *Index) SetMetadata(key, value string) error {
218 _, err := idx.db.Exec(`INSERT OR REPLACE INTO metadata (key, value) VALUES (?, ?)`, key, value)
219 return err
220}
221
222// GetMetadata retrieves metadata
223func (idx *Index) GetMetadata(key string) (string, error) {
224 var value string
225 err := idx.db.QueryRow(`SELECT value FROM metadata WHERE key = ?`, key).Scan(&value)
226 if err == sql.ErrNoRows {
227 return "", nil
228 }
229 return value, err
230}
231
// serializeEmbedding encodes a float32 vector as a little-endian byte
// blob (4 bytes per component), the on-disk format for the embedding
// column.
func serializeEmbedding(embedding []float32) []byte {
	out := make([]byte, 0, len(embedding)*4)
	for _, v := range embedding {
		out = binary.LittleEndian.AppendUint32(out, math.Float32bits(v))
	}
	return out
}
240
// deserializeEmbedding decodes a little-endian float32 blob produced by
// serializeEmbedding. Trailing bytes that do not form a complete
// float32 are ignored, matching integer truncation of len(data)/4.
func deserializeEmbedding(data []byte) []float32 {
	out := make([]float32, 0, len(data)/4)
	for len(data) >= 4 {
		out = append(out, math.Float32frombits(binary.LittleEndian.Uint32(data)))
		data = data[4:]
	}
	return out
}
251
// cosineDistance returns 1 - cosine_similarity(a, b), so lower means
// more similar: 0 for identical directions, 1 for orthogonal vectors.
// Mismatched lengths and zero-norm inputs are treated as maximally
// distant (1.0).
func cosineDistance(a, b []float32) float64 {
	if len(a) != len(b) {
		return 1.0
	}

	var dot, na, nb float64
	for i, av := range a {
		x, y := float64(av), float64(b[i])
		dot += x * y
		na += x * x
		nb += y * y
	}

	if na == 0 || nb == 0 {
		return 1.0
	}
	return 1.0 - dot/(math.Sqrt(na)*math.Sqrt(nb))
}
diff --git a/internal/walker/walker.go b/internal/walker/walker.go
new file mode 100644
index 0000000..0ac470d
--- /dev/null
+++ b/internal/walker/walker.go
@@ -0,0 +1,109 @@
1package walker
2
3import (
4 "os"
5 "path/filepath"
6 "strings"
7
8 ignore "github.com/sabhiram/go-gitignore"
9)
10
// DefaultIgnore patterns applied to all walks. Each entry is a
// directory name with a trailing slash; matching directories are
// skipped so their subtrees are never visited.
var DefaultIgnore = []string{
	"vendor/",
	"node_modules/",
	".git/",
	".codevec/",
}
18
// Walker walks a directory tree finding files to index.
type Walker struct {
	root       string            // absolute root of the walk
	extensions []string          // file extensions to keep, e.g. [".go"]; empty means all files
	gitignore  *ignore.GitIgnore // compiled root .gitignore, nil when absent or unparsable
}
25
26// New creates a walker for the given root directory
27func New(root string, extensions []string) (*Walker, error) {
28 root, err := filepath.Abs(root)
29 if err != nil {
30 return nil, err
31 }
32
33 w := &Walker{
34 root: root,
35 extensions: extensions,
36 }
37
38 // Load .gitignore if present
39 gitignorePath := filepath.Join(root, ".gitignore")
40 if _, err := os.Stat(gitignorePath); err == nil {
41 gi, err := ignore.CompileIgnoreFile(gitignorePath)
42 if err == nil {
43 w.gitignore = gi
44 }
45 }
46
47 return w, nil
48}
49
50// Walk returns all matching files in the directory tree
51func (w *Walker) Walk() ([]string, error) {
52 var files []string
53
54 err := filepath.WalkDir(w.root, func(path string, d os.DirEntry, err error) error {
55 if err != nil {
56 return err
57 }
58
59 // Get path relative to root for ignore matching
60 relPath, err := filepath.Rel(w.root, path)
61 if err != nil {
62 return err
63 }
64
65 // Skip default ignored directories
66 if d.IsDir() {
67 for _, pattern := range DefaultIgnore {
68 if strings.HasPrefix(relPath+"/", pattern) || relPath+"/" == pattern {
69 return filepath.SkipDir
70 }
71 }
72 }
73
74 // Skip if matched by .gitignore
75 if w.gitignore != nil && w.gitignore.MatchesPath(relPath) {
76 if d.IsDir() {
77 return filepath.SkipDir
78 }
79 return nil
80 }
81
82 // Skip directories and non-matching extensions
83 if d.IsDir() {
84 return nil
85 }
86
87 if !w.matchesExtension(path) {
88 return nil
89 }
90
91 files = append(files, path)
92 return nil
93 })
94
95 return files, err
96}
97
98func (w *Walker) matchesExtension(path string) bool {
99 if len(w.extensions) == 0 {
100 return true
101 }
102 ext := filepath.Ext(path)
103 for _, e := range w.extensions {
104 if ext == e {
105 return true
106 }
107 }
108 return false
109}