about summary refs log tree commit diff stats
path: root/cmd/codevec
diff options
context:
space:
mode:
Diffstat (limited to 'cmd/codevec')
-rw-r--r-- cmd/codevec/main.go 369
1 files changed, 369 insertions, 0 deletions
diff --git a/cmd/codevec/main.go b/cmd/codevec/main.go
new file mode 100644
index 0000000..8337367
--- /dev/null
+++ b/cmd/codevec/main.go
@@ -0,0 +1,369 @@
1package main
2
3import (
4 "context"
5 "crypto/sha256"
6 "encoding/json"
7 "fmt"
8 "os"
9 "path/filepath"
10 "strings"
11
12 "github.com/spf13/cobra"
13
14 "code.northwest.io/codevec/internal/chunker"
15 "code.northwest.io/codevec/internal/embedder"
16 "code.northwest.io/codevec/internal/index"
17 "code.northwest.io/codevec/internal/walker"
18)
19
// Location of the on-disk index relative to the directory being indexed or
// queried: <dir>/.codevec/index.db.
const (
	// codevecDir is the directory name that holds the index file.
	codevecDir = ".codevec"
	// indexFile is the index file name inside codevecDir.
	indexFile = "index.db"
)
22
23var rootCmd = &cobra.Command{
24 Use: "codevec",
25 Short: "Semantic code search via embeddings",
26 Long: `Index your codebase and query by concept. Get relevant code chunks with file paths and line numbers.`,
27}
28
29var indexCmd = &cobra.Command{
30 Use: "index [path]",
31 Short: "Index a directory for semantic search",
32 Args: cobra.MaximumNArgs(1),
33 RunE: runIndex,
34}
35
36var queryCmd = &cobra.Command{
37 Use: "query <text>",
38 Short: "Search for relevant code",
39 Args: cobra.ExactArgs(1),
40 RunE: runQuery,
41}
42
43var statusCmd = &cobra.Command{
44 Use: "status",
45 Short: "Show index statistics",
46 RunE: runStatus,
47}
48
49func init() {
50 // index flags
51 indexCmd.Flags().BoolP("force", "f", false, "Re-index everything")
52 indexCmd.Flags().BoolP("verbose", "v", false, "Show progress")
53 indexCmd.Flags().StringP("provider", "p", "ollama", "Embedding provider (ollama, openai)")
54 indexCmd.Flags().StringP("model", "m", "", "Embedding model (default: provider-specific)")
55
56 // query flags
57 queryCmd.Flags().IntP("limit", "l", 10, "Max results")
58 queryCmd.Flags().Float64P("threshold", "t", 0.0, "Min similarity score (0-1)")
59 queryCmd.Flags().BoolP("show", "s", false, "Print chunk content")
60 queryCmd.Flags().Bool("json", false, "Output as JSON")
61
62 rootCmd.AddCommand(indexCmd)
63 rootCmd.AddCommand(queryCmd)
64 rootCmd.AddCommand(statusCmd)
65}
66
67func main() {
68 if err := rootCmd.Execute(); err != nil {
69 os.Exit(1)
70 }
71}
72
73func runIndex(cmd *cobra.Command, args []string) error {
74 path := "."
75 if len(args) > 0 {
76 path = args[0]
77 }
78
79 force, _ := cmd.Flags().GetBool("force")
80 verbose, _ := cmd.Flags().GetBool("verbose")
81 provider, _ := cmd.Flags().GetString("provider")
82 model, _ := cmd.Flags().GetString("model")
83
84 // Resolve absolute path
85 absPath, err := filepath.Abs(path)
86 if err != nil {
87 return err
88 }
89
90 // Create embedder
91 emb, err := embedder.New(provider, model)
92 if err != nil {
93 return err
94 }
95
96 // Open index
97 indexPath := filepath.Join(absPath, codevecDir, indexFile)
98 idx, err := index.Open(indexPath, emb.Dimensions())
99 if err != nil {
100 return fmt.Errorf("failed to open index: %w", err)
101 }
102 defer idx.Close()
103
104 // Store metadata
105 idx.SetMetadata("provider", provider)
106 if model != "" {
107 idx.SetMetadata("model", model)
108 }
109 idx.SetMetadata("dimensions", fmt.Sprintf("%d", emb.Dimensions()))
110
111 // Walk directory
112 w, err := walker.New(absPath, []string{".go"})
113 if err != nil {
114 return err
115 }
116
117 files, err := w.Walk()
118 if err != nil {
119 return err
120 }
121
122 if verbose {
123 fmt.Printf("Found %d Go files\n", len(files))
124 }
125
126 // Create chunker
127 goChunker := chunker.NewGoChunker()
128
129 // Process files
130 var totalChunks int
131 var skipped int
132 ctx := context.Background()
133
134 for _, file := range files {
135 // Read file content
136 content, err := os.ReadFile(file)
137 if err != nil {
138 fmt.Fprintf(os.Stderr, "Warning: failed to read %s: %v\n", file, err)
139 continue
140 }
141
142 // Compute file hash
143 fileHash := fmt.Sprintf("%x", sha256.Sum256(content))
144
145 // Check if already indexed
146 if !force {
147 existingHash, _ := idx.GetFileHash(file)
148 if existingHash == fileHash {
149 skipped++
150 continue
151 }
152 }
153
154 // Delete old chunks for this file
155 idx.DeleteChunksForFile(file)
156
157 // Chunk file
158 chunks, err := goChunker.Chunk(file, content)
159 if err != nil {
160 fmt.Fprintf(os.Stderr, "Warning: failed to parse %s: %v\n", file, err)
161 continue
162 }
163
164 if len(chunks) == 0 {
165 continue
166 }
167
168 // Generate embeddings
169 texts := make([]string, len(chunks))
170 for i, c := range chunks {
171 // Include file path and name for context
172 relPath, _ := filepath.Rel(absPath, c.File)
173 texts[i] = fmt.Sprintf("File: %s\n%s %s\n\n%s", relPath, c.Type, c.Name, c.Content)
174 }
175
176 embeddings, err := emb.Embed(ctx, texts)
177 if err != nil {
178 return fmt.Errorf("embedding failed for %s: %w", file, err)
179 }
180
181 // Store chunks and embeddings
182 for i, chunk := range chunks {
183 if err := idx.InsertChunk(chunk, embeddings[i]); err != nil {
184 return fmt.Errorf("failed to insert chunk: %w", err)
185 }
186 }
187
188 // Update file hash
189 idx.SetFileHash(file, fileHash)
190
191 totalChunks += len(chunks)
192 if verbose {
193 relPath, _ := filepath.Rel(absPath, file)
194 fmt.Printf(" %s: %d chunks\n", relPath, len(chunks))
195 }
196 }
197
198 fmt.Printf("Indexed %d chunks from %d files", totalChunks, len(files)-skipped)
199 if skipped > 0 {
200 fmt.Printf(" (%d unchanged)", skipped)
201 }
202 fmt.Println()
203
204 return nil
205}
206
207func runQuery(cmd *cobra.Command, args []string) error {
208 query := args[0]
209 limit, _ := cmd.Flags().GetInt("limit")
210 threshold, _ := cmd.Flags().GetFloat64("threshold")
211 show, _ := cmd.Flags().GetBool("show")
212 jsonOutput, _ := cmd.Flags().GetBool("json")
213
214 // Find index
215 cwd, err := os.Getwd()
216 if err != nil {
217 return err
218 }
219 indexPath := filepath.Join(cwd, codevecDir, indexFile)
220
221 if _, err := os.Stat(indexPath); os.IsNotExist(err) {
222 return fmt.Errorf("no index found. Run 'codevec index' first")
223 }
224
225 // Get provider/model from metadata
226 idx, err := index.Open(indexPath, 768) // temp dims, we'll read from metadata
227 if err != nil {
228 return err
229 }
230
231 provider, _ := idx.GetMetadata("provider")
232 model, _ := idx.GetMetadata("model")
233 idx.Close()
234
235 if provider == "" {
236 provider = "ollama"
237 }
238
239 // Create embedder
240 emb, err := embedder.New(provider, model)
241 if err != nil {
242 return err
243 }
244
245 // Reopen with correct dimensions
246 idx, err = index.Open(indexPath, emb.Dimensions())
247 if err != nil {
248 return err
249 }
250 defer idx.Close()
251
252 // Generate query embedding
253 ctx := context.Background()
254 embeddings, err := emb.Embed(ctx, []string{query})
255 if err != nil {
256 return fmt.Errorf("failed to embed query: %w", err)
257 }
258
259 // Search
260 results, err := idx.Search(embeddings[0], limit)
261 if err != nil {
262 return fmt.Errorf("search failed: %w", err)
263 }
264
265 // Filter by threshold (distance is lower = more similar)
266 // Convert distance to similarity for threshold comparison
267 var filtered []index.SearchResult
268 for _, r := range results {
269 similarity := 1 - r.Distance
270 if similarity >= threshold {
271 filtered = append(filtered, r)
272 }
273 }
274 results = filtered
275
276 // Output
277 if jsonOutput {
278 type jsonResult struct {
279 File string `json:"file"`
280 StartLine int `json:"start_line"`
281 EndLine int `json:"end_line"`
282 Type string `json:"type"`
283 Name string `json:"name"`
284 Score float64 `json:"score"`
285 Content string `json:"content,omitempty"`
286 }
287
288 var output []jsonResult
289 for _, r := range results {
290 relPath, _ := filepath.Rel(cwd, r.Chunk.File)
291 jr := jsonResult{
292 File: relPath,
293 StartLine: r.Chunk.StartLine,
294 EndLine: r.Chunk.EndLine,
295 Type: r.Chunk.Type,
296 Name: r.Chunk.Name,
297 Score: 1 - r.Distance,
298 }
299 if show {
300 jr.Content = r.Chunk.Content
301 }
302 output = append(output, jr)
303 }
304
305 enc := json.NewEncoder(os.Stdout)
306 enc.SetIndent("", " ")
307 return enc.Encode(output)
308 }
309
310 // Text output
311 if len(results) == 0 {
312 fmt.Println("No results found")
313 return nil
314 }
315
316 for _, r := range results {
317 relPath, _ := filepath.Rel(cwd, r.Chunk.File)
318 similarity := 1 - r.Distance
319 fmt.Printf("%s:%d-%d %s (%.2f)\n", relPath, r.Chunk.StartLine, r.Chunk.EndLine, r.Chunk.Name, similarity)
320 if show {
321 fmt.Println(strings.Repeat("-", 40))
322 fmt.Println(r.Chunk.Content)
323 fmt.Println()
324 }
325 }
326
327 return nil
328}
329
330func runStatus(cmd *cobra.Command, args []string) error {
331 cwd, err := os.Getwd()
332 if err != nil {
333 return err
334 }
335 indexPath := filepath.Join(cwd, codevecDir, indexFile)
336
337 if _, err := os.Stat(indexPath); os.IsNotExist(err) {
338 fmt.Println("No index found. Run 'codevec index' first.")
339 return nil
340 }
341
342 idx, err := index.Open(indexPath, 768)
343 if err != nil {
344 return err
345 }
346 defer idx.Close()
347
348 stats, err := idx.Stats()
349 if err != nil {
350 return err
351 }
352
353 provider, _ := idx.GetMetadata("provider")
354 model, _ := idx.GetMetadata("model")
355 dims, _ := idx.GetMetadata("dimensions")
356
357 fmt.Printf("Index: %s\n", indexPath)
358 fmt.Printf("Files: %d\n", stats.Files)
359 fmt.Printf("Chunks: %d\n", stats.Chunks)
360 fmt.Printf("Provider: %s\n", provider)
361 if model != "" {
362 fmt.Printf("Model: %s\n", model)
363 }
364 if dims != "" {
365 fmt.Printf("Dimensions: %s\n", dims)
366 }
367
368 return nil
369}