aboutsummaryrefslogtreecommitdiffstats
path: root/internal/index/index.go
blob: 5ce9f4f894caef0cddeb02758d58add0ed6c33d7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
package index

import (
	"database/sql"
	"fmt"
	"os"
	"path/filepath"

	sqlite_vec "github.com/asg017/sqlite-vec-go-bindings/cgo"
	_ "github.com/mattn/go-sqlite3"

	"code.northwest.io/codevec/internal/chunker"
)

// Index stores chunks and their embeddings in a SQLite database, using the
// sqlite-vec extension for the vector table.
type Index struct {
	// db is the handle to the underlying SQLite database.
	db *sql.DB
	// dims is the embedding dimension that was passed to Open.
	dims int
}

// Open returns an Index backed by the SQLite database at path. The parent
// directory and the schema are created when they do not exist yet. dims is
// recorded on the returned Index.
//
// On any failure the returned *Index is nil; if schema setup fails the
// database handle is closed before returning.
func Open(path string, dims int) (*Index, error) {
	// Register the sqlite-vec extension.
	sqlite_vec.Auto()

	// The database file cannot be created inside a missing directory.
	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
		return nil, err
	}

	conn, err := sql.Open("sqlite3", path)
	if err != nil {
		return nil, err
	}

	index := &Index{db: conn, dims: dims}
	if initErr := index.init(); initErr != nil {
		conn.Close()
		return nil, initErr
	}
	return index, nil
}

// init creates the schema if it is not already present: the chunks, files and
// metadata tables, the vec0 virtual table holding the embeddings, and an index
// on chunks(file).
//
// The embedding column width comes from idx.dims. A non-positive idx.dims
// falls back to 768, the width this schema previously hard-coded. Every
// statement uses IF NOT EXISTS, so an existing vectors table keeps the width
// it was created with.
//
// It returns the first error reported by the database, or nil.
func (idx *Index) init() error {
	const fallbackDims = 768

	dims := idx.dims
	if dims <= 0 {
		dims = fallbackDims
	}

	// Statements run in order and stop at the first failure.
	statements := []string{
		// Chunk text and source location.
		`
		CREATE TABLE IF NOT EXISTS chunks (
			id INTEGER PRIMARY KEY,
			file TEXT NOT NULL,
			start_line INTEGER NOT NULL,
			end_line INTEGER NOT NULL,
			chunk_type TEXT,
			name TEXT,
			content TEXT NOT NULL,
			hash TEXT NOT NULL,
			created_at INTEGER DEFAULT (unixepoch())
		)
	`,
		// One row per indexed file, keyed by path.
		`
		CREATE TABLE IF NOT EXISTS files (
			path TEXT PRIMARY KEY,
			hash TEXT NOT NULL,
			indexed_at INTEGER DEFAULT (unixepoch())
		)
	`,
		// Free-form key/value metadata.
		`
		CREATE TABLE IF NOT EXISTS metadata (
			key TEXT PRIMARY KEY,
			value TEXT
		)
	`,
		// vec0 virtual table for the embeddings; dims is an int formatted with
		// %d, so no caller-controlled text reaches the DDL.
		fmt.Sprintf(`
		CREATE VIRTUAL TABLE IF NOT EXISTS vectors USING vec0(
			chunk_id INTEGER PRIMARY KEY,
			embedding FLOAT[%d] distance_metric=cosine
		)
	`, dims),
		// Index on file for faster deletion.
		`CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file)`,
	}

	for _, stmt := range statements {
		if _, err := idx.db.Exec(stmt); err != nil {
			return err
		}
	}
	return nil
}

// Close closes the underlying database handle and returns the error, if any,
// reported by that close.
func (idx *Index) Close() error {
	closeErr := idx.db.Close()
	return closeErr
}

// InsertChunk stores chunk in the chunks table and embedding in the vectors
// table, keyed by the new chunk's row id.
//
// Both rows are written inside a single transaction, so a failure at any step
// leaves neither behind. The embedding is serialized before the transaction
// starts, so an embedding that cannot be serialized never touches the database.
//
// It returns the first error encountered, or nil once the transaction has
// committed.
func (idx *Index) InsertChunk(chunk chunker.Chunk, embedding []float32) error {
	vecBlob, err := sqlite_vec.SerializeFloat32(embedding)
	if err != nil {
		return err
	}

	tx, err := idx.db.Begin()
	if err != nil {
		return err
	}
	// After a successful Commit, Rollback only reports sql.ErrTxDone, so its
	// result is deliberately ignored; on every early return it undoes the
	// partial write.
	defer func() { _ = tx.Rollback() }()

	result, err := tx.Exec(`
		INSERT INTO chunks (file, start_line, end_line, chunk_type, name, content, hash)
		VALUES (?, ?, ?, ?, ?, ?, ?)
	`, chunk.File, chunk.StartLine, chunk.EndLine, chunk.Type, chunk.Name, chunk.Content, chunk.Hash)
	if err != nil {
		return err
	}

	// The vector row reuses the chunk's row id as its primary key.
	chunkID, err := result.LastInsertId()
	if err != nil {
		return err
	}

	if _, err := tx.Exec(`INSERT INTO vectors (chunk_id, embedding) VALUES (?, ?)`, chunkID, vecBlob); err != nil {
		return err
	}

	return tx.Commit()
}

// SearchResult is a single hit returned by Index.Search.
type SearchResult struct {
	// Chunk is the matching chunk as read back from the chunks table.
	Chunk chunker.Chunk
	// Distance is the distance value reported by the vectors table for this
	// hit; Search orders its results by ascending Distance.
	Distance float64
}

// Search runs a nearest-neighbour query against the vectors table for
// queryEmb and returns the matching chunks joined from the chunks table,
// ordered by ascending distance. limit is passed to the query as k.
//
// A serialization, query or scan failure yields a nil slice and the error. An
// error surfaced by row iteration is returned together with the rows gathered
// so far.
func (idx *Index) Search(queryEmb []float32, limit int) ([]SearchResult, error) {
	const knnQuery = `
		SELECT v.chunk_id, v.distance, c.file, c.start_line, c.end_line, c.chunk_type, c.name, c.content, c.hash
		FROM vectors v
		JOIN chunks c ON c.id = v.chunk_id
		WHERE v.embedding MATCH ? AND k = ?
		ORDER BY v.distance
	`

	queryBlob, err := sqlite_vec.SerializeFloat32(queryEmb)
	if err != nil {
		return nil, err
	}

	rows, err := idx.db.Query(knnQuery, queryBlob, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var hits []SearchResult
	for rows.Next() {
		var (
			id  int64 // selected by the query but not exposed in the result
			hit SearchResult
		)
		scanErr := rows.Scan(
			&id, &hit.Distance,
			&hit.Chunk.File, &hit.Chunk.StartLine, &hit.Chunk.EndLine,
			&hit.Chunk.Type, &hit.Chunk.Name, &hit.Chunk.Content, &hit.Chunk.Hash,
		)
		if scanErr != nil {
			return nil, scanErr
		}
		hits = append(hits, hit)
	}

	return hits, rows.Err()
}

// GetFileHash looks up the hash recorded for path in the files table. A path
// with no row yields "" and a nil error; any other lookup failure is returned
// as the error.
func (idx *Index) GetFileHash(path string) (string, error) {
	row := idx.db.QueryRow(`SELECT hash FROM files WHERE path = ?`, path)

	var stored string
	if err := row.Scan(&stored); err != nil {
		// A missing row means "not indexed yet", which is not an error.
		if err == sql.ErrNoRows {
			return "", nil
		}
		return stored, err
	}
	return stored, nil
}

// SetFileHash records hash as the current hash for path, inserting the files
// row or replacing an existing one, and stamps indexed_at with the database's
// unixepoch().
func (idx *Index) SetFileHash(path, hash string) error {
	const upsert = `
		INSERT OR REPLACE INTO files (path, hash, indexed_at)
		VALUES (?, ?, unixepoch())
	`
	if _, err := idx.db.Exec(upsert, path, hash); err != nil {
		return err
	}
	return nil
}

// DeleteChunksForFile removes everything stored for path: the vectors of its
// chunks, the chunk rows themselves, and the file's row in the files table.
//
// The three deletes run inside a single transaction, so a failure part-way
// through leaves the index unchanged rather than with the tables out of sync.
//
// It returns the first error encountered, or nil once the transaction has
// committed.
func (idx *Index) DeleteChunksForFile(path string) error {
	tx, err := idx.db.Begin()
	if err != nil {
		return err
	}
	// After a successful Commit, Rollback only reports sql.ErrTxDone, so its
	// result is deliberately ignored; on every early return it undoes the
	// partial delete.
	defer func() { _ = tx.Rollback() }()

	// Vectors go first: the subquery needs the chunk rows to still exist.
	if _, err := tx.Exec(`DELETE FROM vectors WHERE chunk_id IN (SELECT id FROM chunks WHERE file = ?)`, path); err != nil {
		return err
	}
	if _, err := tx.Exec(`DELETE FROM chunks WHERE file = ?`, path); err != nil {
		return err
	}
	if _, err := tx.Exec(`DELETE FROM files WHERE path = ?`, path); err != nil {
		return err
	}

	return tx.Commit()
}

// Stats holds index statistics as reported by Index.Stats.
type Stats struct {
	// Files is the number of rows in the files table.
	Files int
	// Chunks is the number of rows in the chunks table.
	Chunks int
}

// Stats counts the rows in the files and chunks tables. If a count query
// fails, the error is returned together with whatever counts were filled in
// before the failure.
func (idx *Index) Stats() (Stats, error) {
	var out Stats

	if err := idx.db.QueryRow(`SELECT COUNT(*) FROM files`).Scan(&out.Files); err != nil {
		return out, err
	}
	if err := idx.db.QueryRow(`SELECT COUNT(*) FROM chunks`).Scan(&out.Chunks); err != nil {
		return out, err
	}
	return out, nil
}

// SetMetadata stores value under key in the metadata table, replacing any
// value already stored for that key.
func (idx *Index) SetMetadata(key, value string) error {
	const upsert = `INSERT OR REPLACE INTO metadata (key, value) VALUES (?, ?)`
	if _, err := idx.db.Exec(upsert, key, value); err != nil {
		return err
	}
	return nil
}

// GetMetadata looks up the value stored under key in the metadata table. A
// key with no row yields "" and a nil error; any other lookup failure is
// returned as the error.
func (idx *Index) GetMetadata(key string) (string, error) {
	row := idx.db.QueryRow(`SELECT value FROM metadata WHERE key = ?`, key)

	var stored string
	if err := row.Scan(&stored); err != nil {
		// A missing row means the key was never set, which is not an error.
		if err == sql.ErrNoRows {
			return "", nil
		}
		return stored, err
	}
	return stored, nil
}