diff options
| author | Clawd <ai@clawd.bot> | 2026-03-05 07:14:35 -0800 |
|---|---|---|
| committer | Clawd <ai@clawd.bot> | 2026-03-05 07:14:35 -0800 |
| commit | 9ecdd63319b897e77e89b5dd8d0ee9264f0be0ff (patch) | |
| tree | bb1ee41186cc814113acc746f00a6ad8ab4afe5f /PLAN.md | |
| parent | 1ef3bb0128d59f6092199bb58eb0127ac7808899 (diff) | |
Update plan: Cobra, internal API, no tests
Diffstat (limited to 'PLAN.md')
| -rw-r--r-- | PLAN.md | 55 |
1 file changed, 25 insertions, 30 deletions
| @@ -32,9 +32,9 @@ codevec/ | |||
| 32 | 32 | ||
| 33 | **Tasks:** | 33 | **Tasks:** |
| 34 | - [ ] `go mod init code.northwest.io/codevec` | 34 | - [ ] `go mod init code.northwest.io/codevec` |
| 35 | - [ ] Basic CLI with cobra or just flag package | 35 | - [ ] Basic CLI with Cobra |
| 36 | - [ ] Subcommands: `index`, `query`, `status` | 36 | - [ ] Subcommands: `index`, `query`, `status` |
| 37 | - [ ] Makefile with `build`, `test`, `install` | 37 | - [ ] Makefile with `build`, `install` |
| 38 | 38 | ||
| 39 | --- | 39 | --- |
| 40 | 40 | ||
| @@ -58,9 +58,7 @@ func (w *Walker) Walk() ([]string, error) | |||
| 58 | - [ ] Implement directory walking with `filepath.WalkDir` | 58 | - [ ] Implement directory walking with `filepath.WalkDir` |
| 59 | - [ ] Parse `.gitignore` patterns (use `go-gitignore` or similar) | 59 | - [ ] Parse `.gitignore` patterns (use `go-gitignore` or similar) |
| 60 | - [ ] Filter to `.go` files only (configurable later) | 60 | - [ ] Filter to `.go` files only (configurable later) |
| 61 | - [ ] Skip `vendor/`, `testdata/`, `*_test.go` by default (configurable) | 61 | - [ ] Skip `vendor/`, `testdata/` by default (configurable) |
| 62 | |||
| 63 | **Test:** Walk the `nostr` SDK repo, verify correct file list. | ||
| 64 | 62 | ||
| 65 | --- | 63 | --- |
| 66 | 64 | ||
| @@ -101,13 +99,11 @@ type Chunker interface { | |||
| 101 | - [ ] Handle edge cases: empty files, syntax errors (skip gracefully) | 99 | - [ ] Handle edge cases: empty files, syntax errors (skip gracefully) |
| 102 | - [ ] Chunk size limit: if function > 1000 tokens, note it but keep whole | 100 | - [ ] Chunk size limit: if function > 1000 tokens, note it but keep whole |
| 103 | 101 | ||
| 104 | **Test:** Chunk `nostr/relay.go`, verify functions extracted correctly. | ||
| 105 | |||
| 106 | --- | 102 | --- |
| 107 | 103 | ||
| 108 | ## Phase 4: Embedding Generation | 104 | ## Phase 4: Embedding Generation |
| 109 | 105 | ||
| 110 | Generate embeddings via OpenAI API. | 106 | Generate embeddings via OpenAI-compatible API (internal endpoint). |
| 111 | 107 | ||
| 112 | **Input:** List of chunks | 108 | **Input:** List of chunks |
| 113 | **Output:** Chunks with embedding vectors | 109 | **Output:** Chunks with embedding vectors |
| @@ -117,21 +113,25 @@ type Embedder interface { | |||
| 117 | Embed(ctx context.Context, texts []string) ([][]float32, error) | 113 | Embed(ctx context.Context, texts []string) ([][]float32, error) |
| 118 | } | 114 | } |
| 119 | 115 | ||
| 120 | type OpenAIEmbedder struct { | 116 | type Embedder struct { |
| 121 | apiKey string | 117 | baseURL string // defaults to OpenAI, configurable for internal API |
| 122 | model string // "text-embedding-3-small" | 118 | apiKey string |
| 119 | model string // "text-embedding-3-small" | ||
| 123 | } | 120 | } |
| 124 | ``` | 121 | ``` |
| 125 | 122 | ||
| 126 | **Batching:** OpenAI supports up to 2048 inputs per request. Batch chunks to minimize API calls. | 123 | **Batching:** Batch chunks to minimize API calls (~100 per request). |
| 124 | |||
| 125 | **Config:** | ||
| 126 | - `OPENAI_API_KEY` — API key (standard env var) | ||
| 127 | - `OPENAI_BASE_URL` — Override endpoint for internal API (optional) | ||
| 128 | - `--model` flag for model selection | ||
| 127 | 129 | ||
| 128 | **Tasks:** | 130 | **Tasks:** |
| 129 | - [ ] Implement OpenAI embedding client (stdlib `net/http`, no SDK) | 131 | - [ ] Implement OpenAI-compatible embedding client (stdlib `net/http`) |
| 130 | - [ ] Batch requests (100 chunks per request to stay safe) | 132 | - [ ] Support custom base URL for internal API |
| 133 | - [ ] Batch requests | ||
| 131 | - [ ] Handle rate limits with exponential backoff | 134 | - [ ] Handle rate limits with exponential backoff |
| 132 | - [ ] Config: model selection, API key from env `OPENAI_API_KEY` | ||
| 133 | |||
| 134 | **Test:** Embed a few chunks, verify 1536-dim vectors returned. | ||
| 135 | 135 | ||
| 136 | --- | 136 | --- |
| 137 | 137 | ||
| @@ -182,8 +182,6 @@ LIMIT 10; | |||
| 182 | - [ ] Query by vector similarity | 182 | - [ ] Query by vector similarity |
| 183 | - [ ] Store in `.codevec/index.db` | 183 | - [ ] Store in `.codevec/index.db` |
| 184 | 184 | ||
| 185 | **Test:** Insert chunks, query, verify results ranked by similarity. | ||
| 186 | |||
| 187 | --- | 185 | --- |
| 188 | 186 | ||
| 189 | ## Phase 6: CLI Commands | 187 | ## Phase 6: CLI Commands |
| @@ -270,11 +268,8 @@ Only re-index changed files. | |||
| 270 | 268 | ||
| 271 | --- | 269 | --- |
| 272 | 270 | ||
| 273 | ## Phase 8: Testing & Polish | 271 | ## Phase 8: Polish |
| 274 | 272 | ||
| 275 | - [ ] Unit tests for chunker | ||
| 276 | - [ ] Unit tests for walker | ||
| 277 | - [ ] Integration test: index small repo, query, verify results | ||
| 278 | - [ ] Error handling: missing API key, parse failures, network errors | 273 | - [ ] Error handling: missing API key, parse failures, network errors |
| 279 | - [ ] README with usage examples | 274 | - [ ] README with usage examples |
| 280 | - [ ] `make install` to put binary in PATH | 275 | - [ ] `make install` to put binary in PATH |
| @@ -316,17 +311,17 @@ require ( | |||
| 316 | | 5. Storage | 2 hr | | 311 | | 5. Storage | 2 hr | |
| 317 | | 6. CLI | 1 hr | | 312 | | 6. CLI | 1 hr | |
| 318 | | 7. Incremental | 1 hr | | 313 | | 7. Incremental | 1 hr | |
| 319 | | 8. Polish | 1 hr | | 314 | | 8. Polish | 30 min | |
| 320 | | **Total** | ~10 hr | | 315 | | **Total** | ~9 hr | |
| 321 | 316 | ||
| 322 | --- | 317 | --- |
| 323 | 318 | ||
| 324 | ## Open Decisions | 319 | ## Decisions |
| 325 | 320 | ||
| 326 | 1. **CLI framework:** `cobra` vs stdlib `flag`? Leaning stdlib for simplicity. | 321 | 1. **CLI framework:** Cobra |
| 327 | 2. **Config file:** YAML in `.codevec/config.yaml` or just flags? | 322 | 2. **Config:** Flags preferred; config file only if complexity warrants it |
| 328 | 3. **Chunk overlap:** Include N lines of context above/below functions? | 323 | 3. **Test files:** Index `*_test.go` by default (useful context) |
| 329 | 4. **Test files:** Index `*_test.go` by default or skip? | 324 | 4. **Tests:** None — move fast |
| 330 | 325 | ||
| 331 | --- | 326 | --- |
| 332 | 327 | ||
