From 9ecdd63319b897e77e89b5dd8d0ee9264f0be0ff Mon Sep 17 00:00:00 2001
From: Clawd <ai@clawd.bot>
Date: Thu, 5 Mar 2026 07:14:35 -0800
Subject: Update plan: Cobra, internal API, no tests

---
 PLAN.md | 55 +++++++++++++++++++++++++------------------------------
 1 file changed, 25 insertions(+), 30 deletions(-)

(limited to 'PLAN.md')

diff --git a/PLAN.md b/PLAN.md
index a7253e1..4372a7a 100644
--- a/PLAN.md
+++ b/PLAN.md
@@ -32,9 +32,9 @@ codevec/
 
 **Tasks:**
 - [ ] `go mod init code.northwest.io/codevec`
-- [ ] Basic CLI with cobra or just flag package
+- [ ] Basic CLI with Cobra
 - [ ] Subcommands: `index`, `query`, `status`
-- [ ] Makefile with `build`, `test`, `install`
+- [ ] Makefile with `build`, `install`
 
 ---
 
@@ -58,9 +58,7 @@ func (w *Walker) Walk() ([]string, error)
 - [ ] Implement directory walking with `filepath.WalkDir`
 - [ ] Parse `.gitignore` patterns (use `go-gitignore` or similar)
 - [ ] Filter to `.go` files only (configurable later)
-- [ ] Skip `vendor/`, `testdata/`, `*_test.go` by default (configurable)
-
-**Test:** Walk the `nostr` SDK repo, verify correct file list.
+- [ ] Skip `vendor/`, `testdata/` by default (configurable)
 
 ---
 
@@ -101,13 +99,11 @@ type Chunker interface {
 - [ ] Handle edge cases: empty files, syntax errors (skip gracefully)
 - [ ] Chunk size limit: if function > 1000 tokens, note it but keep whole
 
-**Test:** Chunk `nostr/relay.go`, verify functions extracted correctly.
-
 ---
 
 ## Phase 4: Embedding Generation
 
-Generate embeddings via OpenAI API.
+Generate embeddings via an OpenAI-compatible API (internal endpoint by default override).
 
 **Input:** List of chunks
 **Output:** Chunks with embedding vectors
@@ -117,21 +113,25 @@ type Embedder interface {
     Embed(ctx context.Context, texts []string) ([][]float32, error)
 }
 
-type OpenAIEmbedder struct {
-    apiKey string
-    model  string  // "text-embedding-3-small"
+type OpenAIEmbedder struct {
+    baseURL string  // defaults to OpenAI, configurable for internal API
+    apiKey  string
+    model   string  // "text-embedding-3-small"
 }
 ```
 
-**Batching:** OpenAI supports up to 2048 inputs per request. Batch chunks to minimize API calls.
+**Batching:** Batch chunks to minimize API calls (~100 chunks per request).
+
+**Config:**
+- `OPENAI_API_KEY` — API key (standard env var)
+- `OPENAI_BASE_URL` — Override endpoint for internal API (optional)
+- `--model` flag for model selection
 
 **Tasks:**
-- [ ] Implement OpenAI embedding client (stdlib `net/http`, no SDK)
-- [ ] Batch requests (100 chunks per request to stay safe)
+- [ ] Implement OpenAI-compatible embedding client (stdlib `net/http`)
+- [ ] Support custom base URL for internal API
+- [ ] Batch requests (~100 chunks per request)
 - [ ] Handle rate limits with exponential backoff
-- [ ] Config: model selection, API key from env `OPENAI_API_KEY`
-
-**Test:** Embed a few chunks, verify 1536-dim vectors returned.
 
 ---
 
@@ -182,8 +182,6 @@ LIMIT 10;
 - [ ] Query by vector similarity
 - [ ] Store in `.codevec/index.db`
 
-**Test:** Insert chunks, query, verify results ranked by similarity.
-
 ---
 
 ## Phase 6: CLI Commands
@@ -270,11 +268,8 @@ Only re-index changed files.
 
 ---
 
-## Phase 8: Testing & Polish
+## Phase 8: Polish
 
-- [ ] Unit tests for chunker
-- [ ] Unit tests for walker
-- [ ] Integration test: index small repo, query, verify results
 - [ ] Error handling: missing API key, parse failures, network errors
 - [ ] README with usage examples
 - [ ] `make install` to put binary in PATH
@@ -316,17 +311,17 @@ require (
 | 5. Storage | 2 hr |
 | 6. CLI | 1 hr |
 | 7. Incremental | 1 hr |
-| 8. Polish | 1 hr |
-| **Total** | ~10 hr |
+| 8. Polish | 30 min |
+| **Total** | ~9 hr |
 
 ---
 
-## Open Decisions
+## Decisions
 
-1. **CLI framework:** `cobra` vs stdlib `flag`? Leaning stdlib for simplicity.
-2. **Config file:** YAML in `.codevec/config.yaml` or just flags?
-3. **Chunk overlap:** Include N lines of context above/below functions?
-4. **Test files:** Index `*_test.go` by default or skip?
+1. **CLI framework:** Cobra
+2. **Config:** Flags and environment variables preferred; config file only if complexity warrants it
+3. **Test files:** Index `*_test.go` by default (useful context)
+4. **Tests:** None — move fast
 
 ---
 
-- 
cgit v1.2.3