aboutsummaryrefslogtreecommitdiffstats
path: root/internal/chunker
diff options
context:
space:
mode:
Diffstat (limited to 'internal/chunker')
-rw-r--r--internal/chunker/chunker.go185
1 files changed, 185 insertions, 0 deletions
diff --git a/internal/chunker/chunker.go b/internal/chunker/chunker.go
new file mode 100644
index 0000000..f8de08d
--- /dev/null
+++ b/internal/chunker/chunker.go
@@ -0,0 +1,185 @@
1package chunker
2
import (
	"context"
	"crypto/sha256"
	"fmt"
	"os"
	"strings"

	sitter "github.com/smacker/go-tree-sitter"
	"github.com/smacker/go-tree-sitter/golang"
)
12
// Chunk is a single top-level declaration cut out of a source file, together
// with its location and a short fingerprint of its text.
type Chunk struct {
	// File is the path the chunk was extracted from.
	File string
	// StartLine and EndLine are the 1-based first and last lines of the chunk.
	StartLine, EndLine int
	// Type is the kind of declaration: "function", "method", or "type".
	Type string
	// Name identifies the declaration. Methods may carry a receiver prefix,
	// e.g. "(*Server).Run".
	Name string
	// Content is the source text of the declaration.
	Content string
	// Hash is a truncated SHA-256 hex digest of Content.
	Hash string
}
23
24// Chunker extracts semantic chunks from source code
25type Chunker interface {
26 Chunk(path string, content []byte) ([]Chunk, error)
27}
28
29// GoChunker extracts chunks from Go source files using tree-sitter
30type GoChunker struct {
31 parser *sitter.Parser
32}
33
34// NewGoChunker creates a new Go chunker
35func NewGoChunker() *GoChunker {
36 parser := sitter.NewParser()
37 parser.SetLanguage(golang.GetLanguage())
38 return &GoChunker{parser: parser}
39}
40
41// ChunkFile reads and chunks a file
42func (c *GoChunker) ChunkFile(path string) ([]Chunk, error) {
43 content, err := os.ReadFile(path)
44 if err != nil {
45 return nil, err
46 }
47 return c.Chunk(path, content)
48}
49
50// Chunk extracts semantic chunks from Go source
51func (c *GoChunker) Chunk(path string, content []byte) ([]Chunk, error) {
52 tree := c.parser.Parse(nil, content)
53 if tree == nil {
54 return nil, fmt.Errorf("failed to parse %s", path)
55 }
56 defer tree.Close()
57
58 var chunks []Chunk
59 root := tree.RootNode()
60
61 // Walk top-level declarations
62 for i := 0; i < int(root.ChildCount()); i++ {
63 node := root.Child(i)
64 chunk := c.extractChunk(node, content, path)
65 if chunk != nil {
66 chunks = append(chunks, *chunk)
67 }
68 }
69
70 return chunks, nil
71}
72
73func (c *GoChunker) extractChunk(node *sitter.Node, content []byte, path string) *Chunk {
74 nodeType := node.Type()
75
76 switch nodeType {
77 case "function_declaration":
78 return c.extractFunction(node, content, path)
79 case "method_declaration":
80 return c.extractMethod(node, content, path)
81 case "type_declaration":
82 return c.extractType(node, content, path)
83 }
84
85 return nil
86}
87
88func (c *GoChunker) extractFunction(node *sitter.Node, content []byte, path string) *Chunk {
89 nameNode := node.ChildByFieldName("name")
90 if nameNode == nil {
91 return nil
92 }
93
94 name := string(content[nameNode.StartByte():nameNode.EndByte()])
95 text := string(content[node.StartByte():node.EndByte()])
96
97 return &Chunk{
98 File: path,
99 StartLine: int(node.StartPoint().Row) + 1,
100 EndLine: int(node.EndPoint().Row) + 1,
101 Type: "function",
102 Name: name,
103 Content: text,
104 Hash: hash(text),
105 }
106}
107
108func (c *GoChunker) extractMethod(node *sitter.Node, content []byte, path string) *Chunk {
109 nameNode := node.ChildByFieldName("name")
110 receiverNode := node.ChildByFieldName("receiver")
111 if nameNode == nil {
112 return nil
113 }
114
115 name := string(content[nameNode.StartByte():nameNode.EndByte()])
116
117 // Build receiver prefix like (*Server) or (s Server)
118 if receiverNode != nil {
119 recvText := string(content[receiverNode.StartByte():receiverNode.EndByte()])
120 // Extract type from receiver, e.g., "(s *Server)" -> "*Server"
121 recvType := extractReceiverType(recvText)
122 if recvType != "" {
123 name = fmt.Sprintf("(%s).%s", recvType, name)
124 }
125 }
126
127 text := string(content[node.StartByte():node.EndByte()])
128
129 return &Chunk{
130 File: path,
131 StartLine: int(node.StartPoint().Row) + 1,
132 EndLine: int(node.EndPoint().Row) + 1,
133 Type: "method",
134 Name: name,
135 Content: text,
136 Hash: hash(text),
137 }
138}
139
140func (c *GoChunker) extractType(node *sitter.Node, content []byte, path string) *Chunk {
141 // type_declaration contains type_spec children
142 for i := 0; i < int(node.ChildCount()); i++ {
143 child := node.Child(i)
144 if child.Type() == "type_spec" {
145 nameNode := child.ChildByFieldName("name")
146 if nameNode == nil {
147 continue
148 }
149
150 name := string(content[nameNode.StartByte():nameNode.EndByte()])
151 text := string(content[node.StartByte():node.EndByte()])
152
153 return &Chunk{
154 File: path,
155 StartLine: int(node.StartPoint().Row) + 1,
156 EndLine: int(node.EndPoint().Row) + 1,
157 Type: "type",
158 Name: name,
159 Content: text,
160 Hash: hash(text),
161 }
162 }
163 }
164 return nil
165}
166
// extractReceiverType reduces the source text of a method receiver to its
// type, dropping the surrounding parentheses and the receiver variable name:
//
//	"(s *Server)"       -> "*Server"
//	"(Server)"          -> "Server"
//	"(c *Cache[K, V])"  -> "*Cache[K, V]"
//
// It returns "" when the receiver text is empty once the parentheses are
// removed.
func extractReceiverType(recv string) string {
	recv = strings.TrimPrefix(recv, "(")
	recv = strings.TrimSuffix(recv, ")")
	recv = strings.TrimSpace(recv)

	// Set any type-argument list aside before splitting on whitespace:
	// "[K, V]" contains a space, so a plain split would return "V]".
	base, typeArgs := recv, ""
	if i := strings.IndexByte(recv, '['); i >= 0 {
		base, typeArgs = recv[:i], recv[i:]
	}

	// The last whitespace-separated field is the type; an earlier field,
	// if any, is the receiver variable name.
	parts := strings.Fields(base)
	if len(parts) == 0 {
		return typeArgs
	}
	return parts[len(parts)-1] + typeArgs
}
181
// hash returns a short fingerprint of s: the first 8 bytes of its SHA-256
// digest, rendered as 16 lowercase hex characters.
func hash(s string) string {
	const prefixLen = 8
	digest := sha256.Sum256([]byte(s))
	return fmt.Sprintf("%x", digest[:prefixLen])
}