// Package chunker splits source files into named, content-hashed chunks
// (functions, methods and type declarations).
package chunker

import (
	"crypto/sha256"
	"fmt"
	"os"
	"strings"

	sitter "github.com/smacker/go-tree-sitter"
	"github.com/smacker/go-tree-sitter/golang"
)

// Chunk represents a semantically meaningful piece of code.
type Chunk struct {
	File      string // path the chunk was extracted from, as passed by the caller
	StartLine int    // first line of the chunk (tree-sitter start row + 1)
	EndLine   int    // last line of the chunk (tree-sitter end row + 1)
	Type      string // "function", "method", "type"
	Name      string // declared name; methods are rendered as "(Recv).Name"
	Content   string // exact source text of the declaration
	Hash      string // 16 hex chars: first 8 bytes of SHA-256 of Content
}

// Chunker extracts semantic chunks from source code.
type Chunker interface {
	Chunk(path string, content []byte) ([]Chunk, error)
}

// GoChunker extracts chunks from Go source files using tree-sitter.
type GoChunker struct {
	parser *sitter.Parser
}

// NewGoChunker creates a new Go chunker whose parser is configured with the
// Go grammar.
func NewGoChunker() *GoChunker {
	parser := sitter.NewParser()
	parser.SetLanguage(golang.GetLanguage())
	return &GoChunker{parser: parser}
}

// ChunkFile reads the file at path and chunks its contents.
// A read error is returned unchanged.
func (c *GoChunker) ChunkFile(path string) ([]Chunk, error) {
	content, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}
	return c.Chunk(path, content)
}

// Chunk extracts semantic chunks from Go source.
//
// Only direct children of the root node are inspected; each function, method
// or type declaration found there yields one Chunk, in source order. path is
// used only to label the chunks and the error message. The returned slice is
// nil when no declaration is recognised. An error is returned only when the
// parser yields no tree at all.
func (c *GoChunker) Chunk(path string, content []byte) ([]Chunk, error) {
	tree := c.parser.Parse(nil, content)
	if tree == nil {
		return nil, fmt.Errorf("failed to parse %s", path)
	}
	defer tree.Close()

	var chunks []Chunk
	root := tree.RootNode()

	// Walk top-level declarations.
	count := int(root.ChildCount())
	for i := 0; i < count; i++ {
		if chunk := c.extractChunk(root.Child(i), content, path); chunk != nil {
			chunks = append(chunks, *chunk)
		}
	}
	return chunks, nil
}

// extractChunk dispatches on the node type and returns the matching chunk, or
// nil when the node is not a function, method or type declaration.
func (c *GoChunker) extractChunk(node *sitter.Node, content []byte, path string) *Chunk {
	switch node.Type() {
	case "function_declaration":
		return c.extractFunction(node, content, path)
	case "method_declaration":
		return c.extractMethod(node, content, path)
	case "type_declaration":
		return c.extractType(node, content, path)
	}
	return nil
}

// buildChunk assembles a Chunk covering the whole of node, labelled with the
// given kind and name.
func (c *GoChunker) buildChunk(node *sitter.Node, content []byte, path, kind, name string) *Chunk {
	text := string(content[node.StartByte():node.EndByte()])
	return &Chunk{
		File:      path,
		StartLine: int(node.StartPoint().Row) + 1,
		EndLine:   int(node.EndPoint().Row) + 1,
		Type:      kind,
		Name:      name,
		Content:   text,
		Hash:      hash(text),
	}
}

// extractFunction builds a "function" chunk from a function_declaration node.
// It returns nil when the node has no name field.
func (c *GoChunker) extractFunction(node *sitter.Node, content []byte, path string) *Chunk {
	nameNode := node.ChildByFieldName("name")
	if nameNode == nil {
		return nil
	}
	name := string(content[nameNode.StartByte():nameNode.EndByte()])
	return c.buildChunk(node, content, path, "function", name)
}

// extractMethod builds a "method" chunk from a method_declaration node.
// When a receiver type can be determined the chunk name is rendered as
// "(Recv).Name", e.g. "(*Server).Start"; otherwise it is the bare method name.
// It returns nil when the node has no name field.
func (c *GoChunker) extractMethod(node *sitter.Node, content []byte, path string) *Chunk {
	nameNode := node.ChildByFieldName("name")
	if nameNode == nil {
		return nil
	}
	name := string(content[nameNode.StartByte():nameNode.EndByte()])

	// Prefix the name with the receiver type, e.g. "(s *Server)" -> "(*Server).".
	if receiverNode := node.ChildByFieldName("receiver"); receiverNode != nil {
		recvText := string(content[receiverNode.StartByte():receiverNode.EndByte()])
		if recvType := extractReceiverType(recvText); recvType != "" {
			name = fmt.Sprintf("(%s).%s", recvType, name)
		}
	}
	return c.buildChunk(node, content, path, "method", name)
}

// extractType builds a "type" chunk from a type_declaration node.
//
// The chunk spans the entire declaration and is named after the first
// type_spec child that has a name field. It returns nil when no such child
// exists.
//
// NOTE(review): a grouped declaration such as `type ( A ...; B ... )` yields a
// single chunk named after A only; later specs are not separately named.
func (c *GoChunker) extractType(node *sitter.Node, content []byte, path string) *Chunk {
	// type_declaration contains type_spec children.
	count := int(node.ChildCount())
	for i := 0; i < count; i++ {
		child := node.Child(i)
		if child.Type() != "type_spec" {
			continue
		}
		nameNode := child.ChildByFieldName("name")
		if nameNode == nil {
			continue
		}
		name := string(content[nameNode.StartByte():nameNode.EndByte()])
		return c.buildChunk(node, content, path, "type", name)
	}
	return nil
}

// extractReceiverType extracts the type from receiver source text, e.g.
// "(s *Server)" -> "*Server".
//
// The receiver name is optional and the type may itself contain spaces, as in
// the generic receiver "(s *Server[K, V])" -> "*Server[K, V]". Only a leading
// plain identifier is treated as the name; everything after it is the type.
// An empty receiver yields "".
func extractReceiverType(recv string) string {
	// Remove the surrounding parens.
	recv = strings.TrimSpace(recv)
	recv = strings.TrimPrefix(recv, "(")
	recv = strings.TrimSuffix(recv, ")")
	// A parameter list may legally end with a comma: "(s *Server,)".
	recv = strings.TrimSuffix(strings.TrimSpace(recv), ",")
	recv = strings.TrimSpace(recv)
	if recv == "" {
		return ""
	}

	typ := recv
	if i := strings.IndexAny(recv, " \t\r\n"); i >= 0 {
		// The first token is the receiver name only if it looks like a bare
		// identifier. A token carrying type punctuation ("*Server[K,") means
		// the receiver is unnamed and the whole text is the type.
		if head := recv[:i]; !strings.ContainsAny(head, "*[],.") {
			typ = strings.TrimSpace(recv[i:])
		}
	}
	// Normalise "* Server" to "*Server" so the pointer marker is never lost.
	if strings.HasPrefix(typ, "*") {
		typ = "*" + strings.TrimSpace(typ[1:])
	}
	return typ
}

// hash returns the first 8 bytes of the SHA-256 digest of s as 16 lowercase
// hex characters.
func hash(s string) string {
	h := sha256.Sum256([]byte(s))
	return fmt.Sprintf("%x", h[:8]) // First 8 bytes = 16 hex chars
}