From ebe1a973bbfd9ebfec89d1e1ac42a9bb967cf806 Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Fri, 5 Jun 2026 09:56:19 -0700 Subject: [PATCH 01/31] wip: heex treesitter --- go.mod | 1 + go.sum | 2 ++ internal/lsp/elixir.go | 40 ++++++++++++++++++++++++++++++++ internal/lsp/server.go | 2 ++ internal/treesitter/variables.go | 27 +++++++++++++++++++++ 5 files changed, 72 insertions(+) diff --git a/go.mod b/go.mod index c2da4ee..91335b0 100644 --- a/go.mod +++ b/go.mod @@ -18,6 +18,7 @@ replace github.com/tree-sitter/tree-sitter-elixir => github.com/elixir-lang/tree require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/mattn/go-pointer v0.0.1 // indirect + github.com/phoenixframework/tree-sitter-heex v0.9.0 // indirect github.com/segmentio/asm v1.1.3 // indirect github.com/segmentio/encoding v0.3.4 // indirect github.com/spf13/pflag v1.0.9 // indirect diff --git a/go.sum b/go.sum index a8ff704..c213a9f 100644 --- a/go.sum +++ b/go.sum @@ -18,6 +18,8 @@ github.com/mattn/go-pointer v0.0.1 h1:n+XhsuGeVO6MEAp7xyEukFINEa+Quek5psIR/ylA6o github.com/mattn/go-pointer v0.0.1/go.mod h1:2zXcozF6qYGgmsG+SeTZz3oAbFLdD3OWqnUbNvJZAlc= github.com/mattn/go-sqlite3 v1.14.38 h1:tDUzL85kMvOrvpCt8P64SbGgVFtJB11GPi2AdmITgb4= github.com/mattn/go-sqlite3 v1.14.38/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= +github.com/phoenixframework/tree-sitter-heex v0.9.0 h1:19d/KenCYoturUoMq+fY5LXTwPhe5msaOx9cHGnPUj0= +github.com/phoenixframework/tree-sitter-heex v0.9.0/go.mod h1:ul+VP/WJ7qS+DPlkr15hyBrzYd1D1rvmyEKmw/7lGOQ= github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= diff --git a/internal/lsp/elixir.go b/internal/lsp/elixir.go index 783585e..45a72d3 100644 --- a/internal/lsp/elixir.go +++ b/internal/lsp/elixir.go @@ -7,6 +7,7 @@ import ( "unicode" 
"github.com/remoteoss/dexter/internal/parser" + "github.com/remoteoss/dexter/internal/treesitter" ) // TokenizedFile holds pre-tokenized source for efficient multi-operation queries. @@ -342,6 +343,28 @@ func isExprToken(k parser.TokenKind) bool { return k == parser.TokModule || k == parser.TokIdent || k == parser.TokAtom } +// Remove the sigil prefix and suffix, returning the contents and length of the prefix. +// Supports both double ("") and single (”) quotes, written inline and as heredocs. +func sigilContents(tok parser.Token, source []byte) (xml []byte, prefixLen int) { + // remove ~H prefix + prefixLen += 2 + + // check for heredoc and trailing newline, fallback on inline sigil + quotes := string(source[tok.Start+prefixLen : tok.Start+prefixLen+4]) + + var quoteLen int + if quotes == "\"\"\"\n" || quotes == "'''\n" { + quoteLen = 4 + } else { + quoteLen = 1 + } + + start := tok.Start + prefixLen + quoteLen + end := tok.End - quoteLen + + return source[start:end], prefixLen + quoteLen +} + // ExpressionAtCursor extracts the dotted expression at the cursor position // using the token stream. Unlike the char-based ExtractExpression, this // correctly ignores expressions inside strings, comments, heredocs, sigils, @@ -395,6 +418,23 @@ func expressionAtCursorImpl(tokens []parser.Token, source []byte, lineStarts []i } } + if tok.Kind == parser.TokSigil { + // strip heredoc delimiters + // parse heredoc contents as XML + // check if substring at cursor offset is a live_component or function component + // if so, return synthetic expression using that component + + // FIXME: support .heex files? + xml, prefixLen := sigilContents(tok, source) + sigilOffset := offset - (tok.Start + prefixLen) + treesitter.ParseHeexExpr(xml, uint(sigilOffset)) + + // log.Printf("xmlStr:\n%s\n", xmlStr) + // log.Printf("char: %c\n", xmlStr[offset-(tok.Start+6)]) + + return CursorContext{} + } + // Reject non-expression tokens (strings, comments, atoms, etc.) 
if !isExprToken(tok.Kind) { return CursorContext{} diff --git a/internal/lsp/server.go b/internal/lsp/server.go index c2fd345..743b05b 100644 --- a/internal/lsp/server.go +++ b/internal/lsp/server.go @@ -614,6 +614,8 @@ func (s *Server) Definition(ctx context.Context, params *protocol.DefinitionPara tf = NewTokenizedFile(text) } + log.Printf("Definition: line=%d col=%d", lineNum, col) + // Check for @module_attribute reference first if attrName := tf.ModuleAttributeAtCursor(lineNum, col); attrName != "" { if line, found := FindModuleAttributeDefinition(text, attrName); found { diff --git a/internal/treesitter/variables.go b/internal/treesitter/variables.go index 282ee81..72772a4 100644 --- a/internal/treesitter/variables.go +++ b/internal/treesitter/variables.go @@ -1,8 +1,10 @@ package treesitter import ( + "log" "strings" + tree_sitter_heex "github.com/phoenixframework/tree-sitter-heex/bindings/go" tree_sitter "github.com/tree-sitter/go-tree-sitter" tree_sitter_elixir "github.com/tree-sitter/tree-sitter-elixir/bindings/go" ) @@ -23,6 +25,31 @@ func parseElixir(src []byte) (root *tree_sitter.Node, cleanup func()) { } } +// parseHeex parses HEEX present within ~H sigils and in `.heex` files +func parseHeex(src []byte) (root *tree_sitter.Node, cleanup func()) { + p := tree_sitter.NewParser() + if err := p.SetLanguage(tree_sitter.NewLanguage(tree_sitter_heex.Language())); err != nil { + p.Close() + return nil, nil + } + tree := p.Parse(src, nil) + return tree.RootNode(), func() { + tree.Close() + p.Close() + } +} + +func ParseHeexExpr(src []byte, offset uint) { + root, cleanup := parseHeex(src) + if root == nil { + return + } + defer cleanup() + + expr := root.DescendantForByteRange(offset, offset) + log.Printf("expr: %s %s\n", expr.ToSexp(), strings.TrimSpace(expr.Utf8Text(src))) +} + // VariableOccurrence is a position where a variable name appears. 
type VariableOccurrence struct { Line uint // 0-based From 44d103a5366e03290cfe411b93149adcdb75e796 Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Fri, 5 Jun 2026 09:56:31 -0700 Subject: [PATCH 02/31] wip: page_live.ex with example heex --- page_live.ex | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 page_live.ex diff --git a/page_live.ex b/page_live.ex new file mode 100644 index 0000000..9fa2b04 --- /dev/null +++ b/page_live.ex @@ -0,0 +1,61 @@ +defmodule Components do + defmacro __using__(_opts) do + quote do + import Components.Baz + end + end +end + +defmodule Components.Foo do + def foo do + ~H""" + """ + end +end + +defmodule Components.Bar do + def bar do + ~H""" + """ + end +end + +defmodule Components.Baz do + def baz do + ~H""" + """ + end +end + +defmodule Components.Quux do + use Phoenix.LiveComponent + + def render do + ~H""" + """ + end +end + +defmodule PageLive do + alias Components.Foo + alias Components.Quux + + import Components.Bar + + def render(assigns) do + ~H""" +
+ + <.bar /> + <.baz /> + <.garply /> + <.live_component module={Quux} id="quux" /> +
+ """ + end + + defp garply(assigns) do + ~H""" + """ + end +end From 23b3aca5ef35d1b4a6405baad7f6ad0557ad585f Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Fri, 5 Jun 2026 13:30:31 -0700 Subject: [PATCH 03/31] expression parsing and LSP definition for HEEX --- go.mod | 3 +- internal/lsp/elixir.go | 70 ++++++++++---- internal/lsp/elixir_test.go | 30 ++++++ internal/lsp/server.go | 2 - internal/parser/parser_test.go | 26 ++++++ internal/parser/parser_tokenized.go | 21 +++++ internal/treesitter/variables.go | 104 ++++++++++++++++++++- internal/treesitter/variables_test.go | 130 ++++++++++++++++++++++++++ page_live.ex | 61 ------------ 9 files changed, 360 insertions(+), 87 deletions(-) delete mode 100644 page_live.ex diff --git a/go.mod b/go.mod index 91335b0..d451477 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,9 @@ module github.com/remoteoss/dexter go 1.26.1 require ( + github.com/google/go-cmp v0.5.6 github.com/mattn/go-sqlite3 v1.14.38 + github.com/phoenixframework/tree-sitter-heex v0.9.0 github.com/spf13/cobra v1.10.2 github.com/tree-sitter/go-tree-sitter v0.25.0 github.com/tree-sitter/tree-sitter-elixir v0.3.5 @@ -18,7 +20,6 @@ replace github.com/tree-sitter/tree-sitter-elixir => github.com/elixir-lang/tree require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/mattn/go-pointer v0.0.1 // indirect - github.com/phoenixframework/tree-sitter-heex v0.9.0 // indirect github.com/segmentio/asm v1.1.3 // indirect github.com/segmentio/encoding v0.3.4 // indirect github.com/spf13/pflag v1.0.9 // indirect diff --git a/internal/lsp/elixir.go b/internal/lsp/elixir.go index 45a72d3..ae3b8cb 100644 --- a/internal/lsp/elixir.go +++ b/internal/lsp/elixir.go @@ -345,24 +345,24 @@ func isExprToken(k parser.TokenKind) bool { // Remove the sigil prefix and suffix, returning the contents and length of the prefix. // Supports both double ("") and single (”) quotes, written inline and as heredocs. 
-func sigilContents(tok parser.Token, source []byte) (xml []byte, prefixLen int) { +func sigilContents(tok parser.Token, source []byte) (xml []byte, sigil string, prefixLen int) { // remove ~H prefix prefixLen += 2 // check for heredoc and trailing newline, fallback on inline sigil quotes := string(source[tok.Start+prefixLen : tok.Start+prefixLen+4]) - var quoteLen int + var delimLen int if quotes == "\"\"\"\n" || quotes == "'''\n" { - quoteLen = 4 + delimLen = 4 } else { - quoteLen = 1 + delimLen = 1 } - start := tok.Start + prefixLen + quoteLen - end := tok.End - quoteLen + start := tok.Start + prefixLen + delimLen + end := tok.End - delimLen - return source[start:end], prefixLen + quoteLen + return source[start:end], string(source[tok.Start:][1:2]), prefixLen + delimLen } // ExpressionAtCursor extracts the dotted expression at the cursor position @@ -418,21 +418,55 @@ func expressionAtCursorImpl(tokens []parser.Token, source []byte, lineStarts []i } } + // Parse `~H` HEEX sigils if tok.Kind == parser.TokSigil { - // strip heredoc delimiters - // parse heredoc contents as XML - // check if substring at cursor offset is a live_component or function component - // if so, return synthetic expression using that component - - // FIXME: support .heex files? - xml, prefixLen := sigilContents(tok, source) + xml, sigil, prefixLen := sigilContents(tok, source) + if sigil != "H" { + return CursorContext{} + } sigilOffset := offset - (tok.Start + prefixLen) - treesitter.ParseHeexExpr(xml, uint(sigilOffset)) + heexExpr := treesitter.ParseHeexExpr(xml, uint(sigilOffset)) + if heexExpr == nil { + return CursorContext{} + } - // log.Printf("xmlStr:\n%s\n", xmlStr) - // log.Printf("char: %c\n", xmlStr[offset-(tok.Start+6)]) + if heexExpr.Expr != "" { + // Recursively parse interpolated HEEX expressions. + //
+ // ^^^^^^^ + tf := NewTokenizedFile(heexExpr.Expr) + line, col := parser.OffsetToLineCol(tf.lineStarts, sigilOffset-int(heexExpr.Offset)) + ctx := expressionAtCursorImpl(tf.tokens, tf.source, tf.lineStarts, line, col, full) - return CursorContext{} + if !ctx.Empty() { + // We've parsed the expression as though it were its own document, + // so we need to offset it's start/end indices to the outer document. + nestedOffset := tok.Start + prefixLen + int(heexExpr.Offset) + ctx.ExprStart += nestedOffset + ctx.ExprEnd += nestedOffset + } + + return ctx + } else { + // Heex function components are syntax sugar for module/function expressions. + // + // ^^^^^^^ + // <.baz> + // ^^^ + exprStart := tok.Start + prefixLen + int(heexExpr.Offset) + exprEnd := exprStart + len(heexExpr.Module) + len(heexExpr.Function) + if heexExpr.Module != "" { + // offset by additional 1 for "." dot character between module/function + exprEnd += 1 + } + + return CursorContext{ + ModuleRef: heexExpr.Module, + FunctionName: heexExpr.Function, + ExprStart: exprStart, + ExprEnd: exprEnd, + } + } } // Reject non-expression tokens (strings, comments, atoms, etc.) 
diff --git a/internal/lsp/elixir_test.go b/internal/lsp/elixir_test.go index 1f33c3d..d4a5dc9 100644 --- a/internal/lsp/elixir_test.go +++ b/internal/lsp/elixir_test.go @@ -4,6 +4,7 @@ import ( "strings" "testing" + "github.com/google/go-cmp/cmp" "github.com/remoteoss/dexter/internal/parser" ) @@ -436,6 +437,35 @@ func TestExpressionAtCursor_ExprBounds(t *testing.T) { } } +func TestExpressionAtCursor_HEEX(t *testing.T) { + tests := []struct { + code string + line, col int + want CursorContext + }{ + {"~H\"\"\"\n<.foo />\n\"\"\"", 1, 2, CursorContext{FunctionName: "foo", ExprStart: 8, ExprEnd: 11}}, + {"~H'''\n<.foo />\n'''", 1, 2, CursorContext{FunctionName: "foo", ExprStart: 8, ExprEnd: 11}}, + {"~H\"<.foo />\"", 0, 5, CursorContext{FunctionName: "foo", ExprStart: 5, ExprEnd: 8}}, + {"~H'<.foo />'", 0, 5, CursorContext{FunctionName: "foo", ExprStart: 5, ExprEnd: 8}}, + {"~H[<.foo />]", 0, 5, CursorContext{FunctionName: "foo", ExprStart: 5, ExprEnd: 8}}, + {"~H[]", 0, 5, CursorContext{ModuleRef: "Foo", FunctionName: "bar", ExprStart: 4, ExprEnd: 11}}, + {"~H[<.live_component module={Foo.Bar} />]", 0, 28, CursorContext{ModuleRef: "Foo", ExprStart: 28, ExprEnd: 31}}, + {"~H[<.live_component module={Foo.Bar} />]", 0, 32, CursorContext{ModuleRef: "Foo.Bar", ExprStart: 28, ExprEnd: 35}}, + // interpolated expressions that aren't module/function should be ignored + {"~H[
]", 0, 11, CursorContext{}}, + // HTML tags should be ignored + {"~H[
]", 0, 4, CursorContext{}}, + } + + for _, tt := range tests { + tokens, source, lineStarts := tokenize(tt.code) + got := ExpressionAtCursor(tokens, source, lineStarts, tt.line, tt.col) + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Errorf("ExpressionAtCursor(_, %#v, _, %d, %d)\nparse mismatch (-want +got):\n%s", tt.code, tt.line, tt.col, diff) + } + } +} + func TestCursorContext_Expr(t *testing.T) { tests := []struct { mod, fn, want string diff --git a/internal/lsp/server.go b/internal/lsp/server.go index 743b05b..c2fd345 100644 --- a/internal/lsp/server.go +++ b/internal/lsp/server.go @@ -614,8 +614,6 @@ func (s *Server) Definition(ctx context.Context, params *protocol.DefinitionPara tf = NewTokenizedFile(text) } - log.Printf("Definition: line=%d col=%d", lineNum, col) - // Check for @module_attribute reference first if attrName := tf.ModuleAttributeAtCursor(lineNum, col); attrName != "" { if line, found := FindModuleAttributeDefinition(text, attrName); found { diff --git a/internal/parser/parser_test.go b/internal/parser/parser_test.go index 5b7181a..da502dd 100644 --- a/internal/parser/parser_test.go +++ b/internal/parser/parser_test.go @@ -2673,6 +2673,32 @@ func TestLineColToOffset(t *testing.T) { } } +func TestOffsetToLineCol(t *testing.T) { + source := []byte("defmodule Foo do\n def bar, do: :ok\nend\n") + result := TokenizeFull(source) + + tests := []struct { + wantLine, wantCol int + offset int + }{ + {0, 0, 0}, // start of file + {0, 10, 10}, // "F" in "Foo" + {1, 2, 19}, // "d" in "def" + {2, 0, 36}, // "e" in "end" + } + for _, tt := range tests { + gotLine, gotCol := OffsetToLineCol(result.LineStarts, tt.offset) + if gotLine != tt.wantLine || gotCol != tt.wantCol { + t.Errorf("OffsetToLineCol(offset=%d) = (%d, %d), want (%d, %d)", tt.offset, gotLine, gotCol, tt.wantLine, tt.wantCol) + } + } + + // Out-of-range line + if gotLine, gotCol := OffsetToLineCol(result.LineStarts, -1); gotLine != -1 || gotCol != -1 { + t.Errorf("expected (-1, -1) for 
negative offset, got (%d, %d)", gotLine, gotCol) + } +} + func TestBareMacroCall_CommentBetweenArgsAndDo(t *testing.T) { source := `defmodule Test do use SomeMacroLib diff --git a/internal/parser/parser_tokenized.go b/internal/parser/parser_tokenized.go index 04d6153..b9000ac 100644 --- a/internal/parser/parser_tokenized.go +++ b/internal/parser/parser_tokenized.go @@ -758,6 +758,27 @@ func LineColToOffset(lineStarts []int, line, col int) int { return lineStarts[line] + col } +// OffsetToLineCol is the reverse of LineColToOffset. Converts a byte offset +// to a 0-based (line, col) pair using the LineStarts table from TokenizeFull. +// Returns (-1, -1) for negative offsets. +func OffsetToLineCol(lineStarts []int, offset int) (line, col int) { + if offset < 0 { + return -1, -1 + } + + var lineStart int + for i := range lineStarts { + if lineStarts[i] > offset { + break + } + + lineStart = lineStarts[i] + line = i + } + + return line, offset - lineStart +} + // TokenAtOffset returns the index of the token containing byteOffset, or -1 // if the offset falls in a gap between tokens (whitespace) or is out of range. // Uses binary search for O(log n) lookup. diff --git a/internal/treesitter/variables.go b/internal/treesitter/variables.go index 72772a4..0fc7ab6 100644 --- a/internal/treesitter/variables.go +++ b/internal/treesitter/variables.go @@ -1,7 +1,7 @@ package treesitter import ( - "log" + "slices" "strings" tree_sitter_heex "github.com/phoenixframework/tree-sitter-heex/bindings/go" @@ -39,15 +39,109 @@ func parseHeex(src []byte) (root *tree_sitter.Node, cleanup func()) { } } -func ParseHeexExpr(src []byte, offset uint) { +// A HEEX expression may either be a: +// - function call with optional module prefix +// - interpolated expression (`{..}` between curly braces) +// If `expr` is set, `module` / `function` will be empty. +// Otherwise, `function` will be set and `module` may also be set. +// `module` will be either a single module e.g. `Foo` or module chain e.g. 
`Foo.Bar` +type HeexExpr struct { + Module, Function string + Expr string + Offset uint +} + +func ParseHeexExpr(src []byte, cursorOffset uint) *HeexExpr { root, cleanup := parseHeex(src) if root == nil { - return + return nil } defer cleanup() - expr := root.DescendantForByteRange(offset, offset) - log.Printf("expr: %s %s\n", expr.ToSexp(), strings.TrimSpace(expr.Utf8Text(src))) + expr := root.DescendantForByteRange(cursorOffset, cursorOffset) + if expr == nil { + return nil + } + + if expr.Kind() == "expression_value" { + return &HeexExpr{ + Expr: expr.Utf8Text(src), + Offset: expr.StartByte(), + } + } + + // Look for module, function, or module.function expression under cursor, e.g. + // `Foo`, `bar`, `Foo.bar`, or `Foo.Bar.baz` + if expr.Kind() != "." && expr.Kind() != "module" && expr.Kind() != "function" { + return nil + } + + // Find the nearest ancestor with one of the given kinds. + nearest := func(node *tree_sitter.Node, kinds ...string) *tree_sitter.Node { + for ; node != nil; node = node.Parent() { + if slices.Contains(kinds, node.Kind()) { + return node + } + } + return nil + } + + // Find the first named child of the given kind. + namedChild := func(node *tree_sitter.Node, kind string) *tree_sitter.Node { + for i := uint(0); i < node.NamedChildCount(); i++ { + child := node.NamedChild(i) + if child.Kind() == kind { + return child + } + } + return nil + } + + tag := nearest(expr, "component", "self_closing_component") + if tag == nil { + return nil + } + + var compName *tree_sitter.Node + if tag.Kind() == "component" { + // <.foo>.. 
+ // (component (start_component (component_name _)) _) + startComp := namedChild(tag, "start_component") + if startComp == nil { + return nil + } + + compName = namedChild(startComp, "component_name") + } else if tag.Kind() == "self_closing_component" { + // <.foo /> + // (self_closing_component (component_name _) _) + compName = namedChild(tag, "component_name") + } + + if compName == nil { + return nil + } + + moduleNode := namedChild(compName, "module") + funcNode := namedChild(compName, "function") + + var module string + var offset uint + // module prefix is optional + if moduleNode != nil { + module = moduleNode.Utf8Text(src) + offset = moduleNode.StartByte() + } else { + offset = funcNode.StartByte() + } + + function := funcNode.Utf8Text(src) + + return &HeexExpr{ + Module: module, + Function: function, + Offset: offset, + } } // VariableOccurrence is a position where a variable name appears. diff --git a/internal/treesitter/variables_test.go b/internal/treesitter/variables_test.go index 4400e29..eca0f2c 100644 --- a/internal/treesitter/variables_test.go +++ b/internal/treesitter/variables_test.go @@ -3,6 +3,8 @@ package treesitter import ( "strings" "testing" + + "github.com/google/go-cmp/cmp" ) func TestFindVariableOccurrences_BasicVariable(t *testing.T) { @@ -1555,5 +1557,133 @@ config :app, value: some_helper() occs := FindVariableOccurrences(src, 2, uint(len("config :app, value: "))) if occs != nil { t.Errorf("expected nil for bare top-level call, got %d occurrences: %+v", len(occs), occs) + + } +} + +func TestParseHeexExpr_Empty(t *testing.T) { + src := []byte("") + + expr := ParseHeexExpr(src, 0) + if expr != nil { + t.Errorf("expected nil on empty input, got %#v", expr) + } +} + +func TestParseHeexExpr_HTMLTag(t *testing.T) { + src := []byte("
Hello, world!
") + + expr := ParseHeexExpr(src, 1) + if expr != nil { + t.Errorf("expected nil on raw HTML tag, got %#v", expr) + } +} + +func TestParseHeexExpr_Component(t *testing.T) { + tests := []struct { + src string + offset uint + want *HeexExpr + }{ + {"<.foo>Hello, world!", 2, &HeexExpr{ + Function: "foo", + Offset: 2, + }}, + // cursor on "." should nudge to the right + {"<.foo>Hello, world!", 1, &HeexExpr{ + Function: "foo", + Offset: 2, + }}, + {"Hello, world!", 1, &HeexExpr{ + Module: "Foo", + Function: "bar", + Offset: 1, + }}, + {"Hello, world!", 1, &HeexExpr{ + Module: "Foo.Bar", + Function: "baz", + Offset: 1, + }}, + // cursor on close tag should also point to component name + {"Hello, world!", 28, &HeexExpr{ + Module: "Foo.Bar", + Function: "baz", + Offset: 1, + }}, + // nested components should also work + {"
Hello, world!
", 6, &HeexExpr{ + Module: "Foo.Bar", + Function: "baz", + Offset: 6, + }}, + } + for _, tt := range tests { + got := ParseHeexExpr([]byte(tt.src), tt.offset) + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Errorf("ParseHeexExpr(%#v, %d)\nparse mismatch (-want +got):\n%s", tt.src, tt.offset, diff) + } + } +} + +func TestParseHeexExpr_SelfClosingComponent(t *testing.T) { + tests := []struct { + src string + offset uint + want *HeexExpr + }{ + {"<.foo />", 2, &HeexExpr{ + Function: "foo", + Offset: 2, + }}, + // cursor on "." should nudge to the right + {"<.foo />", 1, &HeexExpr{ + Function: "foo", + Offset: 2, + }}, + {"", 1, &HeexExpr{ + Module: "Foo", + Function: "bar", + Offset: 1, + }}, + {"", 1, &HeexExpr{ + Module: "Foo.Bar", + Function: "baz", + Offset: 1, + }}, + {"
", 6, &HeexExpr{ + Module: "Foo.Bar", + Function: "baz", + Offset: 6, + }}, + } + for _, tt := range tests { + got := ParseHeexExpr([]byte(tt.src), tt.offset) + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Errorf("ParseHeexExpr(%#v, %d)\nparse mismatch (-want +got):\n%s", tt.src, tt.offset, diff) + } + } +} + +func TestParseHeexExpr_Expression(t *testing.T) { + tests := []struct { + src string + offset uint + want *HeexExpr + }{ + {"
", 12, &HeexExpr{ + Expr: "class()", + Offset: 12, + }}, + // cursor within expression should still point to start offset of expression + {"
", 14, &HeexExpr{ + Expr: "class()", + Offset: 12, + }}, + } + for _, tt := range tests { + got := ParseHeexExpr([]byte(tt.src), tt.offset) + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Errorf("ParseHeexExpr(%#v, %d)\nparse mismatch (-want +got):\n%s", tt.src, tt.offset, diff) + } } } diff --git a/page_live.ex b/page_live.ex deleted file mode 100644 index 9fa2b04..0000000 --- a/page_live.ex +++ /dev/null @@ -1,61 +0,0 @@ -defmodule Components do - defmacro __using__(_opts) do - quote do - import Components.Baz - end - end -end - -defmodule Components.Foo do - def foo do - ~H""" - """ - end -end - -defmodule Components.Bar do - def bar do - ~H""" - """ - end -end - -defmodule Components.Baz do - def baz do - ~H""" - """ - end -end - -defmodule Components.Quux do - use Phoenix.LiveComponent - - def render do - ~H""" - """ - end -end - -defmodule PageLive do - alias Components.Foo - alias Components.Quux - - import Components.Bar - - def render(assigns) do - ~H""" -
- - <.bar /> - <.baz /> - <.garply /> - <.live_component module={Quux} id="quux" /> -
- """ - end - - defp garply(assigns) do - ~H""" - """ - end -end From d10bdc463cf4d7477382b5ebb0e2aab0e78012ce Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Sat, 6 Jun 2026 21:14:35 -0700 Subject: [PATCH 04/31] review feedback - support multicharacter sigils - newline after sigil delimiter is optional - ExprStart / ExprEnd are line-relative --- internal/lsp/elixir.go | 35 ++++++++++++++++++++++++----------- internal/lsp/elixir_test.go | 15 ++++++++++++--- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/internal/lsp/elixir.go b/internal/lsp/elixir.go index ae3b8cb..e1ddca5 100644 --- a/internal/lsp/elixir.go +++ b/internal/lsp/elixir.go @@ -346,15 +346,28 @@ func isExprToken(k parser.TokenKind) bool { // Remove the sigil prefix and suffix, returning the contents and length of the prefix. // Supports both double ("") and single (”) quotes, written inline and as heredocs. func sigilContents(tok parser.Token, source []byte) (xml []byte, sigil string, prefixLen int) { - // remove ~H prefix - prefixLen += 2 + // remove ~ prefix + prefixLen = 1 + + // remove sigil character(s) + // > Custom sigils may be either a single lowercase character, or an uppercase + // > character followed by more uppercase characters and digits. 
+ // > https://elixir.hexdocs.pm/sigils.html + for { + c := rune(source[prefixLen]) + if unicode.IsLetter(c) || unicode.IsDigit(c) { + prefixLen += 1 + } else { + break + } + } // check for heredoc and trailing newline, fallback on inline sigil quotes := string(source[tok.Start+prefixLen : tok.Start+prefixLen+4]) var delimLen int - if quotes == "\"\"\"\n" || quotes == "'''\n" { - delimLen = 4 + if quotes == "\"\"\"" || quotes == "'''" { + delimLen = 3 } else { delimLen = 1 } @@ -418,6 +431,11 @@ func expressionAtCursorImpl(tokens []parser.Token, source []byte, lineStarts []i } } + lineStart := 0 + if line < len(lineStarts) { + lineStart = lineStarts[line] + } + // Parse `~H` HEEX sigils if tok.Kind == parser.TokSigil { xml, sigil, prefixLen := sigilContents(tok, source) @@ -441,7 +459,7 @@ func expressionAtCursorImpl(tokens []parser.Token, source []byte, lineStarts []i if !ctx.Empty() { // We've parsed the expression as though it were its own document, // so we need to offset it's start/end indices to the outer document. - nestedOffset := tok.Start + prefixLen + int(heexExpr.Offset) + nestedOffset := tok.Start + prefixLen + int(heexExpr.Offset) - lineStart ctx.ExprStart += nestedOffset ctx.ExprEnd += nestedOffset } @@ -453,7 +471,7 @@ func expressionAtCursorImpl(tokens []parser.Token, source []byte, lineStarts []i // ^^^^^^^ // <.baz> // ^^^ - exprStart := tok.Start + prefixLen + int(heexExpr.Offset) + exprStart := tok.Start + prefixLen + int(heexExpr.Offset) - lineStart exprEnd := exprStart + len(heexExpr.Module) + len(heexExpr.Function) if heexExpr.Module != "" { // offset by additional 1 for "." 
dot character between module/function @@ -508,11 +526,6 @@ func expressionAtCursorImpl(tokens []parser.Token, source []byte, lineStarts []i } // Build module ref and function name from the token chain - lineStart := 0 - if line < len(lineStarts) { - lineStart = lineStarts[line] - } - var moduleParts []string functionName := "" diff --git a/internal/lsp/elixir_test.go b/internal/lsp/elixir_test.go index d4a5dc9..c4339b9 100644 --- a/internal/lsp/elixir_test.go +++ b/internal/lsp/elixir_test.go @@ -443,18 +443,27 @@ func TestExpressionAtCursor_HEEX(t *testing.T) { line, col int want CursorContext }{ - {"~H\"\"\"\n<.foo />\n\"\"\"", 1, 2, CursorContext{FunctionName: "foo", ExprStart: 8, ExprEnd: 11}}, - {"~H'''\n<.foo />\n'''", 1, 2, CursorContext{FunctionName: "foo", ExprStart: 8, ExprEnd: 11}}, + // all delimiter styles should be supported + {"~H\"\"\"\n<.foo />\n\"\"\"", 1, 2, CursorContext{FunctionName: "foo", ExprStart: 2, ExprEnd: 5}}, + {"~H'''\n<.foo />\n'''", 1, 2, CursorContext{FunctionName: "foo", ExprStart: 2, ExprEnd: 5}}, {"~H\"<.foo />\"", 0, 5, CursorContext{FunctionName: "foo", ExprStart: 5, ExprEnd: 8}}, {"~H'<.foo />'", 0, 5, CursorContext{FunctionName: "foo", ExprStart: 5, ExprEnd: 8}}, {"~H[<.foo />]", 0, 5, CursorContext{FunctionName: "foo", ExprStart: 5, ExprEnd: 8}}, + // newline after delimiter is optional + {"~H\"\"\"<.foo />\"\"\"", 0, 7, CursorContext{FunctionName: "foo", ExprStart: 7, ExprEnd: 10}}, {"~H[]", 0, 5, CursorContext{ModuleRef: "Foo", FunctionName: "bar", ExprStart: 4, ExprEnd: 11}}, {"~H[<.live_component module={Foo.Bar} />]", 0, 28, CursorContext{ModuleRef: "Foo", ExprStart: 28, ExprEnd: 31}}, {"~H[<.live_component module={Foo.Bar} />]", 0, 32, CursorContext{ModuleRef: "Foo.Bar", ExprStart: 28, ExprEnd: 35}}, + {"~H'''\n<.live_component module={Foo.Bar} />\n'''", 1, 29, CursorContext{ModuleRef: "Foo.Bar", ExprStart: 25, ExprEnd: 32}}, // interpolated expressions that aren't module/function should be ignored {"~H[
]", 0, 11, CursorContext{}}, // HTML tags should be ignored {"~H[
]", 0, 4, CursorContext{}}, + // custom sigils should be parsed correctly but ignored + {"~x[_]", 0, 3, CursorContext{}}, + {"~X[_]", 0, 3, CursorContext{}}, + {"~XXX[_]", 0, 5, CursorContext{}}, + {"~X12[_]", 0, 5, CursorContext{}}, } for _, tt := range tests { @@ -613,7 +622,7 @@ end` text := `defmodule MyApp.Web do alias MyApp.Services.{ Accounts, - + def foo do # missing close brace end From 1214aeafeee99345f78b624b1d58efa4dafcda0a Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Sat, 6 Jun 2026 21:44:13 -0700 Subject: [PATCH 05/31] review feedback - sigil delim offset relative to tok.Start - ignore when cursor is on sigil char/delim - fix possible nil access --- internal/lsp/elixir.go | 7 ++++--- internal/treesitter/variables.go | 3 +++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/internal/lsp/elixir.go b/internal/lsp/elixir.go index e1ddca5..46d2404 100644 --- a/internal/lsp/elixir.go +++ b/internal/lsp/elixir.go @@ -354,7 +354,7 @@ func sigilContents(tok parser.Token, source []byte) (xml []byte, sigil string, p // > character followed by more uppercase characters and digits. 
// > https://elixir.hexdocs.pm/sigils.html for { - c := rune(source[prefixLen]) + c := rune(source[tok.Start+prefixLen]) if unicode.IsLetter(c) || unicode.IsDigit(c) { prefixLen += 1 } else { @@ -363,7 +363,7 @@ func sigilContents(tok parser.Token, source []byte) (xml []byte, sigil string, p } // check for heredoc and trailing newline, fallback on inline sigil - quotes := string(source[tok.Start+prefixLen : tok.Start+prefixLen+4]) + quotes := string(source[tok.Start+prefixLen:][:4]) var delimLen int if quotes == "\"\"\"" || quotes == "'''" { @@ -439,7 +439,8 @@ func expressionAtCursorImpl(tokens []parser.Token, source []byte, lineStarts []i // Parse `~H` HEEX sigils if tok.Kind == parser.TokSigil { xml, sigil, prefixLen := sigilContents(tok, source) - if sigil != "H" { + // ignore non-H sigils and when the cursor is on the sigil character/delimiter + if sigil != "H" || offset < tok.Start+prefixLen { return CursorContext{} } sigilOffset := offset - (tok.Start + prefixLen) diff --git a/internal/treesitter/variables.go b/internal/treesitter/variables.go index 0fc7ab6..edd8c1e 100644 --- a/internal/treesitter/variables.go +++ b/internal/treesitter/variables.go @@ -124,6 +124,9 @@ func ParseHeexExpr(src []byte, cursorOffset uint) *HeexExpr { moduleNode := namedChild(compName, "module") funcNode := namedChild(compName, "function") + if funcNode == nil { + return nil + } var module string var offset uint From 0c961b45739104071c871362fcd0f84acf0dfe0e Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Sat, 6 Jun 2026 21:59:52 -0700 Subject: [PATCH 06/31] ignore trailing modifer chars for sigil contents --- internal/lsp/elixir.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/internal/lsp/elixir.go b/internal/lsp/elixir.go index 46d2404..873282c 100644 --- a/internal/lsp/elixir.go +++ b/internal/lsp/elixir.go @@ -362,7 +362,7 @@ func sigilContents(tok parser.Token, source []byte) (xml []byte, sigil string, p } } - // check for heredoc and trailing 
newline, fallback on inline sigil + // check for heredoc, fallback on inline sigil quotes := string(source[tok.Start+prefixLen:][:4]) var delimLen int @@ -372,8 +372,14 @@ func sigilContents(tok parser.Token, source []byte) (xml []byte, sigil string, p delimLen = 1 } + // ignore trailing modifier characters + end := tok.End + for unicode.IsLetter(rune(source[end-1])) { + end-- + } + start := tok.Start + prefixLen + delimLen - end := tok.End - delimLen + end -= delimLen return source[start:end], string(source[tok.Start:][1:2]), prefixLen + delimLen } From bc3b0720f585ae89eebeb3983c93d897fad81b6c Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Sun, 7 Jun 2026 13:56:41 -0700 Subject: [PATCH 07/31] index HEEX template contents via tree-sitter-heex --- internal/lsp/elixir.go | 104 +----------- internal/lsp/elixir_test.go | 3 +- internal/lsp/server_test.go | 64 +++++++ internal/parser/parser_tokenized.go | 21 --- internal/parser/tokenizer.go | 255 ++++++++++++++++++---------- internal/treesitter/variables.go | 117 +++---------- 6 files changed, 258 insertions(+), 306 deletions(-) diff --git a/internal/lsp/elixir.go b/internal/lsp/elixir.go index 873282c..783585e 100644 --- a/internal/lsp/elixir.go +++ b/internal/lsp/elixir.go @@ -7,7 +7,6 @@ import ( "unicode" "github.com/remoteoss/dexter/internal/parser" - "github.com/remoteoss/dexter/internal/treesitter" ) // TokenizedFile holds pre-tokenized source for efficient multi-operation queries. @@ -343,47 +342,6 @@ func isExprToken(k parser.TokenKind) bool { return k == parser.TokModule || k == parser.TokIdent || k == parser.TokAtom } -// Remove the sigil prefix and suffix, returning the contents and length of the prefix. -// Supports both double ("") and single (”) quotes, written inline and as heredocs. 
-func sigilContents(tok parser.Token, source []byte) (xml []byte, sigil string, prefixLen int) { - // remove ~ prefix - prefixLen = 1 - - // remove sigil character(s) - // > Custom sigils may be either a single lowercase character, or an uppercase - // > character followed by more uppercase characters and digits. - // > https://elixir.hexdocs.pm/sigils.html - for { - c := rune(source[tok.Start+prefixLen]) - if unicode.IsLetter(c) || unicode.IsDigit(c) { - prefixLen += 1 - } else { - break - } - } - - // check for heredoc, fallback on inline sigil - quotes := string(source[tok.Start+prefixLen:][:4]) - - var delimLen int - if quotes == "\"\"\"" || quotes == "'''" { - delimLen = 3 - } else { - delimLen = 1 - } - - // ignore trailing modifier characters - end := tok.End - for unicode.IsLetter(rune(source[end-1])) { - end-- - } - - start := tok.Start + prefixLen + delimLen - end -= delimLen - - return source[start:end], string(source[tok.Start:][1:2]), prefixLen + delimLen -} - // ExpressionAtCursor extracts the dotted expression at the cursor position // using the token stream. Unlike the char-based ExtractExpression, this // correctly ignores expressions inside strings, comments, heredocs, sigils, @@ -437,63 +395,6 @@ func expressionAtCursorImpl(tokens []parser.Token, source []byte, lineStarts []i } } - lineStart := 0 - if line < len(lineStarts) { - lineStart = lineStarts[line] - } - - // Parse `~H` HEEX sigils - if tok.Kind == parser.TokSigil { - xml, sigil, prefixLen := sigilContents(tok, source) - // ignore non-H sigils and when the cursor is on the sigil character/delimiter - if sigil != "H" || offset < tok.Start+prefixLen { - return CursorContext{} - } - sigilOffset := offset - (tok.Start + prefixLen) - heexExpr := treesitter.ParseHeexExpr(xml, uint(sigilOffset)) - if heexExpr == nil { - return CursorContext{} - } - - if heexExpr.Expr != "" { - // Recursively parse interpolated HEEX expressions. - //
- // ^^^^^^^ - tf := NewTokenizedFile(heexExpr.Expr) - line, col := parser.OffsetToLineCol(tf.lineStarts, sigilOffset-int(heexExpr.Offset)) - ctx := expressionAtCursorImpl(tf.tokens, tf.source, tf.lineStarts, line, col, full) - - if !ctx.Empty() { - // We've parsed the expression as though it were its own document, - // so we need to offset it's start/end indices to the outer document. - nestedOffset := tok.Start + prefixLen + int(heexExpr.Offset) - lineStart - ctx.ExprStart += nestedOffset - ctx.ExprEnd += nestedOffset - } - - return ctx - } else { - // Heex function components are syntax sugar for module/function expressions. - // - // ^^^^^^^ - // <.baz> - // ^^^ - exprStart := tok.Start + prefixLen + int(heexExpr.Offset) - lineStart - exprEnd := exprStart + len(heexExpr.Module) + len(heexExpr.Function) - if heexExpr.Module != "" { - // offset by additional 1 for "." dot character between module/function - exprEnd += 1 - } - - return CursorContext{ - ModuleRef: heexExpr.Module, - FunctionName: heexExpr.Function, - ExprStart: exprStart, - ExprEnd: exprEnd, - } - } - } - // Reject non-expression tokens (strings, comments, atoms, etc.) 
if !isExprToken(tok.Kind) { return CursorContext{} @@ -533,6 +434,11 @@ func expressionAtCursorImpl(tokens []parser.Token, source []byte, lineStarts []i } // Build module ref and function name from the token chain + lineStart := 0 + if line < len(lineStarts) { + lineStart = lineStarts[line] + } + var moduleParts []string functionName := "" diff --git a/internal/lsp/elixir_test.go b/internal/lsp/elixir_test.go index c4339b9..058c8ac 100644 --- a/internal/lsp/elixir_test.go +++ b/internal/lsp/elixir_test.go @@ -451,7 +451,8 @@ func TestExpressionAtCursor_HEEX(t *testing.T) { {"~H[<.foo />]", 0, 5, CursorContext{FunctionName: "foo", ExprStart: 5, ExprEnd: 8}}, // newline after delimiter is optional {"~H\"\"\"<.foo />\"\"\"", 0, 7, CursorContext{FunctionName: "foo", ExprStart: 7, ExprEnd: 10}}, - {"~H[]", 0, 5, CursorContext{ModuleRef: "Foo", FunctionName: "bar", ExprStart: 4, ExprEnd: 11}}, + {"~H[]", 0, 5, CursorContext{ModuleRef: "Foo", ExprStart: 4, ExprEnd: 7}}, + {"~H[]", 0, 9, CursorContext{ModuleRef: "Foo", FunctionName: "bar", ExprStart: 4, ExprEnd: 11}}, {"~H[<.live_component module={Foo.Bar} />]", 0, 28, CursorContext{ModuleRef: "Foo", ExprStart: 28, ExprEnd: 31}}, {"~H[<.live_component module={Foo.Bar} />]", 0, 32, CursorContext{ModuleRef: "Foo.Bar", ExprStart: 28, ExprEnd: 35}}, {"~H'''\n<.live_component module={Foo.Bar} />\n'''", 1, 29, CursorContext{ModuleRef: "Foo.Bar", ExprStart: 25, ExprEnd: 32}}, diff --git a/internal/lsp/server_test.go b/internal/lsp/server_test.go index 4477684..6c0deff 100644 --- a/internal/lsp/server_test.go +++ b/internal/lsp/server_test.go @@ -2207,6 +2207,70 @@ end` } } +func TestDefinition_HEEXFunction(t *testing.T) { + server, cleanup := setupTestServer(t) + defer cleanup() + + src := `defmodule TestLive do + use Phoenix.LiveView + + def render(assigns) do + ~H""" + <.foo /> + +
+ """ + end + + defp foo(_), do: ~H"" + defp class, do: "" +end` + + uri := "file://" + filepath.Join(server.projectRoot, "test_live.ex") + indexFile(t, server.store, server.projectRoot, "test_live.ex", src) + server.docs.Set(uri, src) + + // Cursor on "foo" at line 6 col 6 (the `<.foo />` component inside `render`) + locs := definitionAt(t, server, uri, 5, 6) + if len(locs) == 0 { + t.Fatal("expected go-to-definition for function 'foo'") + } + // Should jump to line 11 where `foo` is defined + if locs[0].Range.Start.Line != 11 { + t.Errorf("expected definition on line 9, got line %d", locs[0].Range.Start.Line) + } + + // Cursor on "TestLive.foo" at line 7 col 6 (the `TestLive` module of ``) + locs = definitionAt(t, server, uri, 6, 6) + if len(locs) == 0 { + t.Fatal("expected go-to-definition for module 'TestLive'") + } + // Should jump to line 1 where `TestLive` is defined + if locs[0].Range.Start.Line != 0 { + t.Errorf("expected definition on line 1, got line %d", locs[0].Range.Start.Line) + } + + // Cursor on "TestLive.foo" at line 7 col 15 (the `foo` function of ``) + locs = definitionAt(t, server, uri, 6, 15) + if len(locs) == 0 { + t.Fatal("expected go-to-definition for function 'foo'") + } + // Should jump to line 11 where `foo` is defined + if locs[0].Range.Start.Line != 11 { + t.Errorf("expected definition on line 11, got line %d", locs[0].Range.Start.Line) + } + + // Cursor on "class()" at line 8 col 16 (the `class()` call of `
`) + locs = definitionAt(t, server, uri, 7, 16) + if len(locs) == 0 { + t.Fatal("expected go-to-definition for function 'class'") + } + // Should jump to line 12 where `class` is defined + if locs[0].Range.Start.Line != 12 { + t.Errorf("expected definition on line 12, got line %d", locs[0].Range.Start.Line) + } +} + func TestHover_AliasInjectedByUse(t *testing.T) { server, cleanup := setupTestServer(t) defer cleanup() diff --git a/internal/parser/parser_tokenized.go b/internal/parser/parser_tokenized.go index b9000ac..04d6153 100644 --- a/internal/parser/parser_tokenized.go +++ b/internal/parser/parser_tokenized.go @@ -758,27 +758,6 @@ func LineColToOffset(lineStarts []int, line, col int) int { return lineStarts[line] + col } -// OffsetToLineCol is the reverse of LineColToOffset. Converts a byte offset -// to a 0-based (line, col) pair using the LineStarts table from TokenizeFull. -// Returns (-1, -1) for negative offsets. -func OffsetToLineCol(lineStarts []int, offset int) (line, col int) { - if offset < 0 { - return -1, -1 - } - - var lineStart int - for i := range lineStarts { - if lineStarts[i] > offset { - break - } - - lineStart = lineStarts[i] - line = i - } - - return line, offset - lineStart -} - // TokenAtOffset returns the index of the token containing byteOffset, or -1 // if the offset falls in a gap between tokens (whitespace) or is out of range. // Uses binary search for O(log n) lookup. diff --git a/internal/parser/tokenizer.go b/internal/parser/tokenizer.go index c43471e..2476713 100644 --- a/internal/parser/tokenizer.go +++ b/internal/parser/tokenizer.go @@ -3,6 +3,8 @@ package parser import ( "unicode" "unicode/utf8" + + "github.com/remoteoss/dexter/internal/treesitter" ) // TokenKind identifies the kind of a lexed token. @@ -235,20 +237,7 @@ func TokenizeFull(source []byte) TokenResult { // Single-char sigils: ~r, ~s, ~S, etc. // Multi-char sigils (Elixir 1.15+): ~HTML, ~HEEX — uppercase only. 
if i+1 < len(source) && isLetter(source[i+1]) { - start := i - startLine := line - sigilLetter := source[i+1] - i += 2 // consume ~ and first letter - // Multi-char sigils: continue reading uppercase letters - if isUpper(sigilLetter) { - for i < len(source) && isUpper(source[i]) { - i++ - } - } - if i < len(source) { - i, line = scanSigilContent(source, i, line, sigilLetter, &lineStarts) - } - tokens = append(tokens, Token{Kind: TokSigil, Start: start, End: i, Line: startLine}) + i, line = scanSigil(source, i, line, &lineStarts, &tokens) } else { tokens = append(tokens, Token{Kind: TokOther, Start: i, End: i + 1, Line: line}) i++ @@ -617,11 +606,7 @@ func scanInterpolation(source []byte, i, line int, lineStarts *[]int) (int, int) i++ // single char like ?} or ?a } case c == '~' && i+1 < len(source) && isLetter(source[i+1]): - sigilLetter := source[i+1] - i += 2 // consume ~ and letter - if i < len(source) { - i, line = scanSigilContent(source, i, line, sigilLetter, lineStarts) - } + i, line = scanSigil(source, i, line, lineStarts, nil) case c == '#' && i+1 < len(source) && source[i+1] == '{': i += 2 i, line = scanInterpolation(source, i, line, lineStarts) @@ -672,115 +657,201 @@ func scanHeredocContent(source []byte, i, line int, delim byte, lineStarts *[]in return i, line } -// scanSigilContent scans from the opening delimiter of a sigil to its closing delimiter, -// including any trailing modifier letters. Returns new position and updated line count. -// sigilLetter is the letter after ~ (e.g. 's' in ~s, 'S' in ~S). Uppercase sigil letters -// mean the content is "raw" — backslash is NOT an escape character. -func scanSigilContent(source []byte, i, line int, sigilLetter byte, lineStarts *[]int) (int, int) { +// scanSigil scans from the start of a sigil to its closing delimiter, including any trailing +// modifier letters. Returns new position and updated line count, adding any tokens encountered +// along the way if `tokens` is provided. 
+func scanSigil(source []byte, i, line int, lineStarts *[]int, tokens *[]Token) (int, int) { if i >= len(source) { return i, line } + // sigilLetter is the letter after ~ (e.g. 's' in ~s, 'S' in ~S). Uppercase sigil + // letters mean the content is "raw" — backslash is NOT an escape character. + start := i + startLine := line + sigilLetter := source[i+1] + i += 2 // consume ~ and first letter + // Multi-char sigils: continue reading uppercase letters/numbers + if isUpper(sigilLetter) { + for i < len(source) && (isUpper(source[i]) || isDigit(source[i])) { + i++ + } + } + + sigilChars := string(source[start+1 : i]) + escapes := isLower(sigilLetter) // only lowercase sigils process escapes openCh := source[i] + var contentsStart, contentsEnd int + // Check for heredoc sigil: ~s""" or ~S""" if openCh == '"' && i+2 < len(source) && source[i+1] == '"' && source[i+2] == '"' { i += 3 // consume """ + contentsStart = i if escapes { i, line = scanHeredocContent(source, i, line, '"', lineStarts) } else { i, line = scanRawHeredocContent(source, i, line, '"', lineStarts) } - return i, line - } - if openCh == '\'' && i+2 < len(source) && source[i+1] == '\'' && source[i+2] == '\'' { + contentsEnd = i - 3 + } else if openCh == '\'' && i+2 < len(source) && source[i+1] == '\'' && source[i+2] == '\'' { i += 3 // consume ''' + contentsStart = i if escapes { i, line = scanHeredocContent(source, i, line, '\'', lineStarts) } else { i, line = scanRawHeredocContent(source, i, line, '\'', lineStarts) } - return i, line - } - - i++ // consume opening delimiter - - var closeCh byte - nested := false - - switch openCh { - case '(': - closeCh = ')' - nested = true - case '[': - closeCh = ']' - nested = true - case '{': - closeCh = '}' - nested = true - case '<': - closeCh = '>' - nested = true - default: - closeCh = openCh - nested = false - } + contentsEnd = i - 3 + } else { + i++ // consume opening delimiter + contentsStart = i + + var closeCh byte + nested := false + + switch openCh { + case 
'(': + closeCh = ')' + nested = true + case '[': + closeCh = ']' + nested = true + case '{': + closeCh = '}' + nested = true + case '<': + closeCh = '>' + nested = true + default: + closeCh = openCh + nested = false + } - if nested { - depth := 1 - for i < len(source) && depth > 0 { - ch := source[i] - if ch == '\n' { - line++ - i++ - *lineStarts = append(*lineStarts, i) - } else if escapes && ch == '\\' && i+1 < len(source) { - if source[i+1] == '\n' { + if nested { + depth := 1 + for i < len(source) && depth > 0 { + ch := source[i] + if ch == '\n' { line++ - *lineStarts = append(*lineStarts, i+2) + i++ + *lineStarts = append(*lineStarts, i) + } else if escapes && ch == '\\' && i+1 < len(source) { + if source[i+1] == '\n' { + line++ + *lineStarts = append(*lineStarts, i+2) + } + i += 2 + } else if ch == openCh { + depth++ + i++ + } else if ch == closeCh { + depth-- + i++ + } else { + i++ } - i += 2 - } else if ch == openCh { - depth++ - i++ - } else if ch == closeCh { - depth-- - i++ - } else { - i++ } - } - } else { - for i < len(source) { - ch := source[i] - if ch == '\n' { - line++ - i++ - *lineStarts = append(*lineStarts, i) - } else if escapes && ch == '\\' && i+1 < len(source) { - if source[i+1] == '\n' { + } else { + for i < len(source) { + ch := source[i] + if ch == '\n' { line++ - *lineStarts = append(*lineStarts, i+2) + i++ + *lineStarts = append(*lineStarts, i) + } else if escapes && ch == '\\' && i+1 < len(source) { + if source[i+1] == '\n' { + line++ + *lineStarts = append(*lineStarts, i+2) + } + i += 2 + } else if ch == closeCh { + i++ // consume closing delimiter + break + } else { + i++ } - i += 2 - } else if ch == closeCh { - i++ // consume closing delimiter - break - } else { - i++ } } + + contentsEnd = i - 1 + + // Consume trailing modifier letters (e.g. the 'i' in ~r/foo/i) + for i < len(source) && isLetter(source[i]) { + i++ + } } - // Consume trailing modifier letters (e.g. 
the 'i' in ~r/foo/i) - for i < len(source) && isLetter(source[i]) { - i++ + // emit tokens if requested + if tokens != nil { + scanSigilContents(sigilChars, source, start, i, contentsStart, contentsEnd, startLine, lineStarts, tokens) } return i, line } +func scanSigilContents(sigilChars string, source []byte, start, end, contentsStart, contentsEnd, line int, lineStarts *[]int, tokens *[]Token) (int, int) { + // only scan the contents of HEEX `~H` sigils + if sigilChars != "H" { + *tokens = append(*tokens, Token{Kind: TokSigil, Start: start, End: end, Line: line}) + return start, line + } + + lineOffset := func(src []byte, offset int) (lines int) { + for i := 0; i < offset; i++ { + if src[i] == '\n' { + lines++ + } + } + return + } + + xml := source[contentsStart:contentsEnd] + treesitter.ParseHeex(xml, func(kind, text string, offset int) { + lineInHeex := lineOffset(source, contentsStart+offset) + offset += contentsStart + n := len(text) + + switch kind { + case "expression_value": + res := TokenizeFull([]byte(text)) + + for _, t := range res.Tokens { + if t.Kind == TokEOF { + continue + } + *tokens = append(*tokens, Token{ + Kind: t.Kind, + Start: t.Start + offset, + End: t.End + offset, + Line: t.Line + line + lineInHeex, + }) + } + + // FIXME: how do we need to update lineStarts? + // for _, l := range res.LineStarts[1:] { + // *lineStarts = append(*lineStarts, line + lineInHeex) + // } + + case "module": + *tokens = append(*tokens, Token{Kind: TokModule, Start: offset, End: offset + n, Line: line + lineInHeex}) + + case "function": + *tokens = append(*tokens, Token{Kind: TokIdent, Start: offset, End: offset + n, Line: line + lineInHeex}) + + case ".": + *tokens = append(*tokens, Token{Kind: TokDot, Start: offset, End: offset + 1, Line: line + lineInHeex}) + + default: + // The remainder of the sigil's contents are ignored. 
+ *tokens = append(*tokens, Token{Kind: TokOther, Start: offset, End: offset + n, Line: line + lineInHeex}) + } + }) + + return start, line +} + // scanRawHeredocContent scans a heredoc body where backslash is NOT an escape character // (used by uppercase sigils like ~S"""). Only tracks newlines and looks for closing delimiter. func scanRawHeredocContent(source []byte, i, line int, delim byte, lineStarts *[]int) (int, int) { diff --git a/internal/treesitter/variables.go b/internal/treesitter/variables.go index edd8c1e..b0d5b5d 100644 --- a/internal/treesitter/variables.go +++ b/internal/treesitter/variables.go @@ -1,7 +1,6 @@ package treesitter import ( - "slices" "strings" tree_sitter_heex "github.com/phoenixframework/tree-sitter-heex/bindings/go" @@ -39,111 +38,43 @@ func parseHeex(src []byte) (root *tree_sitter.Node, cleanup func()) { } } -// A HEEX expression may either be a: -// - function call with optional module prefix -// - interpolated expression (`{..}` between curly braces) -// If `expr` is set, `module` / `function` will be empty. -// Otherwise, `function` will be set and `module` may also be set. -// `module` will be either a single module e.g. `Foo` or module chain e.g. `Foo.Bar` -type HeexExpr struct { - Module, Function string - Expr string - Offset uint -} - -func ParseHeexExpr(src []byte, cursorOffset uint) *HeexExpr { +// ParseHeex parses the HEEX template in `src` and calls `onNode` for each leaf node +// it encounters. `onNode` is called with the leaf node's kind, text contents, and +// offset within the given `src` slice. 
+func ParseHeex(src []byte, onNode func(kind, text string, offset int)) { root, cleanup := parseHeex(src) if root == nil { - return nil + return } defer cleanup() - expr := root.DescendantForByteRange(cursorOffset, cursorOffset) - if expr == nil { - return nil - } + cursor := root.Walk() + defer cursor.Close() - if expr.Kind() == "expression_value" { - return &HeexExpr{ - Expr: expr.Utf8Text(src), - Offset: expr.StartByte(), + for { + // visit current node + node := cursor.Node() + if node.ChildCount() == 0 { + // notify visitor about leaf nodes + onNode(node.Kind(), node.Utf8Text(src), int(node.StartByte())) } - } - - // Look for module, function, or module.function expression under cursor, e.g. - // `Foo`, `bar`, `Foo.bar`, or `Foo.Bar.baz` - if expr.Kind() != "." && expr.Kind() != "module" && expr.Kind() != "function" { - return nil - } - // Find the nearest ancestor with one of the given kinds. - nearest := func(node *tree_sitter.Node, kinds ...string) *tree_sitter.Node { - for ; node != nil; node = node.Parent() { - if slices.Contains(kinds, node.Kind()) { - return node - } + // traverse down one level, if possible + if cursor.GotoFirstChild() { + continue } - return nil - } - // Find the first named child of the given kind. - namedChild := func(node *tree_sitter.Node, kind string) *tree_sitter.Node { - for i := uint(0); i < node.NamedChildCount(); i++ { - child := node.NamedChild(i) - if child.Kind() == kind { - return child + for { + // traverse via siblings, if possible + if cursor.GotoNextSibling() { + break } - } - return nil - } - tag := nearest(expr, "component", "self_closing_component") - if tag == nil { - return nil - } - - var compName *tree_sitter.Node - if tag.Kind() == "component" { - // <.foo>.. 
- // (component (start_component (component_name _)) _) - startComp := namedChild(tag, "start_component") - if startComp == nil { - return nil + // move back up and recurse, returning once we're back to the root + if !cursor.GotoParent() { + return + } } - - compName = namedChild(startComp, "component_name") - } else if tag.Kind() == "self_closing_component" { - // <.foo /> - // (self_closing_component (component_name _) _) - compName = namedChild(tag, "component_name") - } - - if compName == nil { - return nil - } - - moduleNode := namedChild(compName, "module") - funcNode := namedChild(compName, "function") - if funcNode == nil { - return nil - } - - var module string - var offset uint - // module prefix is optional - if moduleNode != nil { - module = moduleNode.Utf8Text(src) - offset = moduleNode.StartByte() - } else { - offset = funcNode.StartByte() - } - - function := funcNode.Utf8Text(src) - - return &HeexExpr{ - Module: module, - Function: function, - Offset: offset, } } From be3848d94a28484a7a3c99f52bd7efd44f00308f Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Sun, 7 Jun 2026 14:35:02 -0700 Subject: [PATCH 08/31] wip: nested tree-sitter-heex while parsing tree-sitter-elixir --- internal/lsp/server.go | 4 +++ internal/treesitter/variables.go | 47 ++++++++++++++++++++++++++++---- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/internal/lsp/server.go b/internal/lsp/server.go index c2fd345..73ed5d3 100644 --- a/internal/lsp/server.go +++ b/internal/lsp/server.go @@ -661,6 +661,10 @@ func (s *Server) Definition(ctx context.Context, params *protocol.DefinitionPara // The first occurrence in scope is the definition (pattern/assignment). 
if tree, src, release, ok := s.docs.GetTree(docURI); ok { defer release() + + // FIXME: remove + treesitter.FindVariableOccurrences(src, uint(lineNum), uint(col)) + if occs := treesitter.FindVariableOccurrencesWithTree(tree.RootNode(), src, uint(lineNum), uint(col)); len(occs) > 0 { s.debugf("Definition: returning variable definition at line %d", occs[0].Line) return []protocol.Location{{ diff --git a/internal/treesitter/variables.go b/internal/treesitter/variables.go index b0d5b5d..3223dc3 100644 --- a/internal/treesitter/variables.go +++ b/internal/treesitter/variables.go @@ -1,6 +1,7 @@ package treesitter import ( + "log" "strings" tree_sitter_heex "github.com/phoenixframework/tree-sitter-heex/bindings/go" @@ -24,6 +25,31 @@ func parseElixir(src []byte) (root *tree_sitter.Node, cleanup func()) { } } +// parseElixirExtended is similar to parseElixir but will also parse embedded HEEX templates +// and make them available in `heex`, keyed by the parent `quoted_contents` node in `root` +func parseElixirExtended(src []byte) (root *tree_sitter.Node, heex map[*tree_sitter.Node]*tree_sitter.Node, cleanup func()) { + root, cleanupElixir := parseElixir(src) + + var cleanupHeex [](func()) + heex = make(map[*tree_sitter.Node]*tree_sitter.Node) + visitTree(root, func(node *tree_sitter.Node) { + if node.Kind() == "quoted_content" && + node.Parent().Kind() == "sigil" && + /* sigil_name */ node.PrevNamedSibling().Utf8Text(src) == "H" { + heexRoot, cleanup_ := parseHeex(src[node.StartByte():node.EndByte()]) + heex[node] = heexRoot + cleanupHeex = append(cleanupHeex, cleanup_) + } + }) + + return root, heex, func() { + for _, c := range cleanupHeex { + c() + } + cleanupElixir() + } +} + // parseHeex parses HEEX present within ~H sigils and in `.heex` files func parseHeex(src []byte) (root *tree_sitter.Node, cleanup func()) { p := tree_sitter.NewParser() @@ -48,16 +74,21 @@ func ParseHeex(src []byte, onNode func(kind, text string, offset int)) { } defer cleanup() + visitTree(root, 
func(node *tree_sitter.Node) { + // notify visitor about leaf nodes + if node.ChildCount() == 0 { + onNode(node.Kind(), node.Utf8Text(src), int(node.StartByte())) + } + }) +} + +func visitTree(root *tree_sitter.Node, onNode func(node *tree_sitter.Node)) { cursor := root.Walk() defer cursor.Close() for { // visit current node - node := cursor.Node() - if node.ChildCount() == 0 { - // notify visitor about leaf nodes - onNode(node.Kind(), node.Utf8Text(src), int(node.StartByte())) - } + onNode(cursor.Node()) // traverse down one level, if possible if cursor.GotoFirstChild() { @@ -89,7 +120,11 @@ type VariableOccurrence struct { // occurrences of the variable at the given cursor position within the // enclosing function scope. Returns nil if the cursor is not on a variable. func FindVariableOccurrences(src []byte, line, col uint) []VariableOccurrence { - root, cleanup := parseElixir(src) + root, heex, cleanup := parseElixirExtended(src) + // FIXME: remove + for p, h := range heex { + log.Printf("%s\n%s\n", p.Utf8Text(src), h.ToSexp()) + } if root == nil { return nil } From 341f2907ed9e04535dce240e795431802cef0d8f Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Sun, 7 Jun 2026 14:36:10 -0700 Subject: [PATCH 09/31] remove unused test --- internal/parser/parser_test.go | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/internal/parser/parser_test.go b/internal/parser/parser_test.go index da502dd..5b7181a 100644 --- a/internal/parser/parser_test.go +++ b/internal/parser/parser_test.go @@ -2673,32 +2673,6 @@ func TestLineColToOffset(t *testing.T) { } } -func TestOffsetToLineCol(t *testing.T) { - source := []byte("defmodule Foo do\n def bar, do: :ok\nend\n") - result := TokenizeFull(source) - - tests := []struct { - wantLine, wantCol int - offset int - }{ - {0, 0, 0}, // start of file - {0, 10, 10}, // "F" in "Foo" - {1, 2, 19}, // "d" in "def" - {2, 0, 36}, // "e" in "end" - } - for _, tt := range tests { - gotLine, gotCol := 
OffsetToLineCol(result.LineStarts, tt.offset) - if gotLine != tt.wantLine || gotCol != tt.wantCol { - t.Errorf("OffsetToLineCol(offset=%d) = (%d, %d), want (%d, %d)", tt.offset, gotLine, gotCol, tt.wantLine, tt.wantCol) - } - } - - // Out-of-range line - if gotLine, gotCol := OffsetToLineCol(result.LineStarts, -1); gotLine != -1 || gotCol != -1 { - t.Errorf("expected (-1, -1) for negative offset, got (%d, %d)", gotLine, gotCol) - } -} - func TestBareMacroCall_CommentBetweenArgsAndDo(t *testing.T) { source := `defmodule Test do use SomeMacroLib From 6dfc7eaff466a76454a8fe9026459f4dd3002374 Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Sun, 7 Jun 2026 14:55:44 -0700 Subject: [PATCH 10/31] review feedback - handle parseElixir / parseHeex failure - don't double-count line offset --- internal/parser/tokenizer.go | 14 +++++++------- internal/treesitter/variables.go | 7 +++++++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/internal/parser/tokenizer.go b/internal/parser/tokenizer.go index 2476713..f84802c 100644 --- a/internal/parser/tokenizer.go +++ b/internal/parser/tokenizer.go @@ -809,7 +809,7 @@ func scanSigilContents(sigilChars string, source []byte, start, end, contentsSta xml := source[contentsStart:contentsEnd] treesitter.ParseHeex(xml, func(kind, text string, offset int) { - lineInHeex := lineOffset(source, contentsStart+offset) + line_ := lineOffset(source, contentsStart+offset) + 1 offset += contentsStart n := len(text) @@ -825,27 +825,27 @@ func scanSigilContents(sigilChars string, source []byte, start, end, contentsSta Kind: t.Kind, Start: t.Start + offset, End: t.End + offset, - Line: t.Line + line + lineInHeex, + Line: t.Line + line_ - 1, }) } // FIXME: how do we need to update lineStarts? 
// for _, l := range res.LineStarts[1:] { - // *lineStarts = append(*lineStarts, line + lineInHeex) + // *lineStarts = append(*lineStarts, line_) // } case "module": - *tokens = append(*tokens, Token{Kind: TokModule, Start: offset, End: offset + n, Line: line + lineInHeex}) + *tokens = append(*tokens, Token{Kind: TokModule, Start: offset, End: offset + n, Line: line_}) case "function": - *tokens = append(*tokens, Token{Kind: TokIdent, Start: offset, End: offset + n, Line: line + lineInHeex}) + *tokens = append(*tokens, Token{Kind: TokIdent, Start: offset, End: offset + n, Line: line_}) case ".": - *tokens = append(*tokens, Token{Kind: TokDot, Start: offset, End: offset + 1, Line: line + lineInHeex}) + *tokens = append(*tokens, Token{Kind: TokDot, Start: offset, End: offset + 1, Line: line_}) default: // The remainder of the sigil's contents are ignored. - *tokens = append(*tokens, Token{Kind: TokOther, Start: offset, End: offset + n, Line: line + lineInHeex}) + *tokens = append(*tokens, Token{Kind: TokOther, Start: offset, End: offset + n, Line: line_}) } }) diff --git a/internal/treesitter/variables.go b/internal/treesitter/variables.go index 3223dc3..e9080b6 100644 --- a/internal/treesitter/variables.go +++ b/internal/treesitter/variables.go @@ -29,6 +29,9 @@ func parseElixir(src []byte) (root *tree_sitter.Node, cleanup func()) { // and make them available in `heex`, keyed by the parent `quoted_contents` node in `root` func parseElixirExtended(src []byte) (root *tree_sitter.Node, heex map[*tree_sitter.Node]*tree_sitter.Node, cleanup func()) { root, cleanupElixir := parseElixir(src) + if root == nil { + return nil, nil, nil + } var cleanupHeex [](func()) heex = make(map[*tree_sitter.Node]*tree_sitter.Node) @@ -37,6 +40,10 @@ func parseElixirExtended(src []byte) (root *tree_sitter.Node, heex map[*tree_sit node.Parent().Kind() == "sigil" && /* sigil_name */ node.PrevNamedSibling().Utf8Text(src) == "H" { heexRoot, cleanup_ := 
parseHeex(src[node.StartByte():node.EndByte()]) + if heexRoot == nil { + return + } + heex[node] = heexRoot cleanupHeex = append(cleanupHeex, cleanup_) } From 19f913ae23c40f0beb852f97a0e8b402af2c15f9 Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Mon, 8 Jun 2026 12:20:53 -0700 Subject: [PATCH 11/31] first-class Tree type container for Elixir tree and HEEX sub-trees --- internal/lsp/documents.go | 26 +++-- internal/lsp/documents_test.go | 10 +- internal/lsp/server.go | 23 ++-- internal/parser/tokenizer.go | 9 +- internal/treesitter/tree.go | 157 +++++++++++++++++++++++++ internal/treesitter/variables.go | 162 ++++---------------------- internal/treesitter/variables_test.go | 146 ++--------------------- 7 files changed, 229 insertions(+), 304 deletions(-) create mode 100644 internal/treesitter/tree.go diff --git a/internal/lsp/documents.go b/internal/lsp/documents.go index a1874b8..01b83b4 100644 --- a/internal/lsp/documents.go +++ b/internal/lsp/documents.go @@ -7,10 +7,10 @@ import ( "sync" tree_sitter "github.com/tree-sitter/go-tree-sitter" - tree_sitter_elixir "github.com/tree-sitter/tree-sitter-elixir/bindings/go" "go.lsp.dev/protocol" "github.com/remoteoss/dexter/internal/parser" + "github.com/remoteoss/dexter/internal/treesitter" ) // defaultMaxTransient caps how many disk-loaded buffers may live in the @@ -45,7 +45,7 @@ type cachedDoc struct { // tree for free and only triggers ts_tree_delete if no handler still // holds a reference. type refTree struct { - tree *tree_sitter.Tree + tree *treesitter.Tree refs int retired bool } @@ -76,9 +76,9 @@ func (rt *refTree) retireLocked() { // (e.g. Claude Code) can still query references/hover/definition without // causing unbounded memory growth. type DocumentStore struct { - mu sync.RWMutex - docs map[string]*cachedDoc - parser *tree_sitter.Parser + mu sync.RWMutex + docs map[string]*cachedDoc + parsers map[treesitter.Language]*tree_sitter.Parser // LRU bookkeeping for transient (disk-loaded) entries only. 
The list // holds URIs in access-order, newest at the front. transientIdx maps @@ -89,11 +89,13 @@ type DocumentStore struct { } func NewDocumentStore() *DocumentStore { - p := tree_sitter.NewParser() - _ = p.SetLanguage(tree_sitter.NewLanguage(tree_sitter_elixir.Language())) + parsers := treesitter.AllParsers() + if parsers == nil { + return nil + } return &DocumentStore{ docs: make(map[string]*cachedDoc), - parser: p, + parsers: parsers, transientList: list.New(), transientIdx: make(map[string]*list.Element), maxTransient: defaultMaxTransient, @@ -149,7 +151,9 @@ func (ds *DocumentStore) CloseAll() { ds.docs = nil ds.transientList = nil ds.transientIdx = nil - ds.parser.Close() + for _, p := range ds.parsers { + p.Close() + } } func (ds *DocumentStore) Get(uri string) (string, bool) { @@ -300,7 +304,7 @@ func (ds *DocumentStore) evictTransientLocked() { // Callers must not close the returned tree directly. // // When ok is false, release is nil and must not be called. -func (ds *DocumentStore) GetTree(uri string) (*tree_sitter.Tree, []byte, func(), bool) { +func (ds *DocumentStore) GetTree(uri string) (*treesitter.Tree, []byte, func(), bool) { ds.mu.Lock() defer ds.mu.Unlock() doc, ok := ds.docs[uri] @@ -309,7 +313,7 @@ func (ds *DocumentStore) GetTree(uri string) (*tree_sitter.Tree, []byte, func(), } if doc.tree == nil { doc.src = []byte(doc.text) - doc.tree = &refTree{tree: ds.parser.Parse(doc.src, nil)} + doc.tree = &refTree{tree: treesitter.NewTree(doc.src)} } rt := doc.tree rt.refs++ diff --git a/internal/lsp/documents_test.go b/internal/lsp/documents_test.go index e9ee910..e94af2a 100644 --- a/internal/lsp/documents_test.go +++ b/internal/lsp/documents_test.go @@ -268,8 +268,8 @@ func TestDocumentStore_GetTree_DiskLoaded(t *testing.T) { if string(src) != contents { t.Fatalf("GetTree src mismatch: got %q want %q", src, contents) } - if tree.RootNode().Kind() != "source" { - t.Fatalf("expected root node kind 'source', got %q", tree.RootNode().Kind()) + if 
tree.Trunk.RootNode().Kind() != "source" { + t.Fatalf("expected root node kind 'source', got %q", tree.Trunk.RootNode().Kind()) } } @@ -470,7 +470,7 @@ func TestDocumentStore_GetTree_SurvivesEviction(t *testing.T) { // Capture the root node kind so we can re-read it after eviction. // Pre-fix, the eviction below would call ts_tree_delete on this tree // and the second RootNode() call would read freed C memory. - rootKindBefore := tree.RootNode().Kind() + rootKindBefore := tree.Trunk.RootNode().Kind() // Force eviction of this URI while we still hold a ref. ds.SetMaxTransient(0) @@ -480,7 +480,7 @@ func TestDocumentStore_GetTree_SurvivesEviction(t *testing.T) { // Walking the tree after eviction must still work - this is the UAF // the refcounting prevents. - rootKindAfter := tree.RootNode().Kind() + rootKindAfter := tree.Trunk.RootNode().Kind() if rootKindAfter != rootKindBefore { t.Fatalf("tree root kind changed across eviction: got %q want %q", rootKindAfter, rootKindBefore) } @@ -537,7 +537,7 @@ func TestDocumentStore_GetTree_ConcurrentEvictionStress(t *testing.T) { if !ok { continue } - root := tree.RootNode() + root := tree.Trunk.RootNode() _ = root.Kind() _ = root.ChildCount() release() diff --git a/internal/lsp/server.go b/internal/lsp/server.go index 73ed5d3..ea47a25 100644 --- a/internal/lsp/server.go +++ b/internal/lsp/server.go @@ -27,7 +27,6 @@ import ( "github.com/remoteoss/dexter/internal/parser" "github.com/remoteoss/dexter/internal/stdlib" "github.com/remoteoss/dexter/internal/store" - "github.com/remoteoss/dexter/internal/treesitter" "github.com/remoteoss/dexter/internal/version" ) @@ -662,10 +661,7 @@ func (s *Server) Definition(ctx context.Context, params *protocol.DefinitionPara if tree, src, release, ok := s.docs.GetTree(docURI); ok { defer release() - // FIXME: remove - treesitter.FindVariableOccurrences(src, uint(lineNum), uint(col)) - - if occs := treesitter.FindVariableOccurrencesWithTree(tree.RootNode(), src, uint(lineNum), uint(col)); 
len(occs) > 0 { + if occs := tree.FindVariableOccurrences(src, uint(lineNum), uint(col)); len(occs) > 0 { s.debugf("Definition: returning variable definition at line %d", occs[0].Line) return []protocol.Location{{ URI: params.TextDocument.URI, @@ -1735,7 +1731,7 @@ func (s *Server) Completion(ctx context.Context, params *protocol.CompletionPara var varsInScope []string if tree, src, release, ok := s.docs.GetTree(docURI); ok { defer release() - varsInScope = treesitter.FindVariablesInScopeWithTree(tree.RootNode(), src, uint(lineNum), uint(col)) + varsInScope = tree.FindVariablesInScope(src, uint(lineNum), uint(col)) } for _, varName := range varsInScope { if strings.HasPrefix(varName, funcPrefix) && !seen[varName] { @@ -2691,10 +2687,9 @@ func (s *Server) DocumentHighlight(ctx context.Context, params *protocol.Documen return nil, nil } defer release() - root := tree.RootNode() // Try scope-aware variable highlight first - if occs := treesitter.FindVariableOccurrencesWithTree(root, src, uint(lineNum), uint(col)); len(occs) > 0 { + if occs := tree.FindVariableOccurrences(src, uint(lineNum), uint(col)); len(occs) > 0 { var highlights []protocol.DocumentHighlight for _, occ := range occs { highlights = append(highlights, protocol.DocumentHighlight{ @@ -2726,7 +2721,7 @@ func (s *Server) DocumentHighlight(ctx context.Context, params *protocol.Documen } // Reuse the same parsed tree for token occurrences - occs := treesitter.FindTokenOccurrencesWithTree(root, src, token) + occs := tree.FindTokenOccurrences(src, token) if len(occs) == 0 { return nil, nil } @@ -3610,7 +3605,7 @@ func (s *Server) PrepareRename(ctx context.Context, params *protocol.PrepareRena if moduleRef == "" { if tree, src, release, ok := s.docs.GetTree(docURI); ok { defer release() - if occs := treesitter.FindVariableOccurrencesWithTree(tree.RootNode(), src, uint(lineNum), uint(col)); len(occs) > 0 { + if occs := tree.FindVariableOccurrences(src, uint(lineNum), uint(col)); len(occs) > 0 { for _, occ := 
range occs { if occ.Line == uint(lineNum) && uint(col) >= occ.StartCol && uint(col) < occ.EndCol { return &protocol.Range{ @@ -3808,7 +3803,7 @@ func (s *Server) References(ctx context.Context, params *protocol.ReferenceParam // function reference lookup. if tree, src, release, ok := s.docs.GetTree(docURI); ok { defer release() - if occs := treesitter.FindVariableOccurrencesWithTree(tree.RootNode(), src, uint(lineNum), uint(col)); len(occs) > 0 { + if occs := tree.FindVariableOccurrences(src, uint(lineNum), uint(col)); len(occs) > 0 { var locations []protocol.Location for _, occ := range occs { locations = append(locations, protocol.Location{ @@ -4014,8 +4009,8 @@ func (s *Server) Rename(ctx context.Context, params *protocol.RenameParams) (*pr if moduleRef == "" { if tree, src, release, ok := s.docs.GetTree(docURI); ok { defer release() - if occs := treesitter.FindVariableOccurrencesWithTree(tree.RootNode(), src, uint(lineNum), uint(col)); len(occs) > 0 { - if treesitter.NameExistsInScopeOf(tree.RootNode(), src, uint(lineNum), uint(col), params.NewName) { + if occs := tree.FindVariableOccurrences(src, uint(lineNum), uint(col)); len(occs) > 0 { + if tree.NameExistsInScopeOf(src, uint(lineNum), uint(col), params.NewName) { return nil, fmt.Errorf("variable %q already exists in this scope", params.NewName) } changes := make(map[protocol.DocumentURI][]protocol.TextEdit) @@ -5025,6 +5020,8 @@ func (s *Server) getFileLine(filePath string, lineNum int) (string, bool) { return scanner.Text(), true } } + // ignore any scan error + _ = scanner.Err() return "", false } diff --git a/internal/parser/tokenizer.go b/internal/parser/tokenizer.go index f84802c..79e5657 100644 --- a/internal/parser/tokenizer.go +++ b/internal/parser/tokenizer.go @@ -679,6 +679,9 @@ func scanSigil(source []byte, i, line int, lineStarts *[]int, tokens *[]Token) ( } sigilChars := string(source[start+1 : i]) + if i == len(source) { + return i, line + } escapes := isLower(sigilLetter) // only lowercase 
sigils process escapes openCh := source[i] @@ -783,6 +786,10 @@ func scanSigil(source []byte, i, line int, lineStarts *[]int, tokens *[]Token) ( } } + if contentsEnd <= contentsStart { + return i, line + } + // emit tokens if requested if tokens != nil { scanSigilContents(sigilChars, source, start, i, contentsStart, contentsEnd, startLine, lineStarts, tokens) @@ -799,7 +806,7 @@ func scanSigilContents(sigilChars string, source []byte, start, end, contentsSta } lineOffset := func(src []byte, offset int) (lines int) { - for i := 0; i < offset; i++ { + for i := range offset { if src[i] == '\n' { lines++ } diff --git a/internal/treesitter/tree.go b/internal/treesitter/tree.go new file mode 100644 index 0000000..d775dd2 --- /dev/null +++ b/internal/treesitter/tree.go @@ -0,0 +1,157 @@ +package treesitter + +import ( + "unsafe" + + tree_sitter_heex "github.com/phoenixframework/tree-sitter-heex/bindings/go" + tree_sitter "github.com/tree-sitter/go-tree-sitter" + tree_sitter_elixir "github.com/tree-sitter/tree-sitter-elixir/bindings/go" +) + +// Tree contains an Elixir document tree and a map of any HEEX sub-trees. +// Heex is a map of `quoted_content` nodes within sigils in the document +// tree to their corresponding HEEX sub-tree. +// +// (sigil (sigil_name) node: (quoted_content)) +type Tree struct { + Trunk *tree_sitter.Tree + Heex map[*tree_sitter.Node]*tree_sitter.Tree +} + +// Close closes the trunk tree and any HEEX sub-trees. +func (t *Tree) Close() { + for _, ht := range t.Heex { + ht.Close() + } + t.Trunk.Close() +} + +// NewTree creates parsers, parses src, parses nested HEEX templates, and returns the created trees. +// Used by the standalone (non-cached) entry points. Returns nil on failure. 
+func NewTree(src []byte) *Tree { + parsers := AllParsers() + if parsers == nil { + return nil + } + for _, p := range parsers { + defer p.Close() + } + return NewTreeWithParsers(src, parsers) +} + +// NewTreeWithParsers parses src, parses nested HEEX templates, and returns the created trees. +// Used by cached entry points . Returns nil on failure. +func NewTreeWithParsers(src []byte, parsers map[Language]*tree_sitter.Parser) *Tree { + trunk := parsers[LangElixir].Parse(src, nil) + if trunk == nil { + return nil + } + + heex := make(map[*tree_sitter.Node]*tree_sitter.Tree) + visitTree(trunk.RootNode(), func(node *tree_sitter.Node) { + if node.Kind() == "quoted_content" && + node.Parent().Kind() == "sigil" && + /* sigil_name */ node.PrevNamedSibling().Utf8Text(src) == "H" { + tree := parsers[LangHeex].Parse(src[node.StartByte():node.EndByte()], nil) + if tree == nil { + return + } + + heex[node] = tree + } + }) + + return &Tree{ + Trunk: trunk, + Heex: heex, + } +} + +type Language byte + +const ( + LangElixir Language = iota + LangHeex +) + +func NewParser(lang Language) *tree_sitter.Parser { + var language unsafe.Pointer + switch lang { + case LangElixir: + language = tree_sitter_elixir.Language() + case LangHeex: + language = tree_sitter_heex.Language() + } + + p := tree_sitter.NewParser() + if err := p.SetLanguage(tree_sitter.NewLanguage(language)); err != nil { + return nil + } + + return p +} + +func AllParsers() map[Language]*tree_sitter.Parser { + parsers := make(map[Language]*tree_sitter.Parser) + + for _, l := range []Language{LangElixir, LangHeex} { + p := NewParser(l) + if p == nil { + return nil + } + parsers[l] = p + } + + return parsers +} + +// ParseHeex parses the HEEX template in `src` and calls `onNode` for each leaf node +// it encounters. `onNode` is called with the leaf node's kind, text contents, and +// offset within the given `src` slice. 
+func ParseHeex(src []byte, onNode func(kind, text string, offset int)) { + p := NewParser(LangHeex) + if p == nil { + return + } + defer p.Close() + + tree := p.Parse(src, nil) + if tree == nil { + return + } + defer tree.Close() + + visitTree(tree.RootNode(), func(node *tree_sitter.Node) { + // notify visitor about leaf nodes + if node.ChildCount() == 0 { + onNode(node.Kind(), node.Utf8Text(src), int(node.StartByte())) + } + }) +} + +func visitTree(root *tree_sitter.Node, onNode func(node *tree_sitter.Node)) { + cursor := root.Walk() + defer cursor.Close() + + for { + // visit current node + onNode(cursor.Node()) + + // traverse down one level, if possible + if cursor.GotoFirstChild() { + continue + } + + for { + // traverse via siblings, if possible + if cursor.GotoNextSibling() { + break + } + + // move back up and recurse, returning once we're back to the root + if !cursor.GotoParent() { + return + } + } + } +} diff --git a/internal/treesitter/variables.go b/internal/treesitter/variables.go index e9080b6..f7298fb 100644 --- a/internal/treesitter/variables.go +++ b/internal/treesitter/variables.go @@ -1,121 +1,11 @@ package treesitter import ( - "log" "strings" - tree_sitter_heex "github.com/phoenixframework/tree-sitter-heex/bindings/go" tree_sitter "github.com/tree-sitter/go-tree-sitter" - tree_sitter_elixir "github.com/tree-sitter/tree-sitter-elixir/bindings/go" ) -// parseElixir creates a parser, parses src, and returns the root node plus a -// cleanup function that closes both tree and parser. Used by the standalone -// (non-cached) entry points. Returns (nil, nil) on failure. 
-func parseElixir(src []byte) (root *tree_sitter.Node, cleanup func()) { - p := tree_sitter.NewParser() - if err := p.SetLanguage(tree_sitter.NewLanguage(tree_sitter_elixir.Language())); err != nil { - p.Close() - return nil, nil - } - tree := p.Parse(src, nil) - return tree.RootNode(), func() { - tree.Close() - p.Close() - } -} - -// parseElixirExtended is similar to parseElixir but will also parse embedded HEEX templates -// and make them available in `heex`, keyed by the parent `quoted_contents` node in `root` -func parseElixirExtended(src []byte) (root *tree_sitter.Node, heex map[*tree_sitter.Node]*tree_sitter.Node, cleanup func()) { - root, cleanupElixir := parseElixir(src) - if root == nil { - return nil, nil, nil - } - - var cleanupHeex [](func()) - heex = make(map[*tree_sitter.Node]*tree_sitter.Node) - visitTree(root, func(node *tree_sitter.Node) { - if node.Kind() == "quoted_content" && - node.Parent().Kind() == "sigil" && - /* sigil_name */ node.PrevNamedSibling().Utf8Text(src) == "H" { - heexRoot, cleanup_ := parseHeex(src[node.StartByte():node.EndByte()]) - if heexRoot == nil { - return - } - - heex[node] = heexRoot - cleanupHeex = append(cleanupHeex, cleanup_) - } - }) - - return root, heex, func() { - for _, c := range cleanupHeex { - c() - } - cleanupElixir() - } -} - -// parseHeex parses HEEX present within ~H sigils and in `.heex` files -func parseHeex(src []byte) (root *tree_sitter.Node, cleanup func()) { - p := tree_sitter.NewParser() - if err := p.SetLanguage(tree_sitter.NewLanguage(tree_sitter_heex.Language())); err != nil { - p.Close() - return nil, nil - } - tree := p.Parse(src, nil) - return tree.RootNode(), func() { - tree.Close() - p.Close() - } -} - -// ParseHeex parses the HEEX template in `src` and calls `onNode` for each leaf node -// it encounters. `onNode` is called with the leaf node's kind, text contents, and -// offset within the given `src` slice. 
-func ParseHeex(src []byte, onNode func(kind, text string, offset int)) { - root, cleanup := parseHeex(src) - if root == nil { - return - } - defer cleanup() - - visitTree(root, func(node *tree_sitter.Node) { - // notify visitor about leaf nodes - if node.ChildCount() == 0 { - onNode(node.Kind(), node.Utf8Text(src), int(node.StartByte())) - } - }) -} - -func visitTree(root *tree_sitter.Node, onNode func(node *tree_sitter.Node)) { - cursor := root.Walk() - defer cursor.Close() - - for { - // visit current node - onNode(cursor.Node()) - - // traverse down one level, if possible - if cursor.GotoFirstChild() { - continue - } - - for { - // traverse via siblings, if possible - if cursor.GotoNextSibling() { - break - } - - // move back up and recurse, returning once we're back to the root - if !cursor.GotoParent() { - return - } - } - } -} - // VariableOccurrence is a position where a variable name appears. type VariableOccurrence struct { Line uint // 0-based @@ -127,22 +17,18 @@ type VariableOccurrence struct { // occurrences of the variable at the given cursor position within the // enclosing function scope. Returns nil if the cursor is not on a variable. func FindVariableOccurrences(src []byte, line, col uint) []VariableOccurrence { - root, heex, cleanup := parseElixirExtended(src) - // FIXME: remove - for p, h := range heex { - log.Printf("%s\n%s\n", p.Utf8Text(src), h.ToSexp()) - } - if root == nil { + tree := NewTree(src) + if tree == nil { return nil } - defer cleanup() - return FindVariableOccurrencesWithTree(root, src, line, col) + defer tree.Close() + return tree.FindVariableOccurrences(src, line, col) } // FindVariableOccurrencesWithTree is like FindVariableOccurrences but uses a // pre-parsed tree root, avoiding redundant parsing when a cached tree exists. 
-func FindVariableOccurrencesWithTree(root *tree_sitter.Node, src []byte, line, col uint) []VariableOccurrence { - resolved := resolveVariableScope(root, src, line, col) +func (t *Tree) FindVariableOccurrences(src []byte, line, col uint) []VariableOccurrence { + resolved := t.resolveVariableScope(src, line, col) if resolved == nil { return nil } @@ -190,8 +76,8 @@ func FindVariableOccurrencesWithTree(root *tree_sitter.Node, src []byte, line, c // // Bare identifiers that are zero-arity function calls (not bound as variables) // are NOT considered collisions — in Elixir, a variable simply shadows them. -func NameExistsInScopeOf(root *tree_sitter.Node, src []byte, line, col uint, newName string) bool { - resolved := resolveVariableScope(root, src, line, col) +func (t *Tree) NameExistsInScopeOf(src []byte, line, col uint, newName string) bool { + resolved := t.resolveVariableScope(src, line, col) if resolved == nil { return false } @@ -210,7 +96,7 @@ func NameExistsInScopeOf(root *tree_sitter.Node, src []byte, line, col uint, new // rather than a bare zero-arity function call. Reuses the full variable // resolution logic so the same scoping rules apply. pos := target.StartPosition() - return len(FindVariableOccurrencesWithTree(root, src, uint(pos.Row), uint(pos.Column))) > 0 + return len(t.FindVariableOccurrences(src, uint(pos.Row), uint(pos.Column))) > 0 } // findFirstNonCallIdentifier returns the first identifier node in the subtree @@ -252,8 +138,8 @@ type resolvedScope struct { // resolveVariableScope locates the cursor node at (line, col), validates it as // a variable or module attribute, and returns the enclosing scope. Returns nil // if the position is not on a renameable variable. 
-func resolveVariableScope(root *tree_sitter.Node, src []byte, line, col uint) *resolvedScope { - cursorNode := nodeAtPosition(root, line, col) +func (t *Tree) resolveVariableScope(src []byte, line, col uint) *resolvedScope { + cursorNode := nodeAtPosition(t.Trunk.RootNode(), line, col) if cursorNode == nil || cursorNode.Kind() != "identifier" { return nil } @@ -770,19 +656,19 @@ func collectModuleAttributeOccurrences(node *tree_sitter.Node, src []byte, attrN // string search, this naturally skips strings, comments, atoms, and other // non-code contexts. func FindTokenOccurrences(src []byte, token string) []VariableOccurrence { - root, cleanup := parseElixir(src) - if root == nil { + tree := NewTree(src) + if tree == nil { return nil } - defer cleanup() - return FindTokenOccurrencesWithTree(root, src, token) + defer tree.Close() + return tree.FindTokenOccurrences(src, token) } // FindTokenOccurrencesWithTree is like FindTokenOccurrences but uses a // pre-parsed tree root. -func FindTokenOccurrencesWithTree(root *tree_sitter.Node, src []byte, token string) []VariableOccurrence { +func (t *Tree) FindTokenOccurrences(src []byte, token string) []VariableOccurrence { var occurrences []VariableOccurrence - collectTokenOccurrences(root, src, token, &occurrences) + collectTokenOccurrences(t.Trunk.RootNode(), src, token, &occurrences) return occurrences } @@ -843,20 +729,20 @@ func collectTokenOccurrences(node *tree_sitter.Node, src []byte, token string, o // function scope. Respects clause boundaries: variables from other case/fn // clauses are excluded. Returns nil if the cursor is not inside a function. 
func FindVariablesInScope(src []byte, line, col uint) []string { - root, cleanup := parseElixir(src) - if root == nil { + tree := NewTree(src) + if tree == nil { return nil } - defer cleanup() - return FindVariablesInScopeWithTree(root, src, line, col) + defer tree.Close() + return tree.FindVariablesInScope(src, line, col) } // FindVariablesInScopeWithTree is like FindVariablesInScope but uses a // pre-parsed tree root. -func FindVariablesInScopeWithTree(root *tree_sitter.Node, src []byte, line, col uint) []string { - cursorNode := nodeAtPosition(root, line, col) +func (t *Tree) FindVariablesInScope(src []byte, line, col uint) []string { + cursorNode := nodeAtPosition(t.Trunk.RootNode(), line, col) if cursorNode == nil && col > 0 { - cursorNode = nodeAtPosition(root, line, col-1) + cursorNode = nodeAtPosition(t.Trunk.RootNode(), line, col-1) } if cursorNode == nil { return nil diff --git a/internal/treesitter/variables_test.go b/internal/treesitter/variables_test.go index eca0f2c..a61173b 100644 --- a/internal/treesitter/variables_test.go +++ b/internal/treesitter/variables_test.go @@ -3,8 +3,6 @@ package treesitter import ( "strings" "testing" - - "github.com/google/go-cmp/cmp" ) func TestFindVariableOccurrences_BasicVariable(t *testing.T) { @@ -657,11 +655,11 @@ func TestFindVariableOccurrences_FullWorkerFile(t *testing.T) { defdelegate backoff(job), to: MyApp.Oban.EmailWorker end`) - root, cleanup := parseElixir(src) - if root == nil { + tree := NewTree(src) + if tree == nil { t.Fatal("failed to parse") } - defer cleanup() + defer tree.Close() // Find the actual line for "transfer_amount = Money.new" in this test source lines := strings.Split(string(src), "\n") @@ -677,7 +675,7 @@ end`) } t.Logf("transfer_amount rebind is at line %d: %q", transferLine, lines[transferLine]) - occs := FindVariableOccurrences(src, uint(transferLine), 6) + occs := tree.FindVariableOccurrences(src, uint(transferLine), 6) t.Logf("transfer_amount from line %d col 6: %d occs: %+v", 
transferLine, len(occs), occs) if occs == nil { t.Fatal("expected variable occurrences for 'transfer_amount', got nil") @@ -1461,12 +1459,15 @@ end apply(config) `) - root, cleanup := parseElixir(src) - defer cleanup() + tree := NewTree(src) + if tree == nil { + t.Fatal("failed to parse") + } + defer tree.Close() // Renaming top-level "config" to "other" is safe: "other" only exists as a // def-local, which is a different scope. - if NameExistsInScopeOf(root, src, 0, 0, "other") { + if tree.NameExistsInScopeOf(src, 0, 0, "other") { t.Error("false-positive collision: 'other' is a def-local, not in the top-level scope") } } @@ -1560,130 +1561,3 @@ config :app, value: some_helper() } } - -func TestParseHeexExpr_Empty(t *testing.T) { - src := []byte("") - - expr := ParseHeexExpr(src, 0) - if expr != nil { - t.Errorf("expected nil on empty input, got %#v", expr) - } -} - -func TestParseHeexExpr_HTMLTag(t *testing.T) { - src := []byte("
Hello, world!
") - - expr := ParseHeexExpr(src, 1) - if expr != nil { - t.Errorf("expected nil on raw HTML tag, got %#v", expr) - } -} - -func TestParseHeexExpr_Component(t *testing.T) { - tests := []struct { - src string - offset uint - want *HeexExpr - }{ - {"<.foo>Hello, world!", 2, &HeexExpr{ - Function: "foo", - Offset: 2, - }}, - // cursor on "." should nudge to the right - {"<.foo>Hello, world!", 1, &HeexExpr{ - Function: "foo", - Offset: 2, - }}, - {"Hello, world!", 1, &HeexExpr{ - Module: "Foo", - Function: "bar", - Offset: 1, - }}, - {"Hello, world!", 1, &HeexExpr{ - Module: "Foo.Bar", - Function: "baz", - Offset: 1, - }}, - // cursor on close tag should also point to component name - {"Hello, world!", 28, &HeexExpr{ - Module: "Foo.Bar", - Function: "baz", - Offset: 1, - }}, - // nested components should also work - {"
Hello, world!
", 6, &HeexExpr{ - Module: "Foo.Bar", - Function: "baz", - Offset: 6, - }}, - } - for _, tt := range tests { - got := ParseHeexExpr([]byte(tt.src), tt.offset) - if diff := cmp.Diff(tt.want, got); diff != "" { - t.Errorf("ParseHeexExpr(%#v, %d)\nparse mismatch (-want +got):\n%s", tt.src, tt.offset, diff) - } - } -} - -func TestParseHeexExpr_SelfClosingComponent(t *testing.T) { - tests := []struct { - src string - offset uint - want *HeexExpr - }{ - {"<.foo />", 2, &HeexExpr{ - Function: "foo", - Offset: 2, - }}, - // cursor on "." should nudge to the right - {"<.foo />", 1, &HeexExpr{ - Function: "foo", - Offset: 2, - }}, - {"", 1, &HeexExpr{ - Module: "Foo", - Function: "bar", - Offset: 1, - }}, - {"", 1, &HeexExpr{ - Module: "Foo.Bar", - Function: "baz", - Offset: 1, - }}, - {"
", 6, &HeexExpr{ - Module: "Foo.Bar", - Function: "baz", - Offset: 6, - }}, - } - for _, tt := range tests { - got := ParseHeexExpr([]byte(tt.src), tt.offset) - if diff := cmp.Diff(tt.want, got); diff != "" { - t.Errorf("ParseHeexExpr(%#v, %d)\nparse mismatch (-want +got):\n%s", tt.src, tt.offset, diff) - } - } -} - -func TestParseHeexExpr_Expression(t *testing.T) { - tests := []struct { - src string - offset uint - want *HeexExpr - }{ - {"
", 12, &HeexExpr{ - Expr: "class()", - Offset: 12, - }}, - // cursor within expression should still point to start offset of expression - {"
", 14, &HeexExpr{ - Expr: "class()", - Offset: 12, - }}, - } - for _, tt := range tests { - got := ParseHeexExpr([]byte(tt.src), tt.offset) - if diff := cmp.Diff(tt.want, got); diff != "" { - t.Errorf("ParseHeexExpr(%#v, %d)\nparse mismatch (-want +got):\n%s", tt.src, tt.offset, diff) - } - } -} From 328320bcb3b63d98cf93b9c906d614c9621b98f5 Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Mon, 8 Jun 2026 15:25:33 -0700 Subject: [PATCH 12/31] review feedback - use NewTreeWithParsers in DocumentStore - prevent server startup if parsers unavailable - emit TokSigil for empty ~H sigils - simplify break to loop condition --- internal/lsp/documents.go | 2 +- internal/lsp/server.go | 9 ++++++++- internal/parser/tokenizer.go | 10 ++++++++-- internal/treesitter/tree.go | 8 ++------ 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/internal/lsp/documents.go b/internal/lsp/documents.go index 01b83b4..6e801c2 100644 --- a/internal/lsp/documents.go +++ b/internal/lsp/documents.go @@ -313,7 +313,7 @@ func (ds *DocumentStore) GetTree(uri string) (*treesitter.Tree, []byte, func(), } if doc.tree == nil { doc.src = []byte(doc.text) - doc.tree = &refTree{tree: treesitter.NewTree(doc.src)} + doc.tree = &refTree{tree: treesitter.NewTreeWithParsers(doc.src, ds.parsers)} } rt := doc.tree rt.refs++ diff --git a/internal/lsp/server.go b/internal/lsp/server.go index ea47a25..b17ae01 100644 --- a/internal/lsp/server.go +++ b/internal/lsp/server.go @@ -115,9 +115,13 @@ func (s *Server) debugNow() time.Time { } func NewServer(s *store.Store, projectRoot string) *Server { + docs := NewDocumentStore() + if docs == nil { + return nil + } return &Server{ store: s, - docs: NewDocumentStore(), + docs: docs, projectRoot: projectRoot, explicitRoot: projectRoot != "", followDelegates: true, @@ -138,6 +142,9 @@ func (s stdinoutCloser) Close() error { return nil } // Serve starts the LSP server on the given reader/writer (typically stdin/stdout). 
func Serve(in io.Reader, out io.Writer, s *store.Store, projectRoot string) error { server := NewServer(s, projectRoot) + if server == nil { + return nil + } logger, _ := zap.NewProduction() stream := jsonrpc2.NewStream(stdinoutCloser{in, out}) diff --git a/internal/parser/tokenizer.go b/internal/parser/tokenizer.go index 79e5657..2ce1ccf 100644 --- a/internal/parser/tokenizer.go +++ b/internal/parser/tokenizer.go @@ -786,13 +786,19 @@ func scanSigil(source []byte, i, line int, lineStarts *[]int, tokens *[]Token) ( } } - if contentsEnd <= contentsStart { + // incomplete sigil at end of document + if contentsEnd < contentsStart { return i, line } // emit tokens if requested if tokens != nil { - scanSigilContents(sigilChars, source, start, i, contentsStart, contentsEnd, startLine, lineStarts, tokens) + if contentsEnd == contentsStart { + // empty sigil + *tokens = append(*tokens, Token{Kind: TokSigil, Start: start, End: i, Line: line}) + } else { + scanSigilContents(sigilChars, source, start, i, contentsStart, contentsEnd, startLine, lineStarts, tokens) + } } return i, line diff --git a/internal/treesitter/tree.go b/internal/treesitter/tree.go index d775dd2..a7f6da5 100644 --- a/internal/treesitter/tree.go +++ b/internal/treesitter/tree.go @@ -142,12 +142,8 @@ func visitTree(root *tree_sitter.Node, onNode func(node *tree_sitter.Node)) { continue } - for { - // traverse via siblings, if possible - if cursor.GotoNextSibling() { - break - } - + // traverse via siblings, if possible + for !cursor.GotoNextSibling() { // move back up and recurse, returning once we're back to the root if !cursor.GotoParent() { return From c6a739a3718b2d080b4461f42797ec3d15e085ce Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Mon, 8 Jun 2026 20:24:41 -0700 Subject: [PATCH 13/31] wip: HEEX tokenizer --- internal/lsp/documents.go | 6 +- internal/lsp/server.go | 9 +- internal/parser/tokenizer.go | 287 ++++++++++++++++++++++++++++++ internal/parser/tokenizer_test.go | 39 ++-- 4 files changed, 
316 insertions(+), 25 deletions(-) diff --git a/internal/lsp/documents.go b/internal/lsp/documents.go index 6e801c2..be699a4 100644 --- a/internal/lsp/documents.go +++ b/internal/lsp/documents.go @@ -89,13 +89,9 @@ type DocumentStore struct { } func NewDocumentStore() *DocumentStore { - parsers := treesitter.AllParsers() - if parsers == nil { - return nil - } return &DocumentStore{ docs: make(map[string]*cachedDoc), - parsers: parsers, + parsers: treesitter.AllParsers(), transientList: list.New(), transientIdx: make(map[string]*list.Element), maxTransient: defaultMaxTransient, diff --git a/internal/lsp/server.go b/internal/lsp/server.go index b17ae01..ea47a25 100644 --- a/internal/lsp/server.go +++ b/internal/lsp/server.go @@ -115,13 +115,9 @@ func (s *Server) debugNow() time.Time { } func NewServer(s *store.Store, projectRoot string) *Server { - docs := NewDocumentStore() - if docs == nil { - return nil - } return &Server{ store: s, - docs: docs, + docs: NewDocumentStore(), projectRoot: projectRoot, explicitRoot: projectRoot != "", followDelegates: true, @@ -142,9 +138,6 @@ func (s stdinoutCloser) Close() error { return nil } // Serve starts the LSP server on the given reader/writer (typically stdin/stdout). 
func Serve(in io.Reader, out io.Writer, s *store.Store, projectRoot string) error { server := NewServer(s, projectRoot) - if server == nil { - return nil - } logger, _ := zap.NewProduction() stream := jsonrpc2.NewStream(stdinoutCloser{in, out}) diff --git a/internal/parser/tokenizer.go b/internal/parser/tokenizer.go index 2ce1ccf..feabf1a 100644 --- a/internal/parser/tokenizer.go +++ b/internal/parser/tokenizer.go @@ -1,6 +1,7 @@ package parser import ( + "strings" "unicode" "unicode/utf8" @@ -865,6 +866,287 @@ func scanSigilContents(sigilChars string, source []byte, start, end, contentsSta return start, line } +func TokenizeHeex(source []byte) TokenResult { + tokens := make([]Token, 0, len(source)/8) + lineStarts := make([]int, 1, 64) + lineStarts[0] = 0 // line 1 starts at byte 0 + line := 1 + i := 0 + + matchesSequence := func(src []byte, seq_ string) bool { + seq := []byte(seq_) + for i, c := range seq { + if i >= len(src) || src[i] != c { + return false + } + } + return true + } + + scanComment := func(delim string, i, line int, lineStarts *[]int) (int, int) { + for i < len(source) { + if matchesSequence(source[i:], delim) { + i += len(delim) + break + } + if source[i] == '\n' { + line++ + *lineStarts = append(*lineStarts, i+1) + } + i++ + } + return i, line + } + + scanInterpolation := func(i, line int, terminator string, lineStarts *[]int, tokens *[]Token) (int, int) { + // start := i + for i < len(source) { + // FIXME: this will get tripped up when the terminator appears within the interpolation + // in an ignored context, such as within a string or comment + if matchesSequence(source[i:], terminator) { + i += len(terminator) + break + } + + if source[i] == '\n' { + *tokens = append(*tokens, Token{Kind: TokEOL, Start: i, End: i + 1, Line: line}) + line++ + i++ + *lineStarts = append(*lineStarts, i) + } + } + + // FIXME: once we've identified the index of the terminator, we know the full range of + // the interpolation and may call TokenizeFull on 
source[start:i] to tokenize the expression + + return i, line + } + + scanTagAttr := func(i, line int, lineStarts *[]int, tokens *[]Token) (int, int) { + var quoteChar byte + for i < len(source) { + ch := source[i] + + switch { + case ch == '\n': + *tokens = append(*tokens, Token{Kind: TokEOL, Start: i, End: i + 1, Line: line}) + line++ + i++ + *lineStarts = append(*lineStarts, i) + + case quoteChar == 0 && isWhitespace(ch): + return i, line + + case ch == '\'' || ch == '"': + quoteChar = ch + i++ + + case quoteChar != 0 && ch == '\\': + i++ + if i < len(source) { + i++ + } + + default: + i++ + } + } + + return i, line + } + + scanTag := func(i, line int, lineStarts *[]int, tokens *[]Token) (int, int) { + hasName := false + var tagName strings.Builder + var tagNameTokens []Token + for i < len(source) { + switch { + case source[i] == '\n': + *tokens = append(*tokens, Token{Kind: TokEOL, Start: i, End: i + 1, Line: line}) + line++ + i++ + *lineStarts = append(*lineStarts, i) + + case isWhitespace(source[i]): + i++ + + // <.foo + case source[i] == '.': + dotTkn := Token{Kind: TokDot, Start: i, End: i + 1, Line: line} + *tokens = append(*tokens, dotTkn) + i++ + + start := i + for i < len(source) && (isLetter(source[i]) || isDigit(source[i])) { + i++ + } + if i == start { + break + } + + var tknKind TokenKind + if isUpper(source[start]) { + tknKind = TokModule + } else { + tknKind = TokIdent + hasName = true + } + nameTkn := Token{Kind: tknKind, Start: start, End: i, Line: line} + *tokens = append(*tokens, nameTkn) + tagName.WriteByte('.') + tagName.Write(source[start:i]) + tagNameTokens = append(tagNameTokens, dotTkn, nameTkn) + + //
= len(source) { + break + } + if matchesSequence(source[i+1:], "if={") { + i += 4 + i, line = scanInterpolation(i, line, "}", lineStarts, tokens) + } else if matchesSequence(source[i+1:], "for={") { + i += 5 + i, line = scanInterpolation(i, line, "}", lineStarts, tokens) + } else if matchesSequence(source[i+1:], "let={") { + i += 5 + i, line = scanInterpolation(i, line, "}", lineStarts, tokens) + } else { + i, line = scanTagAttr(i, line, lineStarts, tokens) + } + + // self-closing tag + case source[i] == '/': + if i+1 < len(source) && source[i+1] == '>' { + i += 2 + return i, line + } + + // finish open tag + case source[i] == '>': + for i < len(source) { + closeTag := "" + if matchesSequence(source[i:], closeTag) { + // consume ", i, line, &lineStarts) + tokens = append(tokens, Token{Kind: TokComment, Start: start, End: i, Line: startLine}) + } else if source[i] == '%' { + // consume % + i++ + if source[i] == '!' && i+2 < len(source) && source[i+1] == '-' && source[i+2] == '-' { + // HEEX comment "<%!--" + i += 3 + start := i - 5 + startLine := line + i, line = scanComment("--%>", i, line, &lineStarts) + tokens = append(tokens, Token{Kind: TokComment, Start: start, End: i, Line: startLine}) + } else { + // EEX interpolation "<%" + // EEX special form "<% for", "<% if", "<% case", "<% cond", "<% else", "<% end", "<% _ ->" + i, line = scanInterpolation(i, line, "%>", &lineStarts, &tokens) + } + } else { + // HTML tag "= '0' && ch <= '9' } +// isWhitespace returns true for space, tab, and carriage return. +func isWhitespace(ch byte) bool { + return ch == ' ' || ch == '\t' || ch == '\r' +} + // isHexDigit returns true for [0-9a-fA-F]. 
func isHexDigit(ch byte) bool { return isDigit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F') diff --git a/internal/parser/tokenizer_test.go b/internal/parser/tokenizer_test.go index 7b38365..a8c8b43 100644 --- a/internal/parser/tokenizer_test.go +++ b/internal/parser/tokenizer_test.go @@ -2077,9 +2077,8 @@ func TestTokenize_EscapedNewlineLineTracking(t *testing.T) { } func TestLineStartsAccuracy(t *testing.T) { - assertLineStarts := func(t *testing.T, src string) { + assertLineStarts := func(t *testing.T, src string, result TokenResult) { t.Helper() - result := TokenizeFull([]byte(src)) lineStarts := result.LineStarts lines := strings.Split(src, "\n") if len(lineStarts) != len(lines) { @@ -2100,9 +2099,8 @@ func TestLineStartsAccuracy(t *testing.T) { } } - assertTokenAt := func(t *testing.T, src string, line0, col int, wantKind TokenKind, wantText string) { + assertTokenAt := func(t *testing.T, src string, result TokenResult, line0, col int, wantKind TokenKind, wantText string) { t.Helper() - result := TokenizeFull([]byte(src)) offset := LineColToOffset(result.LineStarts, line0, col) idx := TokenAtOffset(result.Tokens, offset) if idx < 0 { @@ -2119,25 +2117,42 @@ func TestLineStartsAccuracy(t *testing.T) { t.Run("heredoc", func(t *testing.T) { src := "defmodule MyApp.Example do\n @moduledoc \"\"\"\n This is a long\n multiline heredoc\n with several lines\n of documentation.\n \"\"\"\n\n @type t :: %__MODULE__{\n name: String.t(),\n age: Integer.t()\n }\n\n def hello do\n :world\n end\nend" - assertLineStarts(t, src) - assertTokenAt(t, src, 9, 16, TokModule, "String") + result := TokenizeFull([]byte(src)) + assertLineStarts(t, src, result) + assertTokenAt(t, src, result, 9, 16, TokModule, "String") }) t.Run("multiline string", func(t *testing.T) { src := "x = \"line one\nline two\nline three\"\ny = Enum.map(list, fn x -> x end)" - assertLineStarts(t, src) - assertTokenAt(t, src, 3, 4, TokModule, "Enum") + result := TokenizeFull([]byte(src)) + 
assertLineStarts(t, src, result) + assertTokenAt(t, src, result, 3, 4, TokModule, "Enum") }) t.Run("sigil heredoc", func(t *testing.T) { src := "x = ~s\"\"\"\nline one\nline two\n\"\"\"\ny = MyModule.func()" - assertLineStarts(t, src) - assertTokenAt(t, src, 4, 4, TokModule, "MyModule") + result := TokenizeFull([]byte(src)) + assertLineStarts(t, src, result) + assertTokenAt(t, src, result, 4, 4, TokModule, "MyModule") }) t.Run("multiline interpolation", func(t *testing.T) { src := "x = \"hello #{\n some_func()\n}\"\ny = String.trim(x)" - assertLineStarts(t, src) - assertTokenAt(t, src, 3, 4, TokModule, "String") + result := TokenizeFull([]byte(src)) + assertLineStarts(t, src, result) + assertTokenAt(t, src, result, 3, 4, TokModule, "String") + }) + + t.Run("HEEX: comment", func(t *testing.T) { + src := "" + result := TokenizeHeex([]byte(src)) + assertLineStarts(t, src, result) + assertTokenAt(t, src, result, 0, 0, TokComment, "") }) } + +func TestTokenizeHeex(t *testing.T) { + src := "<%!-- hello, world! 
--%>foo\nbar" + result := TokenizeHeex([]byte(src)) + fmt.Printf("%+v %s\n", result, TokenText([]byte(src), result.Tokens[0])) +} From 9c82cc4f59b3a59481481a5758c585dd1a179f08 Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Mon, 8 Jun 2026 20:34:30 -0700 Subject: [PATCH 14/31] scan interpolations in standard HTML tag attrs --- internal/parser/tokenizer.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/internal/parser/tokenizer.go b/internal/parser/tokenizer.go index feabf1a..dcddfe2 100644 --- a/internal/parser/tokenizer.go +++ b/internal/parser/tokenizer.go @@ -937,7 +937,7 @@ func TokenizeHeex(source []byte) TokenResult { case quoteChar == 0 && isWhitespace(ch): return i, line - case ch == '\'' || ch == '"': + case quoteChar == 0 && ch == '\'' || ch == '"': quoteChar = ch i++ @@ -947,6 +947,9 @@ func TokenizeHeex(source []byte) TokenResult { i++ } + case ch == '{': + return scanInterpolation(i, line, "}", lineStarts, tokens) + default: i++ } From 0657d5b2cc6b38d46b4731dd56811fd6f749b0bd Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Mon, 8 Jun 2026 20:37:01 -0700 Subject: [PATCH 15/31] perform bounds check before access --- internal/parser/tokenizer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/parser/tokenizer.go b/internal/parser/tokenizer.go index dcddfe2..582713f 100644 --- a/internal/parser/tokenizer.go +++ b/internal/parser/tokenizer.go @@ -1114,7 +1114,7 @@ func TokenizeHeex(source []byte) TokenResult { } else if source[i] == '%' { // consume % i++ - if source[i] == '!' && i+2 < len(source) && source[i+1] == '-' && source[i+2] == '-' { + if i+2 < len(source) && source[i] == '!' 
&& source[i+1] == '-' && source[i+2] == '-' { // HEEX comment "<%!--" i += 3 start := i - 5 From b8d299b6aa1d2eb9ae20524317edd0d2def03428 Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Tue, 9 Jun 2026 14:43:30 -0700 Subject: [PATCH 16/31] wip: continue HEEX tokenizer impl --- internal/parser/tokenizer.go | 141 +++++++++++++++++----------- internal/parser/tokenizer_test.go | 58 +++++++++++- internal/parser/tokenkind_string.go | 79 ++++++++++++++++ internal/treesitter/tree.go | 28 +----- 4 files changed, 222 insertions(+), 84 deletions(-) create mode 100644 internal/parser/tokenkind_string.go diff --git a/internal/parser/tokenizer.go b/internal/parser/tokenizer.go index 582713f..2631b2f 100644 --- a/internal/parser/tokenizer.go +++ b/internal/parser/tokenizer.go @@ -4,11 +4,11 @@ import ( "strings" "unicode" "unicode/utf8" - - "github.com/remoteoss/dexter/internal/treesitter" ) // TokenKind identifies the kind of a lexed token. +// +//go:generate stringer -type=TokenKind type TokenKind byte const ( @@ -812,56 +812,15 @@ func scanSigilContents(sigilChars string, source []byte, start, end, contentsSta return start, line } - lineOffset := func(src []byte, offset int) (lines int) { - for i := range offset { - if src[i] == '\n' { - lines++ - } + result := TokenizeHeex(source[contentsStart:contentsEnd]) + for _, t := range result.Tokens { + if t.Kind != TokEOF { + *tokens = append(*tokens, Token{Kind: t.Kind, Start: t.Start + contentsStart, End: t.End + contentsStart, Line: t.Line + line - 1}) } - return } - - xml := source[contentsStart:contentsEnd] - treesitter.ParseHeex(xml, func(kind, text string, offset int) { - line_ := lineOffset(source, contentsStart+offset) + 1 - offset += contentsStart - n := len(text) - - switch kind { - case "expression_value": - res := TokenizeFull([]byte(text)) - - for _, t := range res.Tokens { - if t.Kind == TokEOF { - continue - } - *tokens = append(*tokens, Token{ - Kind: t.Kind, - Start: t.Start + offset, - End: t.End + offset, - Line: 
t.Line + line_ - 1, - }) - } - - // FIXME: how do we need to update lineStarts? - // for _, l := range res.LineStarts[1:] { - // *lineStarts = append(*lineStarts, line_) - // } - - case "module": - *tokens = append(*tokens, Token{Kind: TokModule, Start: offset, End: offset + n, Line: line_}) - - case "function": - *tokens = append(*tokens, Token{Kind: TokIdent, Start: offset, End: offset + n, Line: line_}) - - case ".": - *tokens = append(*tokens, Token{Kind: TokDot, Start: offset, End: offset + 1, Line: line_}) - - default: - // The remainder of the sigil's contents are ignored. - *tokens = append(*tokens, Token{Kind: TokOther, Start: offset, End: offset + n, Line: line_}) - } - }) + for _, ls := range result.LineStarts[1:] { + *lineStarts = append(*lineStarts, ls+start) + } return start, line } @@ -898,13 +857,11 @@ func TokenizeHeex(source []byte) TokenResult { return i, line } - scanInterpolation := func(i, line int, terminator string, lineStarts *[]int, tokens *[]Token) (int, int) { - // start := i + scanUntil := func(i, line int, terminator string, lineStarts *[]int, tokens *[]Token) (int, int) { for i < len(source) { // FIXME: this will get tripped up when the terminator appears within the interpolation // in an ignored context, such as within a string or comment if matchesSequence(source[i:], terminator) { - i += len(terminator) break } @@ -913,11 +870,70 @@ func TokenizeHeex(source []byte) TokenResult { line++ i++ *lineStarts = append(*lineStarts, i) + } else { + i++ } } - // FIXME: once we've identified the index of the terminator, we know the full range of - // the interpolation and may call TokenizeFull on source[start:i] to tokenize the expression + return i, line + } + + scanInterpolation := func(i, line int, terminator string, lineStarts *[]int, tokens *[]Token) (int, int) { + start := i + startLine := line + + // FIXME: this will get tripped up when the terminator appears within the interpolation + // in an ignored context, such as within a string 
or comment + i, line = scanUntil(i, line, terminator, lineStarts, tokens) + + result := TokenizeFull(source[start:i]) + for _, t := range result.Tokens { + if t.Kind != TokEOF { + *tokens = append(*tokens, Token{Kind: t.Kind, Start: t.Start + start, End: t.End + start, Line: t.Line + startLine - 1}) + } + } + for _, ls := range result.LineStarts[1:] { + *lineStarts = append(*lineStarts, ls+start) + } + + i += len(terminator) + + return i, line + } + + scanSpecialForm := func(i, line int, lineStarts *[]int, tokens *[]Token) (int, int) { + for i < len(source) { + switch { + case source[i] == '\n': + *tokens = append(*tokens, Token{Kind: TokEOL, Start: i, End: i + 1, Line: line}) + line++ + i++ + *lineStarts = append(*lineStarts, i) + + case isWhitespace(source[i]): + i++ + + // FIXME: these forms may introduce variables bindings, so we can't just ignore their contents + // "<% for", "<% if", "<% case", "<% cond", "<% else", "<% end" + case matchesSequence(source[i:], "for") || + matchesSequence(source[i:], "if") || + matchesSequence(source[i:], "else") || + matchesSequence(source[i:], "case") || + matchesSequence(source[i:], "cond") || + matchesSequence(source[i:], "end"): + i, line = scanUntil(i, line, "%>", lineStarts, tokens) + // consume %> + i += 2 + return i, line + + // FIXME: `<% {pattern} -> %>` from case/cond special forms affect scope resolution + // and probably don't tokenize correctly with `scanInterpolation` since they aren't + // complete expressions on their own + + default: + return scanInterpolation(i, line, "%>", lineStarts, tokens) + } + } return i, line } @@ -937,7 +953,7 @@ func TokenizeHeex(source []byte) TokenResult { case quoteChar == 0 && isWhitespace(ch): return i, line - case quoteChar == 0 && ch == '\'' || ch == '"': + case quoteChar == 0 && (ch == '\'' || ch == '"'): quoteChar = ch i++ @@ -948,6 +964,7 @@ func TokenizeHeex(source []byte) TokenResult { } case ch == '{': + i++ return scanInterpolation(i, line, "}", lineStarts, tokens) 
default: @@ -1051,6 +1068,12 @@ func TokenizeHeex(source []byte) TokenResult { // finish open tag case source[i] == '>': + // FIXME: we need to parse nested tags recursively, otherwise this would find the inner `
` + // instead of the outer in this situation: + // + //
+ //
<-- would find this one + //
<-- want to find this one instead for i < len(source) { closeTag := "" if matchesSequence(source[i:], closeTag) { @@ -1122,9 +1145,13 @@ func TokenizeHeex(source []byte) TokenResult { i, line = scanComment("--%>", i, line, &lineStarts) tokens = append(tokens, Token{Kind: TokComment, Start: start, End: i, Line: startLine}) } else { + // consume "=" output indicator from "<%=" special form prefix + if source[i] == '=' { + i++ + } // EEX interpolation "<%" // EEX special form "<% for", "<% if", "<% case", "<% cond", "<% else", "<% end", "<% _ ->" - i, line = scanInterpolation(i, line, "%>", &lineStarts, &tokens) + i, line = scanSpecialForm(i, line, &lineStarts, &tokens) } } else { // HTML tag "", + `TokComment (0:24) "<%!-- hello, world! --%>" +TokEOF (24:24) +`}, + {"
hello!
", `TokEOF (17:17) +`}, + {"<.foo>", `TokDot (1:2) +TokIdent (2:5) "foo" +TokDot (8:9) +TokIdent (9:12) "foo" +TokEOF (13:13) +`}, + {"<.foo />", `TokDot (1:2) +TokIdent (2:5) "foo" +TokEOF (8:8) +`}, + {"<.live_component id=\"foo\" module={Foo.Bar} />", `TokDot (1:2) +TokIdent (2:6) "live" +TokModule (34:37) "Foo" +TokDot (37:38) +TokModule (38:41) "Bar" +TokEOF (45:45) +`}, + {`
+ <%!-- Header --%>`, ""}, + } + + for _, tt := range tests { + result := TokenizeHeex([]byte(tt.src)) + got := DebugTokens([]byte(tt.src), result.Tokens) + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Errorf("TokenizeHeex(src) (-want +got)\n\n%s\n\n%s", tt.src, diff) + } + } +} + +func DebugTokens(source []byte, tokens []Token) string { + var s strings.Builder + + for _, t := range tokens { + switch t.Kind { + case TokDot, TokEOL, TokEOF, TokOpenBrace, TokCloseBrace: + fmt.Fprintf(&s, "%s (%d:%d)\n", t.Kind.String(), t.Start, t.End) + + default: + fmt.Fprintf(&s, "%s (%d:%d) %#v\n", t.Kind.String(), t.Start, t.End, TokenText(source, t)) + } + } + + return s.String() } diff --git a/internal/parser/tokenkind_string.go b/internal/parser/tokenkind_string.go new file mode 100644 index 0000000..8de4041 --- /dev/null +++ b/internal/parser/tokenkind_string.go @@ -0,0 +1,79 @@ +// Code generated by "stringer -type=TokenKind"; DO NOT EDIT. + +package parser + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[TokDefmodule-0] + _ = x[TokDef-1] + _ = x[TokDefp-2] + _ = x[TokDefmacro-3] + _ = x[TokDefmacrop-4] + _ = x[TokDefguard-5] + _ = x[TokDefguardp-6] + _ = x[TokDefdelegate-7] + _ = x[TokDefprotocol-8] + _ = x[TokDefimpl-9] + _ = x[TokDefstruct-10] + _ = x[TokDefexception-11] + _ = x[TokAlias-12] + _ = x[TokImport-13] + _ = x[TokUse-14] + _ = x[TokRequire-15] + _ = x[TokDo-16] + _ = x[TokEnd-17] + _ = x[TokFn-18] + _ = x[TokWhen-19] + _ = x[TokIdent-20] + _ = x[TokModule-21] + _ = x[TokAttr-22] + _ = x[TokAttrDoc-23] + _ = x[TokAttrSpec-24] + _ = x[TokAttrType-25] + _ = x[TokAttrBehaviour-26] + _ = x[TokAttrCallback-27] + _ = x[TokString-28] + _ = x[TokHeredoc-29] + _ = x[TokSigil-30] + _ = x[TokCharLiteral-31] + _ = x[TokAtom-32] + _ = x[TokDot-33] + _ = x[TokComma-34] + _ = x[TokColon-35] + _ = x[TokOpenParen-36] + _ = x[TokCloseParen-37] + _ = x[TokOpenBracket-38] + _ = x[TokCloseBracket-39] + _ = x[TokOpenBrace-40] + _ = x[TokCloseBrace-41] + _ = x[TokOpenAngle-42] + _ = x[TokCloseAngle-43] + _ = x[TokPipe-44] + _ = x[TokBackslash-45] + _ = x[TokRightArrow-46] + _ = x[TokLeftArrow-47] + _ = x[TokAssoc-48] + _ = x[TokDoubleColon-49] + _ = x[TokPercent-50] + _ = x[TokNumber-51] + _ = x[TokComment-52] + _ = x[TokEOL-53] + _ = x[TokEOF-54] + _ = x[TokOther-55] +} + +const _TokenKind_name = "TokDefmoduleTokDefTokDefpTokDefmacroTokDefmacropTokDefguardTokDefguardpTokDefdelegateTokDefprotocolTokDefimplTokDefstructTokDefexceptionTokAliasTokImportTokUseTokRequireTokDoTokEndTokFnTokWhenTokIdentTokModuleTokAttrTokAttrDocTokAttrSpecTokAttrTypeTokAttrBehaviourTokAttrCallbackTokStringTokHeredocTokSigilTokCharLiteralTokAtomTokDotTokCommaTokColonTokOpenParenTokCloseParenTokOpenBracketTokCloseBracketTokOpenBraceTokCloseBraceTokOpenAngleTokCloseAngleTokPipeTokBackslashTokRightArrowTokLeftArrowTokAssocTokDoubleColonTokPercentTokNumberTokCommentTokEOLTokEOFTokOther" + +var _TokenKind_index = [...]uint16{0, 12, 18, 25, 36, 48, 59, 71, 85, 99, 109, 121, 136, 
144, 153, 159, 169, 174, 180, 185, 192, 200, 209, 216, 226, 237, 248, 264, 279, 288, 298, 306, 320, 327, 333, 341, 349, 361, 374, 388, 403, 415, 428, 440, 453, 460, 472, 485, 497, 505, 519, 529, 538, 548, 554, 560, 568} + +func (i TokenKind) String() string { + idx := int(i) - 0 + if i < 0 || idx >= len(_TokenKind_index)-1 { + return "TokenKind(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _TokenKind_name[_TokenKind_index[idx]:_TokenKind_index[idx+1]] +} diff --git a/internal/treesitter/tree.go b/internal/treesitter/tree.go index a7f6da5..12f07cb 100644 --- a/internal/treesitter/tree.go +++ b/internal/treesitter/tree.go @@ -97,6 +97,10 @@ func AllParsers() map[Language]*tree_sitter.Parser { for _, l := range []Language{LangElixir, LangHeex} { p := NewParser(l) if p == nil { + // if a parser fails to initialize, close any already-opened parsers + for _, pp := range parsers { + pp.Close() + } return nil } parsers[l] = p @@ -105,30 +109,6 @@ func AllParsers() map[Language]*tree_sitter.Parser { return parsers } -// ParseHeex parses the HEEX template in `src` and calls `onNode` for each leaf node -// it encounters. `onNode` is called with the leaf node's kind, text contents, and -// offset within the given `src` slice. 
-func ParseHeex(src []byte, onNode func(kind, text string, offset int)) { - p := NewParser(LangHeex) - if p == nil { - return - } - defer p.Close() - - tree := p.Parse(src, nil) - if tree == nil { - return - } - defer tree.Close() - - visitTree(tree.RootNode(), func(node *tree_sitter.Node) { - // notify visitor about leaf nodes - if node.ChildCount() == 0 { - onNode(node.Kind(), node.Utf8Text(src), int(node.StartByte())) - } - }) -} - func visitTree(root *tree_sitter.Node, onNode func(node *tree_sitter.Node)) { cursor := root.Walk() defer cursor.Close() From 80d3f1b3ad2964aa588cb3428a7ec29a682e8ddb Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Wed, 10 Jun 2026 11:32:19 -0700 Subject: [PATCH 17/31] wip: tokenize HEEX - fix infinite loops --- internal/parser/tokenizer.go | 180 ++++++++++++++---------------- internal/parser/tokenizer_test.go | 47 ++++---- 2 files changed, 112 insertions(+), 115 deletions(-) diff --git a/internal/parser/tokenizer.go b/internal/parser/tokenizer.go index 2631b2f..6e753d2 100644 --- a/internal/parser/tokenizer.go +++ b/internal/parser/tokenizer.go @@ -1,6 +1,7 @@ package parser import ( + "fmt" "strings" "unicode" "unicode/utf8" @@ -938,6 +939,55 @@ func TokenizeHeex(source []byte) TokenResult { return i, line } + scanTagName := func(i, line int, lineStarts *[]int, tokens *[]Token) (int, int) { + for i < len(source) { + switch { + // <.foo + case source[i] == '.': + *tokens = append(*tokens, Token{Kind: TokDot, Start: i, End: i + 1, Line: line}) + i++ + + start := i + for i < len(source) && (isLetter(source[i]) || isDigit(source[i]) || source[i] == '_' || source[i] == '-') { + i++ + } + if i == start { + return i, line + } + + if isUpper(source[start]) { + *tokens = append(*tokens, Token{Kind: TokModule, Start: start, End: i, Line: line}) + } else { + *tokens = append(*tokens, Token{Kind: TokIdent, Start: start, End: i, Line: line}) + return i, line + } + + //
': + return i, line + default: i++ } @@ -977,8 +1030,6 @@ func TokenizeHeex(source []byte) TokenResult { scanTag := func(i, line int, lineStarts *[]int, tokens *[]Token) (int, int) { hasName := false - var tagName strings.Builder - var tagNameTokens []Token for i < len(source) { switch { case source[i] == '\n': @@ -990,74 +1041,19 @@ func TokenizeHeex(source []byte) TokenResult { case isWhitespace(source[i]): i++ - // <.foo - case source[i] == '.': - dotTkn := Token{Kind: TokDot, Start: i, End: i + 1, Line: line} - *tokens = append(*tokens, dotTkn) - i++ - - start := i - for i < len(source) && (isLetter(source[i]) || isDigit(source[i])) { - i++ - } - if i == start { - break - } - - var tknKind TokenKind - if isUpper(source[start]) { - tknKind = TokModule - } else { - tknKind = TokIdent - hasName = true - } - nameTkn := Token{Kind: tknKind, Start: start, End: i, Line: line} - *tokens = append(*tokens, nameTkn) - tagName.WriteByte('.') - tagName.Write(source[start:i]) - tagNameTokens = append(tagNameTokens, dotTkn, nameTkn) - - //
= len(source) { - break - } - if matchesSequence(source[i+1:], "if={") { - i += 4 - i, line = scanInterpolation(i, line, "}", lineStarts, tokens) - } else if matchesSequence(source[i+1:], "for={") { - i += 5 - i, line = scanInterpolation(i, line, "}", lineStarts, tokens) - } else if matchesSequence(source[i+1:], "let={") { - i += 5 - i, line = scanInterpolation(i, line, "}", lineStarts, tokens) - } else { - i, line = scanTagAttr(i, line, lineStarts, tokens) - } + i++ + i, line = scanTagAttr(i, line, lineStarts, tokens) // self-closing tag case source[i] == '/': @@ -1068,37 +1064,8 @@ func TokenizeHeex(source []byte) TokenResult { // finish open tag case source[i] == '>': - // FIXME: we need to parse nested tags recursively, otherwise this would find the inner `
` - // instead of the outer in this situation: - // - //
- //
<-- would find this one - //
<-- want to find this one instead - for i < len(source) { - closeTag := "" - if matchesSequence(source[i:], closeTag) { - // consume " i, line = scanSpecialForm(i, line, &lineStarts, &tokens) } + } else if source[i] == '/' { + i++ + i, line = scanTagName(i, line, &lineStarts, &tokens) + if i < len(source) && source[i] == '>' { + i++ + } } else { // HTML tag "= len(source) || source[i+1] != ':') } + +// DebugTokens returns a string represention similar to %+v for a slice of tokens. +func DebugTokens(source []byte, tokens []Token) string { + var s strings.Builder + + for _, t := range tokens { + switch t.Kind { + case TokDot, TokEOL, TokEOF, TokOpenBrace, TokCloseBrace: + fmt.Fprintf(&s, "%s (%d:%d)\n", t.Kind.String(), t.Start, t.End) + + default: + fmt.Fprintf(&s, "%s (%d:%d) %#v\n", t.Kind.String(), t.Start, t.End, TokenText(source, t)) + } + } + + return s.String() +} diff --git a/internal/parser/tokenizer_test.go b/internal/parser/tokenizer_test.go index f851389..28b64d6 100644 --- a/internal/parser/tokenizer_test.go +++ b/internal/parser/tokenizer_test.go @@ -1,9 +1,11 @@ package parser import ( + "context" "fmt" "strings" "testing" + "time" "github.com/google/go-cmp/cmp" ) @@ -2173,38 +2175,43 @@ TokEOF (13:13) TokIdent (2:5) "foo" TokEOF (8:8) `}, - {"<.live_component id=\"foo\" module={Foo.Bar} />", `TokDot (1:2) -TokIdent (2:6) "live" + {"<.live_component id=\"foo\" module={Foo.Bar} no-value />", `TokDot (1:2) +TokIdent (2:16) "live_component" TokModule (34:37) "Foo" TokDot (37:38) TokModule (38:41) "Bar" -TokEOF (45:45) +TokEOF (54:54) `}, - {`
- <%!-- Header --%>`, ""}, } for _, tt := range tests { - result := TokenizeHeex([]byte(tt.src)) - got := DebugTokens([]byte(tt.src), result.Tokens) - if diff := cmp.Diff(tt.want, got); diff != "" { - t.Errorf("TokenizeHeex(src) (-want +got)\n\n%s\n\n%s", tt.src, diff) + err := withTimeout(200_000_000_000, func() { + result := TokenizeHeex([]byte(tt.src)) + got := DebugTokens([]byte(tt.src), result.Tokens) + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Errorf("TokenizeHeex(src) (-want +got)\n\n%.512s\n\n%s", tt.src, diff) + } + }) + if err == context.DeadlineExceeded { + t.Errorf("TokenizeHeex(src) timeout after 2s\n\n%.512s", tt.src) } } } -func DebugTokens(source []byte, tokens []Token) string { - var s strings.Builder +func withTimeout(ms time.Duration, cb func()) error { + ctx, cancel := context.WithTimeout(context.Background(), ms*time.Millisecond) + defer cancel() - for _, t := range tokens { - switch t.Kind { - case TokDot, TokEOL, TokEOF, TokOpenBrace, TokCloseBrace: - fmt.Fprintf(&s, "%s (%d:%d)\n", t.Kind.String(), t.Start, t.End) + done := make(chan struct{}) + go func() { + cb() + done <- struct{}{} + }() - default: - fmt.Fprintf(&s, "%s (%d:%d) %#v\n", t.Kind.String(), t.Start, t.End, TokenText(source, t)) - } + select { + case <-done: + return nil + case <-ctx.Done(): + return ctx.Err() } - - return s.String() } From 7e1bfc57ac5e71a30b557c39d4da38e2bfbd0939 Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Wed, 10 Jun 2026 12:12:04 -0700 Subject: [PATCH 18/31] don't duplicate lineStarts when parsing sigil contents --- internal/parser/tokenizer.go | 4 +--- internal/parser/tokenizer_test.go | 7 +++++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/internal/parser/tokenizer.go b/internal/parser/tokenizer.go index 6e753d2..7cbd7fc 100644 --- a/internal/parser/tokenizer.go +++ b/internal/parser/tokenizer.go @@ -813,15 +813,13 @@ func scanSigilContents(sigilChars string, source []byte, start, end, contentsSta return start, line } + // 
lineStarts has already been updated by `scanHeredocContent` / `scanRawHeredocContent` result := TokenizeHeex(source[contentsStart:contentsEnd]) for _, t := range result.Tokens { if t.Kind != TokEOF { *tokens = append(*tokens, Token{Kind: t.Kind, Start: t.Start + contentsStart, End: t.End + contentsStart, Line: t.Line + line - 1}) } } - for _, ls := range result.LineStarts[1:] { - *lineStarts = append(*lineStarts, ls+start) - } return start, line } diff --git a/internal/parser/tokenizer_test.go b/internal/parser/tokenizer_test.go index 28b64d6..d5f2ce1 100644 --- a/internal/parser/tokenizer_test.go +++ b/internal/parser/tokenizer_test.go @@ -2153,6 +2153,13 @@ func TestLineStartsAccuracy(t *testing.T) { assertLineStarts(t, src, result) assertTokenAt(t, src, result, 0, 0, TokComment, "") }) + + t.Run("HEEX: sigil contents", func(t *testing.T) { + src := "defmodule PageLive do\n def render(assigns) do\n ~H\"\"\"\n
\n \"\"\"\n end\nend" + result := TokenizeFull([]byte(src)) + assertLineStarts(t, src, result) + assertTokenAt(t, src, result, 6, 2, TokEnd, "end") + }) } func TestTokenizeHeex(t *testing.T) { From 270c6400f9b6f6a0f013b4fdff9707c144944dba Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Wed, 10 Jun 2026 12:14:29 -0700 Subject: [PATCH 19/31] fix test timeout, improve guards for sigil parsing --- internal/parser/tokenizer_test.go | 2 +- internal/treesitter/tree.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/parser/tokenizer_test.go b/internal/parser/tokenizer_test.go index d5f2ce1..d91b0c7 100644 --- a/internal/parser/tokenizer_test.go +++ b/internal/parser/tokenizer_test.go @@ -2192,7 +2192,7 @@ TokEOF (54:54) } for _, tt := range tests { - err := withTimeout(200_000_000_000, func() { + err := withTimeout(2_000, func() { result := TokenizeHeex([]byte(tt.src)) got := DebugTokens([]byte(tt.src), result.Tokens) if diff := cmp.Diff(tt.want, got); diff != "" { diff --git a/internal/treesitter/tree.go b/internal/treesitter/tree.go index 12f07cb..e3e5ff8 100644 --- a/internal/treesitter/tree.go +++ b/internal/treesitter/tree.go @@ -50,8 +50,8 @@ func NewTreeWithParsers(src []byte, parsers map[Language]*tree_sitter.Parser) *T heex := make(map[*tree_sitter.Node]*tree_sitter.Tree) visitTree(trunk.RootNode(), func(node *tree_sitter.Node) { if node.Kind() == "quoted_content" && - node.Parent().Kind() == "sigil" && - /* sigil_name */ node.PrevNamedSibling().Utf8Text(src) == "H" { + node.Parent() != nil && node.Parent().Kind() == "sigil" && + /* sigil_name */ node.PrevNamedSibling() != nil && node.PrevNamedSibling().Utf8Text(src) == "H" { tree := parsers[LangHeex].Parse(src[node.StartByte():node.EndByte()], nil) if tree == nil { return From eda0690aaa88a00005f580993cc0d5198a65ca6e Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Wed, 10 Jun 2026 13:38:38 -0700 Subject: [PATCH 20/31] replace scan/tokenize with tokenizeUntil --- 
internal/parser/tokenizer.go | 113 ++++++++--------------------------- 1 file changed, 25 insertions(+), 88 deletions(-) diff --git a/internal/parser/tokenizer.go b/internal/parser/tokenizer.go index 7cbd7fc..228c21e 100644 --- a/internal/parser/tokenizer.go +++ b/internal/parser/tokenizer.go @@ -1,6 +1,7 @@ package parser import ( + "bytes" "fmt" "strings" "unicode" @@ -135,6 +136,14 @@ func Tokenize(source []byte) []Token { } func TokenizeFull(source []byte) TokenResult { + _, _, result := tokenizeUntil(source, nil) + return result +} + +// tokenizeUntil tokenizes the given source until the given terminator is reached +// in a non-ignored context (ignored when it appears within a comment or string literal). +// Returns the byte offset and line number that tokenizing stopped along with the token result. +func tokenizeUntil(source, terminator []byte) (int, int, TokenResult) { tokens := make([]Token, 0, len(source)/8) lineStarts := make([]int, 1, 64) lineStarts[0] = 0 // line 1 starts at byte 0 @@ -148,6 +157,9 @@ func TokenizeFull(source []byte) TokenResult { // Whitespace, newlines, and comments don't affect afterDot — they preserve it. // Everything else clears it (except the dot case which sets it). switch { + case terminator != nil && bytes.HasPrefix(source[i:], terminator): + return i, line, TokenResult{Tokens: tokens, LineStarts: lineStarts} + case ch == '\n': tokens = append(tokens, Token{Kind: TokEOL, Start: i, End: i + 1, Line: line}) line++ @@ -542,7 +554,7 @@ func TokenizeFull(source []byte) TokenResult { } tokens = append(tokens, Token{Kind: TokEOF, Start: len(source), End: len(source), Line: line}) - return TokenResult{Tokens: tokens, LineStarts: lineStarts} + return i, line, TokenResult{Tokens: tokens, LineStarts: lineStarts} } // scanStringContent scans from after the opening delimiter to (and including) the matching closing delimiter. 
@@ -831,19 +843,9 @@ func TokenizeHeex(source []byte) TokenResult { line := 1 i := 0 - matchesSequence := func(src []byte, seq_ string) bool { - seq := []byte(seq_) - for i, c := range seq { - if i >= len(src) || src[i] != c { - return false - } - } - return true - } - scanComment := func(delim string, i, line int, lineStarts *[]int) (int, int) { for i < len(source) { - if matchesSequence(source[i:], delim) { + if bytes.HasPrefix(source[i:], []byte(delim)) { i += len(delim) break } @@ -856,88 +858,25 @@ func TokenizeHeex(source []byte) TokenResult { return i, line } - scanUntil := func(i, line int, terminator string, lineStarts *[]int, tokens *[]Token) (int, int) { - for i < len(source) { - // FIXME: this will get tripped up when the terminator appears within the interpolation - // in an ignored context, such as within a string or comment - if matchesSequence(source[i:], terminator) { - break - } - - if source[i] == '\n' { - *tokens = append(*tokens, Token{Kind: TokEOL, Start: i, End: i + 1, Line: line}) - line++ - i++ - *lineStarts = append(*lineStarts, i) - } else { - i++ - } - } - - return i, line - } - - scanInterpolation := func(i, line int, terminator string, lineStarts *[]int, tokens *[]Token) (int, int) { + scanInterpolation := func(i, line int, terminator string, tokens *[]Token) (int, int) { start := i startLine := line - // FIXME: this will get tripped up when the terminator appears within the interpolation - // in an ignored context, such as within a string or comment - i, line = scanUntil(i, line, terminator, lineStarts, tokens) - - result := TokenizeFull(source[start:i]) + // lineStarts has already been updated during heredoc scanning + i_, line_, result := tokenizeUntil(source[start:], []byte(terminator)) for _, t := range result.Tokens { if t.Kind != TokEOF { *tokens = append(*tokens, Token{Kind: t.Kind, Start: t.Start + start, End: t.End + start, Line: t.Line + startLine - 1}) } } - for _, ls := range result.LineStarts[1:] { - *lineStarts = 
append(*lineStarts, ls+start) - } - i += len(terminator) + i += i_ + len(terminator) + line += line_ - 1 return i, line } - scanSpecialForm := func(i, line int, lineStarts *[]int, tokens *[]Token) (int, int) { - for i < len(source) { - switch { - case source[i] == '\n': - *tokens = append(*tokens, Token{Kind: TokEOL, Start: i, End: i + 1, Line: line}) - line++ - i++ - *lineStarts = append(*lineStarts, i) - - case isWhitespace(source[i]): - i++ - - // FIXME: these forms may introduce variables bindings, so we can't just ignore their contents - // "<% for", "<% if", "<% case", "<% cond", "<% else", "<% end" - case matchesSequence(source[i:], "for") || - matchesSequence(source[i:], "if") || - matchesSequence(source[i:], "else") || - matchesSequence(source[i:], "case") || - matchesSequence(source[i:], "cond") || - matchesSequence(source[i:], "end"): - i, line = scanUntil(i, line, "%>", lineStarts, tokens) - // consume %> - i += 2 - return i, line - - // FIXME: `<% {pattern} -> %>` from case/cond special forms affect scope resolution - // and probably don't tokenize correctly with `scanInterpolation` since they aren't - // complete expressions on their own - - default: - return scanInterpolation(i, line, "%>", lineStarts, tokens) - } - } - - return i, line - } - - scanTagName := func(i, line int, lineStarts *[]int, tokens *[]Token) (int, int) { + scanTagName := func(i, line int, tokens *[]Token) (int, int) { for i < len(source) { switch { // <.foo @@ -1013,7 +952,7 @@ func TokenizeHeex(source []byte) TokenResult { case ch == '{': i++ - return scanInterpolation(i, line, "}", lineStarts, tokens) + return scanInterpolation(i, line, "}", tokens) case ch == '>': return i, line @@ -1041,7 +980,7 @@ func TokenizeHeex(source []byte) TokenResult { // HTML tag name, function, or module/function case !hasName: - i, line = scanTagName(i, line, lineStarts, tokens) + i, line = scanTagName(i, line, tokens) hasName = true // attribute @@ -1076,8 +1015,6 @@ func TokenizeHeex(source 
[]byte) TokenResult { for i < len(source) { ch := source[i] - // Whitespace, newlines, and comments don't affect afterDot — they preserve it. - // Everything else clears it (except the dot case which sets it). switch { case ch == '\n': tokens = append(tokens, Token{Kind: TokEOL, Start: i, End: i + 1, Line: line}) @@ -1116,11 +1053,11 @@ func TokenizeHeex(source []byte) TokenResult { } // EEX interpolation "<%" // EEX special form "<% for", "<% if", "<% case", "<% cond", "<% else", "<% end", "<% _ ->" - i, line = scanSpecialForm(i, line, &lineStarts, &tokens) + i, line = scanInterpolation(i, line, "%>", &tokens) } } else if source[i] == '/' { i++ - i, line = scanTagName(i, line, &lineStarts, &tokens) + i, line = scanTagName(i, line, &tokens) if i < len(source) && source[i] == '>' { i++ } @@ -1134,7 +1071,7 @@ func TokenizeHeex(source []byte) TokenResult { case ch == '{': // HEEX interpolation "{" i++ - i, line = scanInterpolation(i, line, "}", &lineStarts, &tokens) + i, line = scanInterpolation(i, line, "}", &tokens) default: i++ From a52a0cd0a09789b1a8b154616c5813c13517a65f Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Wed, 10 Jun 2026 15:46:18 -0700 Subject: [PATCH 21/31] wip: recursive Elixir/HEEX tree-sitter nested trees --- internal/lsp/server_test.go | 57 ++++++++++++++++++++ internal/parser/tokenizer_test.go | 3 ++ internal/treesitter/tree.go | 51 ++++++++++++------ internal/treesitter/variables.go | 86 +++++++++++++++++++++---------- 4 files changed, 153 insertions(+), 44 deletions(-) diff --git a/internal/lsp/server_test.go b/internal/lsp/server_test.go index 6c0deff..54d74a7 100644 --- a/internal/lsp/server_test.go +++ b/internal/lsp/server_test.go @@ -3,6 +3,7 @@ package lsp import ( "context" "fmt" + "log" "os" "os/exec" "path/filepath" @@ -16,6 +17,7 @@ import ( "github.com/remoteoss/dexter/internal/parser" "github.com/remoteoss/dexter/internal/stdlib" "github.com/remoteoss/dexter/internal/store" + "github.com/remoteoss/dexter/internal/treesitter" ) 
func setupTestServer(t *testing.T) (*Server, func()) { @@ -2921,6 +2923,61 @@ end } } +func TestPlayground(t *testing.T) { + src := `~H""" +{@foo} +"""` + tree := treesitter.NewTree([]byte(src)) + defer tree.Close() + + for _, t := range tree.Branches { + log.Printf("t: %s", t.Trunk.RootNode().ToSexp()) + for _, tt := range t.Branches { + log.Printf("tt: %s", tt.Trunk.RootNode().ToSexp()) + } + } +} + +func TestReferences_HEEXNestedReference(t *testing.T) { + server, cleanup := setupTestServer(t) + defer cleanup() + + // Function component: foo/1 is defined in the same module and invoked as + // <.foo /> from the ~H template in render/1. + src := `defmodule App do + use Phoenix.LiveView + + def foo(assigns), do: ~H"" + + def render(assigns) do + ~H""" + <.foo /> + + """ + end +end +` + indexFile(t, server.store, server.projectRoot, "lib/app.ex", src) + + uri := "file://" + filepath.Join(server.projectRoot, "lib", "app.ex") + server.docs.Set(uri, src) + + // Go-to-references on "foo" in the <.foo /> component (line 8, col 6) + locs := referencesAt(t, server, uri, 7, 6) + log.Printf("%+v", locs) + if len(locs) == 0 { + t.Fatal("expected references for function foo") + } + + // Go-to-references on "foo" in the def line (line 4, col 6) + locs = referencesAt(t, server, uri, 3, 6) + log.Printf("%+v", locs) + if len(locs) == 0 { + t.Fatal("expected references for function foo") + } + +} + func TestDefinition_QualifiedCallOnNestedModule(t *testing.T) { server, cleanup := setupTestServer(t) defer cleanup() diff --git a/internal/parser/tokenizer_test.go b/internal/parser/tokenizer_test.go index d91b0c7..ad1f313 100644 --- a/internal/parser/tokenizer_test.go +++ b/internal/parser/tokenizer_test.go @@ -2188,6 +2188,9 @@ TokModule (34:37) "Foo" TokDot (37:38) TokModule (38:41) "Bar" TokEOF (54:54) +`}, + {"
", `TokString (12:16) "\"{}\"" +TokEOF (20:20) `}, } diff --git a/internal/treesitter/tree.go b/internal/treesitter/tree.go index e3e5ff8..24bf1d3 100644 --- a/internal/treesitter/tree.go +++ b/internal/treesitter/tree.go @@ -1,6 +1,7 @@ package treesitter import ( + "log" "unsafe" tree_sitter_heex "github.com/phoenixframework/tree-sitter-heex/bindings/go" @@ -8,20 +9,24 @@ import ( tree_sitter_elixir "github.com/tree-sitter/tree-sitter-elixir/bindings/go" ) -// Tree contains an Elixir document tree and a map of any HEEX sub-trees. -// Heex is a map of `quoted_content` nodes within sigils in the document -// tree to their corresponding HEEX sub-tree. +// Tree contains a document trunk tree and a map of any branch sub-trees. +// For Elixir trunks, Branches is a map of `quoted_content` node IDs within sigils +// in the document tree to their corresponding HEEX sub-tree. For HEEX trunks, +// Branches is a map of `expression_value` node IDs within interpolated expressions +// in the document tree to their corresponding Elixir sub-tree. Sub-trees may +// be nested arbitrarily deep, though in practice it will typically be 1-3 levels. // -// (sigil (sigil_name) node: (quoted_content)) +// Elixir->HEEX: (sigil (sigil_name) node: (quoted_content)) +// HEEX->Elixir: (expression node: (expression_value)) type Tree struct { - Trunk *tree_sitter.Tree - Heex map[*tree_sitter.Node]*tree_sitter.Tree + Trunk *tree_sitter.Tree + Branches map[uintptr]*Tree } // Close closes the trunk tree and any HEEX sub-trees. func (t *Tree) Close() { - for _, ht := range t.Heex { - ht.Close() + for _, b := range t.Branches { + b.Close() } t.Trunk.Close() } @@ -42,28 +47,40 @@ func NewTree(src []byte) *Tree { // NewTreeWithParsers parses src, parses nested HEEX templates, and returns the created trees. // Used by cached entry points . Returns nil on failure. 
func NewTreeWithParsers(src []byte, parsers map[Language]*tree_sitter.Parser) *Tree { - trunk := parsers[LangElixir].Parse(src, nil) + return newTree(LangElixir, src, parsers) +} + +func newTree(lang Language, src []byte, parsers map[Language]*tree_sitter.Parser) *Tree { + trunk := parsers[lang].Parse(src, nil) if trunk == nil { return nil } - heex := make(map[*tree_sitter.Node]*tree_sitter.Tree) + branches := make(map[uintptr]*Tree) visitTree(trunk.RootNode(), func(node *tree_sitter.Node) { - if node.Kind() == "quoted_content" && + // when visiting Elixir trees, parse nested ~H sigils as HEEX sub-trees + if lang == LangElixir && + node.Kind() == "quoted_content" && node.Parent() != nil && node.Parent().Kind() == "sigil" && /* sigil_name */ node.PrevNamedSibling() != nil && node.PrevNamedSibling().Utf8Text(src) == "H" { - tree := parsers[LangHeex].Parse(src[node.StartByte():node.EndByte()], nil) - if tree == nil { - return + log.Printf("HEEX sub-tree %d at %s", node.Id(), node.ToSexp()) + if tree := newTree(LangHeex, src[node.StartByte():node.EndByte()], parsers); tree != nil { + branches[node.Id()] = tree } + } - heex[node] = tree + // when visiting HEEX trees, parse nested expressions as Elixir sub-trees + if lang == LangHeex && node.Kind() == "expression_value" { + log.Printf("Elixir sub-tree %d at %s", node.Id(), node.ToSexp()) + if tree := newTree(LangElixir, src[node.StartByte():node.EndByte()], parsers); tree != nil { + branches[node.Id()] = tree + } } }) return &Tree{ - Trunk: trunk, - Heex: heex, + Trunk: trunk, + Branches: branches, } } diff --git a/internal/treesitter/variables.go b/internal/treesitter/variables.go index f7298fb..1b0585d 100644 --- a/internal/treesitter/variables.go +++ b/internal/treesitter/variables.go @@ -1,6 +1,7 @@ package treesitter import ( + "log" "strings" tree_sitter "github.com/tree-sitter/go-tree-sitter" @@ -139,34 +140,40 @@ type resolvedScope struct { // a variable or module attribute, and returns the enclosing scope. 
Returns nil // if the position is not on a renameable variable. func (t *Tree) resolveVariableScope(src []byte, line, col uint) *resolvedScope { - cursorNode := nodeAtPosition(t.Trunk.RootNode(), line, col) - if cursorNode == nil || cursorNode.Kind() != "identifier" { + cursorNode := t.nodeAtPosition(t.Trunk.RootNode(), line, col) + log.Printf("cursorNode(%d, %d): [%d:%d] [%d:%d]\n%s\n%s\n", + line, col, cursorNode.node.StartByte(), cursorNode.node.EndByte(), + cursorNode.node.StartByte(), cursorNode.node.EndByte(), cursorNode.utf8Text(src), src) + + if cursorNode == nil || cursorNode.node.Kind() != "identifier" { return nil } - varName := cursorNode.Utf8Text(src) + varName := cursorNode.utf8Text(src) if isDefinitionKeyword(varName) { return nil } + cursorNode_ := cursorNode.trunkNode() + // Module attribute (@foo or @foo value): scope is the enclosing defmodule. - if isModuleAttributeIdent(cursorNode, src) { - scope := findEnclosingModule(cursorNode, src) + if isModuleAttributeIdent(cursorNode_, src) { + scope := findEnclosingModule(cursorNode_, src) if scope == nil { return nil } - return &resolvedScope{cursorNode: cursorNode, scope: scope, varName: varName, moduleAttribute: true} + return &resolvedScope{cursorNode: cursorNode_, scope: scope, varName: varName, moduleAttribute: true} } // Check it's actually a variable — not a function name in a call or def keyword - if isFunctionNameInCall(cursorNode, src) { + if isFunctionNameInCall(cursorNode_, src) { return nil } // Find the enclosing scope: a stab_clause that binds this variable, or // the enclosing def/defp/defmacro/test call. - scope := findEnclosingScope(cursorNode, src, varName) + scope := findEnclosingScope(cursorNode_, src, varName) if scope == nil { return nil } @@ -177,11 +184,11 @@ func (t *Tree) resolveVariableScope(src []byte, line, col uint) *resolvedScope { // This ensures bare function calls fall through to function reference lookup. 
// Exception: if the cursor is on an assignment target (LHS of =), it is // unambiguously a variable binding regardless of other occurrences. - if !isAssignmentTarget(cursorNode, src) && !variableDefinedInScope(scope, src, varName, line, col) { + if !isAssignmentTarget(cursorNode_, src) && !variableDefinedInScope(scope, src, varName, line, col) { return nil } - return &resolvedScope{cursorNode: cursorNode, scope: scope, varName: varName} + return &resolvedScope{cursorNode: cursorNode_, scope: scope, varName: varName} } // moduleAttributeExists returns true if @name appears in the subtree. @@ -200,34 +207,59 @@ func moduleAttributeExists(node *tree_sitter.Node, src []byte, name string) bool return false } -// nodeAtPosition finds the deepest (most specific) node at the given position. -func nodeAtPosition(node *tree_sitter.Node, line, col uint) *tree_sitter.Node { - if node == nil { - return nil +// resolvedNode indicates a node that's been found, either directly in the trunk or +// within a sub-tree. For trunk nodes, parentTree will be nil. For sub-tree nodes, +// parentTree will point to the trunk node that contains the resolved node. +type resolvedNode struct { + node *tree_sitter.Node + parent *resolvedNode +} + +// FIXME: remove once all resolvedNode usage respects sub-trees +func (rn *resolvedNode) trunkNode() *tree_sitter.Node { + if rn.parent == nil { + return rn.node } - start := node.StartPosition() - end := node.EndPosition() + return rn.parent.trunkNode() +} - // Check if position is within this node - if line < uint(start.Row) || line > uint(end.Row) { - return nil +func (rn *resolvedNode) utf8Text(src []byte) string { + if rn.parent == nil { + return rn.node.Utf8Text(src) } - if line == uint(start.Row) && col < uint(start.Column) { + return rn.parent.utf8Text(src)[rn.node.StartByte():rn.node.EndByte()] +} + +// nodeAtPosition finds the deepest (most specific) node at the given position. 
+func (t *Tree) nodeAtPosition(node *tree_sitter.Node, line, col uint) *resolvedNode { + return t.nodeAtPositionInParent(nil, node, line, col) +} + +func (t *Tree) nodeAtPositionInParent(parent *resolvedNode, node *tree_sitter.Node, line, col uint) *resolvedNode { + // Check if position is within this node + if node == nil || !nodeContainsPosition(node, line, col) { return nil } - if line == uint(end.Row) && col >= uint(end.Column) { - return nil + + // Try to find a child within a sub-tree + if branch := t.Branches[node.Id()]; branch != nil { + log.Printf("submerging from %d", node.Id()) + parent = &resolvedNode{node: node, parent: parent} + if resolved := branch.nodeAtPositionInParent(parent, branch.Trunk.RootNode(), line-node.StartPosition().Row, col); resolved != nil { + return resolved + } + return &resolvedNode{node: node, parent: parent} } // Try to find a more specific child for i := uint(0); i < uint(node.ChildCount()); i++ { child := node.Child(i) - if found := nodeAtPosition(child, line, col); found != nil { + if found := t.nodeAtPositionInParent(parent, child, line, col); found != nil { return found } } - return node + return &resolvedNode{node: node, parent: parent} } // isFunctionNameInCall returns true if the identifier is the function name @@ -740,15 +772,15 @@ func FindVariablesInScope(src []byte, line, col uint) []string { // FindVariablesInScopeWithTree is like FindVariablesInScope but uses a // pre-parsed tree root. 
func (t *Tree) FindVariablesInScope(src []byte, line, col uint) []string { - cursorNode := nodeAtPosition(t.Trunk.RootNode(), line, col) + cursorNode := t.nodeAtPosition(t.Trunk.RootNode(), line, col) if cursorNode == nil && col > 0 { - cursorNode = nodeAtPosition(t.Trunk.RootNode(), line, col-1) + cursorNode = t.nodeAtPosition(t.Trunk.RootNode(), line, col-1) } if cursorNode == nil { return nil } - scope := findEnclosingFunction(cursorNode, src) + scope := findEnclosingFunction(cursorNode.trunkNode(), src) if scope == nil { return nil } From a3968e9c524671275a6438660961bde76020c061 Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Wed, 10 Jun 2026 16:10:53 -0700 Subject: [PATCH 22/31] [skip ci] wip: use resolvedNode during variable resolution --- internal/treesitter/variables.go | 95 +++++++++++++++++++------------- 1 file changed, 58 insertions(+), 37 deletions(-) diff --git a/internal/treesitter/variables.go b/internal/treesitter/variables.go index 1b0585d..8e47f35 100644 --- a/internal/treesitter/variables.go +++ b/internal/treesitter/variables.go @@ -141,9 +141,8 @@ type resolvedScope struct { // if the position is not on a renameable variable. func (t *Tree) resolveVariableScope(src []byte, line, col uint) *resolvedScope { cursorNode := t.nodeAtPosition(t.Trunk.RootNode(), line, col) - log.Printf("cursorNode(%d, %d): [%d:%d] [%d:%d]\n%s\n%s\n", - line, col, cursorNode.node.StartByte(), cursorNode.node.EndByte(), - cursorNode.node.StartByte(), cursorNode.node.EndByte(), cursorNode.utf8Text(src), src) + log.Printf("cursorNode(%d, %d): [%d:%d]\n%s\n", + line, col, cursorNode.node.StartByte(), cursorNode.node.EndByte(), cursorNode.utf8Text(src)) if cursorNode == nil || cursorNode.node.Kind() != "identifier" { return nil @@ -173,7 +172,7 @@ func (t *Tree) resolveVariableScope(src []byte, line, col uint) *resolvedScope { // Find the enclosing scope: a stab_clause that binds this variable, or // the enclosing def/defp/defmacro/test call. 
- scope := findEnclosingScope(cursorNode_, src, varName) + scope := findEnclosingScope(cursorNode, src, varName) if scope == nil { return nil } @@ -184,11 +183,11 @@ func (t *Tree) resolveVariableScope(src []byte, line, col uint) *resolvedScope { // This ensures bare function calls fall through to function reference lookup. // Exception: if the cursor is on an assignment target (LHS of =), it is // unambiguously a variable binding regardless of other occurrences. - if !isAssignmentTarget(cursorNode_, src) && !variableDefinedInScope(scope, src, varName, line, col) { + if !isAssignmentTarget(cursorNode, src) && !variableDefinedInScope(scope, src, varName, line, col) { return nil } - return &resolvedScope{cursorNode: cursorNode_, scope: scope, varName: varName} + return &resolvedScope{cursorNode: cursorNode_, scope: scope.node, varName: varName} } // moduleAttributeExists returns true if @name appears in the subtree. @@ -211,23 +210,45 @@ func moduleAttributeExists(node *tree_sitter.Node, src []byte, name string) bool // within a sub-tree. For trunk nodes, parentTree will be nil. For sub-tree nodes, // parentTree will point to the trunk node that contains the resolved node. type resolvedNode struct { - node *tree_sitter.Node - parent *resolvedNode + node *tree_sitter.Node + parentNode *resolvedNode } // FIXME: remove once all resolvedNode usage respects sub-trees func (rn *resolvedNode) trunkNode() *tree_sitter.Node { - if rn.parent == nil { + if rn.parentNode == nil { return rn.node } - return rn.parent.trunkNode() + return rn.parentNode.trunkNode() +} + +// parent moves up to the resolved node's nearest parent in the same tree. +// If the node has no parent, moves to the parent tree node instead. 
+func (rn *resolvedNode) parent() *resolvedNode { + if parent := rn.node.Parent(); parent != nil { + return &resolvedNode{parentNode: rn.parentNode, node: parent} + } + return rn.parentNode +} + +func (rn *resolvedNode) startPosition() tree_sitter.Point { + if rn.parentNode == nil { + return rn.node.StartPosition() + } + p := rn.parentNode.startPosition() + p.Row += rn.node.StartPosition().Row + return p +} + +func (rn *resolvedNode) child(i uint) *resolvedNode { + return &resolvedNode{parentNode: rn.parentNode, node: rn.node.Child(i)} } func (rn *resolvedNode) utf8Text(src []byte) string { - if rn.parent == nil { + if rn.parentNode == nil { return rn.node.Utf8Text(src) } - return rn.parent.utf8Text(src)[rn.node.StartByte():rn.node.EndByte()] + return rn.parentNode.utf8Text(src)[rn.node.StartByte():rn.node.EndByte()] } // nodeAtPosition finds the deepest (most specific) node at the given position. @@ -244,11 +265,11 @@ func (t *Tree) nodeAtPositionInParent(parent *resolvedNode, node *tree_sitter.No // Try to find a child within a sub-tree if branch := t.Branches[node.Id()]; branch != nil { log.Printf("submerging from %d", node.Id()) - parent = &resolvedNode{node: node, parent: parent} + parent = &resolvedNode{node: node, parentNode: parent} if resolved := branch.nodeAtPositionInParent(parent, branch.Trunk.RootNode(), line-node.StartPosition().Row, col); resolved != nil { return resolved } - return &resolvedNode{node: node, parent: parent} + return &resolvedNode{node: node, parentNode: parent} } // Try to find a more specific child @@ -259,7 +280,7 @@ func (t *Tree) nodeAtPositionInParent(parent *resolvedNode, node *tree_sitter.No } } - return &resolvedNode{node: node, parent: parent} + return &resolvedNode{node: node, parentNode: parent} } // isFunctionNameInCall returns true if the identifier is the function name @@ -358,16 +379,16 @@ func definesNestedScope(node *tree_sitter.Node, src []byte) bool { // isAssignmentTarget returns true if node is on the left-hand 
side of a `=` // binary operator, meaning it is unambiguously a variable binding. -func isAssignmentTarget(node *tree_sitter.Node, src []byte) bool { - parent := node.Parent() - if parent == nil || parent.Kind() != "binary_operator" || parent.ChildCount() < 3 { +func isAssignmentTarget(node *resolvedNode, src []byte) bool { + parent := node.parent() + if parent == nil || parent.node.Kind() != "binary_operator" || parent.node.ChildCount() < 3 { return false } - if parent.Child(1).Utf8Text(src) != "=" { + if parent.node.Child(1).Utf8Text(src) != "=" { return false } - left := parent.Child(0) - return node.StartByte() >= left.StartByte() && node.EndByte() <= left.EndByte() + left := parent.node.Child(0) + return node.node.StartByte() >= left.StartByte() && node.node.EndByte() <= left.EndByte() } // variableDefinedInScope returns true if varName is bound (defined) in the @@ -375,7 +396,7 @@ func isAssignmentTarget(node *tree_sitter.Node, src []byte) bool { // at a position other than the cursor. A bare identifier that only appears // at the cursor position is ambiguous (could be a zero-arity function call) // and should not be treated as a variable. -func variableDefinedInScope(scope *tree_sitter.Node, src []byte, varName string, cursorLine, cursorCol uint) bool { +func variableDefinedInScope(scope *resolvedNode, src []byte, varName string, cursorLine, cursorCol uint) bool { return identifierExistsElsewhere(scope, src, varName, cursorLine, cursorCol, true) } @@ -386,21 +407,21 @@ func variableDefinedInScope(scope *tree_sitter.Node, src []byte, varName string, // the chosen scope itself, which may be such a def call) — otherwise a bare // top-level call sharing a name with a function-local would be misread as a // variable. 
-func identifierExistsElsewhere(node *tree_sitter.Node, src []byte, name string, line, col uint, isRoot bool) bool { +func identifierExistsElsewhere(node *resolvedNode, src []byte, name string, line, col uint, isRoot bool) bool { if node == nil { return false } if !isRoot && definesNestedScope(node, src) { return false } - if node.Kind() == "identifier" && node.Utf8Text(src) == name && !isFunctionNameInCall(node, src) { - pos := node.StartPosition() + if node.node.Kind() == "identifier" && node.utf8Text(src) == name && !isFunctionNameInCall(node, src) { + pos := node.startPosition() if uint(pos.Row) != line || uint(pos.Column) != col { return true } } - for i := uint(0); i < uint(node.ChildCount()); i++ { - if identifierExistsElsewhere(node.Child(i), src, name, line, col, false) { + for i := uint(0); i < uint(node.node.ChildCount()); i++ { + if identifierExistsElsewhere(node.child(i), src, name, line, col, false) { return true } } @@ -415,12 +436,12 @@ func identifierExistsElsewhere(node *tree_sitter.Node, src []byte, name string, // boundary ONLY when the cursor is inside the do_block — not when it's on the // right side of a <- clause, which is evaluated in the outer scope. // Otherwise, the enclosing def/defp/defmacro/test call is the scope. -func findEnclosingScope(node *tree_sitter.Node, src []byte, varName string) *tree_sitter.Node { +func findEnclosingScope(node *resolvedNode, src []byte, varName string) *resolvedNode { prev := node - current := node.Parent() + current := node.parent() for current != nil { - if current.Kind() == "stab_clause" { - if stabBindsVariable(current, src, varName) { + if current.node.Kind() == "stab_clause" { + if stabBindsVariable(current.node, src, varName) { return current } // Body rebinds the variable (e.g. 
`fn ^x -> x = nil end`): the @@ -428,12 +449,12 @@ func findEnclosingScope(node *tree_sitter.Node, src []byte, varName string) *tre // Note: if the cursor is on a closure reference BEFORE the rebind // in the same body, it will be scoped to the fn rather than the // outer function. This is an acceptable limitation for a rare pattern. - if stabBodyRebindsVariable(current, src, varName) { + if stabBodyRebindsVariable(current.node, src, varName) { return current } } - if current.Kind() == "call" && current.ChildCount() > 0 { - firstChild := current.Child(0) + if current.node.Kind() == "call" && current.node.ChildCount() > 0 { + firstChild := current.node.Child(0) if firstChild.Kind() == "identifier" && functionKeywords[firstChild.Utf8Text(src)] { return current } @@ -444,8 +465,8 @@ func findEnclosingScope(node *tree_sitter.Node, src []byte, varName string) *tre return current } // with/for/etc.: scope boundary unless cursor is on clause 0's rhs (outer scope). - if callHasDoBlock(current) && callArgumentPatternsBindVariable(current, src, varName) { - if cursorNeedsWithScope(current, prev, node, src, varName) { + if callHasDoBlock(current.node) && callArgumentPatternsBindVariable(current.node, src, varName) { + if cursorNeedsWithScope(current.node, prev.node, node.node, src, varName) { return current } } @@ -456,7 +477,7 @@ func findEnclosingScope(node *tree_sitter.Node, src []byte, varName string) *tre return current } prev = current - current = current.Parent() + current = current.parent() } return nil } From 03c9f75577849523c2cd8a0a07175278e079b291 Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Wed, 10 Jun 2026 23:29:42 -0700 Subject: [PATCH 23/31] wip: variables.go tree_sitter.Node -> TreeNode --- internal/parser/tokenizer.go | 19 ++- internal/treesitter/tree.go | 172 +++++++++++++++++++++-- internal/treesitter/variables.go | 227 +++++++++---------------------- 3 files changed, 238 insertions(+), 180 deletions(-) diff --git a/internal/parser/tokenizer.go 
b/internal/parser/tokenizer.go index 228c21e..8ec7522 100644 --- a/internal/parser/tokenizer.go +++ b/internal/parser/tokenizer.go @@ -1272,14 +1272,19 @@ func DebugTokens(source []byte, tokens []Token) string { var s strings.Builder for _, t := range tokens { - switch t.Kind { - case TokDot, TokEOL, TokEOF, TokOpenBrace, TokCloseBrace: - fmt.Fprintf(&s, "%s (%d:%d)\n", t.Kind.String(), t.Start, t.End) - - default: - fmt.Fprintf(&s, "%s (%d:%d) %#v\n", t.Kind.String(), t.Start, t.End, TokenText(source, t)) - } + s.WriteString(t.Debug(source)) } return s.String() } + +// Debug returns a string representation similar to %+v for a token. +func (token Token) Debug(source []byte) string { + switch token.Kind { + case TokDot, TokEOL, TokEOF, TokOpenBrace, TokCloseBrace: + return fmt.Sprintf("%s (%d:%d)\n", token.Kind.String(), token.Start, token.End) + + default: + return fmt.Sprintf("%s (%d:%d) %#v\n", token.Kind.String(), token.Start, token.End, TokenText(source, token)) + } +} diff --git a/internal/treesitter/tree.go b/internal/treesitter/tree.go index 24bf1d3..7196af6 100644 --- a/internal/treesitter/tree.go +++ b/internal/treesitter/tree.go @@ -1,7 +1,6 @@ package treesitter import ( - "log" "unsafe" tree_sitter_heex "github.com/phoenixframework/tree-sitter-heex/bindings/go" @@ -16,11 +15,22 @@ import ( // in the document tree to their corresponding Elixir sub-tree. Sub-trees may // be nested arbitrarily deep, though in practice it will typically be 1-3 levels. // +// For nested sub-trees, Root and RootNode point back to the parent tree branch +// and node that contains the sub-tree. Navigation is possible both up (using Parent()) +// and down (using ChildCount() and Child(i)). 
+// // Elixir->HEEX: (sigil (sigil_name) node: (quoted_content)) // HEEX->Elixir: (expression node: (expression_value)) type Tree struct { + Root *TreeNode Trunk *tree_sitter.Tree Branches map[uintptr]*Tree + Language Language +} + +// TrunkNode returns a TreeNode pointing to the root node of the trunk. +func (t *Tree) TrunkNode() *TreeNode { + return &TreeNode{Tree: t, Node: t.Trunk.RootNode()} } // Close closes the trunk tree and any HEEX sub-trees. @@ -31,6 +41,146 @@ func (t *Tree) Close() { t.Trunk.Close() } +// TreeNode represents a node within a tree or sub-tree. +// This facilitates traversal between trunk trees and branch sub-trees. +type TreeNode struct { + Tree *Tree + Node *tree_sitter.Node +} + +// See tree_sitter.Node.Kind(). +func (tn *TreeNode) Kind() string { + return tn.Node.Kind() +} + +// See tree_sitter.Node.IsNamed(). +func (tn *TreeNode) IsNamed() bool { + return tn.Node.IsNamed() +} + +// See tree_sitter.Node.StartByte(). +func (tn *TreeNode) StartByte() uint { + if tn.Tree.Root == nil { + return tn.Node.StartByte() + } + return tn.Tree.Root.StartByte() + tn.Node.StartByte() +} + +// See tree_sitter.Node.EndByte(). +func (tn *TreeNode) EndByte() uint { + if tn.Tree.Root == nil { + return tn.Node.EndByte() + } + return tn.Tree.Root.StartByte() + tn.Node.EndByte() +} + +// Parent returns the node containing the given node in the tree, or the node +// in the root tree that contains the node if the node is the root of a branch +// sub-tree. If the node is the top-most root, returns nil. +func (tn *TreeNode) Parent() *TreeNode { + if parent := tn.Node.Parent(); parent != nil { + return &TreeNode{Tree: tn.Tree, Node: parent} + } + return tn.Tree.Root +} + +// ChildCount returns the number of children for the given node, returning +// 1 for nodes that link to a branch sub-tree. 
+func (tn *TreeNode) ChildCount() uint { + if branch := tn.Tree.Branches[tn.Node.Id()]; branch != nil { + return 1 + } + return tn.Node.ChildCount() +} + +// Child returns the tree/child of the given node, moving into a sub-tree if +// the node links to a branch sub-tree. +func (tn *TreeNode) Child(i uint) *TreeNode { + if branch := tn.Tree.Branches[tn.Node.Id()]; branch != nil { + return branch.TrunkNode() + } + return &TreeNode{Tree: tn.Tree, Node: tn.Node.Child(i)} +} + +// StartPosition returns the (row, col) start position of the given node +// within the top-most root tree. +func (tn *TreeNode) StartPosition() tree_sitter.Point { + if tn.Tree.Root == nil { + return tn.Node.StartPosition() + } + p := tn.Tree.Root.StartPosition() + sp := tn.Node.StartPosition() + p.Row += sp.Row + if sp.Row == p.Row { + p.Column += sp.Column + } else { + p.Column = sp.Column + } + return p +} + +// EndPosition returns the (row, col) end position of the given node +// within the top-most root tree. +func (tn *TreeNode) EndPosition() tree_sitter.Point { + if tn.Tree.Root == nil { + return tn.Node.EndPosition() + } + p := tn.Tree.Root.StartPosition() + ep := tn.Node.EndPosition() + p.Row += ep.Row + if ep.Row == p.Row { + p.Column += ep.Column + } else { + p.Column = ep.Column + } + return p +} + +// Utf8Text returns the UTF-8 encoded string representation of the given node +// within the top-most root tree. +func (tn *TreeNode) Utf8Text(src []byte) string { + if tn.Tree.Root == nil { + return tn.Node.Utf8Text(src) + } + return tn.Tree.Root.Utf8Text(src)[tn.Node.StartByte():tn.Node.EndByte()] +} + +// ContainsPosition returns true if the node contains the given position +// in the top-most root tree. Tree-sitter end positions are exclusive, +// consistent with nodeAtPosition. 
+func (tn *TreeNode) ContainsPosition(line, col uint) bool { + start := tn.StartPosition() + end := tn.EndPosition() + if line < uint(start.Row) || line > uint(end.Row) { + return false + } + if line == uint(start.Row) && col < uint(start.Column) { + return false + } + if line == uint(end.Row) && col >= uint(end.Column) { + return false + } + return true +} + +// ChildAtPosition find the deepest (most specific) child node at the given position +// within the top-most root tree. +func (tn *TreeNode) ChildAtPosition(line, col uint) *TreeNode { + // Check if position is within this node + if tn == nil || !tn.ContainsPosition(line, col) { + return nil + } + + // Try to find a more specific child + for i := uint(0); i < tn.ChildCount(); i++ { + if found := tn.Child(i).ChildAtPosition(line, col); found != nil { + return found + } + } + + return tn +} + // NewTree creates parsers, parses src, parses nested HEEX templates, and returns the created trees. // Used by the standalone (non-cached) entry points. Returns nil on failure. 
func NewTree(src []byte) *Tree { @@ -56,32 +206,34 @@ func newTree(lang Language, src []byte, parsers map[Language]*tree_sitter.Parser return nil } - branches := make(map[uintptr]*Tree) + t := &Tree{ + Language: lang, + Trunk: trunk, + Branches: make(map[uintptr]*Tree), + } + visitTree(trunk.RootNode(), func(node *tree_sitter.Node) { // when visiting Elixir trees, parse nested ~H sigils as HEEX sub-trees if lang == LangElixir && node.Kind() == "quoted_content" && node.Parent() != nil && node.Parent().Kind() == "sigil" && /* sigil_name */ node.PrevNamedSibling() != nil && node.PrevNamedSibling().Utf8Text(src) == "H" { - log.Printf("HEEX sub-tree %d at %s", node.Id(), node.ToSexp()) if tree := newTree(LangHeex, src[node.StartByte():node.EndByte()], parsers); tree != nil { - branches[node.Id()] = tree + tree.Root = &TreeNode{Tree: t, Node: node} + t.Branches[node.Id()] = tree } } // when visiting HEEX trees, parse nested expressions as Elixir sub-trees if lang == LangHeex && node.Kind() == "expression_value" { - log.Printf("Elixir sub-tree %d at %s", node.Id(), node.ToSexp()) if tree := newTree(LangElixir, src[node.StartByte():node.EndByte()], parsers); tree != nil { - branches[node.Id()] = tree + tree.Root = &TreeNode{Tree: t, Node: node} + t.Branches[node.Id()] = tree } } }) - return &Tree{ - Trunk: trunk, - Branches: branches, - } + return t } type Language byte diff --git a/internal/treesitter/variables.go b/internal/treesitter/variables.go index 8e47f35..8e0e2f5 100644 --- a/internal/treesitter/variables.go +++ b/internal/treesitter/variables.go @@ -1,7 +1,6 @@ package treesitter import ( - "log" "strings" tree_sitter "github.com/tree-sitter/go-tree-sitter" @@ -106,11 +105,11 @@ func (t *Tree) NameExistsInScopeOf(src []byte, line, col uint, newName string) b // descended into — a same-named binding inside one is not a collision in the // scope rooted at node. 
(The root itself may be such a def call when renaming a // function-local; that is the chosen scope and is always searched.) -func findFirstNonCallIdentifier(node *tree_sitter.Node, src []byte, name string) *tree_sitter.Node { +func findFirstNonCallIdentifier(node *TreeNode, src []byte, name string) *TreeNode { return findFirstNonCallIdentifierInScope(node, src, name, true) } -func findFirstNonCallIdentifierInScope(node *tree_sitter.Node, src []byte, name string, isRoot bool) *tree_sitter.Node { +func findFirstNonCallIdentifierInScope(node *TreeNode, src []byte, name string, isRoot bool) *TreeNode { if node == nil { return nil } @@ -130,8 +129,8 @@ func findFirstNonCallIdentifierInScope(node *tree_sitter.Node, src []byte, name // resolvedScope holds the result of locating a variable's scope. type resolvedScope struct { - cursorNode *tree_sitter.Node - scope *tree_sitter.Node + cursorNode *TreeNode + scope *TreeNode varName string moduleAttribute bool // true when the identifier is a module attribute (@foo) } @@ -140,33 +139,29 @@ type resolvedScope struct { // a variable or module attribute, and returns the enclosing scope. Returns nil // if the position is not on a renameable variable. func (t *Tree) resolveVariableScope(src []byte, line, col uint) *resolvedScope { - cursorNode := t.nodeAtPosition(t.Trunk.RootNode(), line, col) - log.Printf("cursorNode(%d, %d): [%d:%d]\n%s\n", - line, col, cursorNode.node.StartByte(), cursorNode.node.EndByte(), cursorNode.utf8Text(src)) + cursorNode := t.TrunkNode().ChildAtPosition(line, col) - if cursorNode == nil || cursorNode.node.Kind() != "identifier" { + if cursorNode == nil || cursorNode.Kind() != "identifier" { return nil } - varName := cursorNode.utf8Text(src) + varName := cursorNode.Utf8Text(src) if isDefinitionKeyword(varName) { return nil } - cursorNode_ := cursorNode.trunkNode() - // Module attribute (@foo or @foo value): scope is the enclosing defmodule. 
- if isModuleAttributeIdent(cursorNode_, src) { - scope := findEnclosingModule(cursorNode_, src) + if isModuleAttributeIdent(cursorNode, src) { + scope := findEnclosingModule(cursorNode, src) if scope == nil { return nil } - return &resolvedScope{cursorNode: cursorNode_, scope: scope, varName: varName, moduleAttribute: true} + return &resolvedScope{cursorNode: cursorNode, scope: scope, varName: varName, moduleAttribute: true} } // Check it's actually a variable — not a function name in a call or def keyword - if isFunctionNameInCall(cursorNode_, src) { + if isFunctionNameInCall(cursorNode, src) { return nil } @@ -187,11 +182,11 @@ func (t *Tree) resolveVariableScope(src []byte, line, col uint) *resolvedScope { return nil } - return &resolvedScope{cursorNode: cursorNode_, scope: scope.node, varName: varName} + return &resolvedScope{cursorNode: cursorNode, scope: scope, varName: varName} } // moduleAttributeExists returns true if @name appears in the subtree. -func moduleAttributeExists(node *tree_sitter.Node, src []byte, name string) bool { +func moduleAttributeExists(node *TreeNode, src []byte, name string) bool { if node == nil { return false } @@ -206,87 +201,10 @@ func moduleAttributeExists(node *tree_sitter.Node, src []byte, name string) bool return false } -// resolvedNode indicates a node that's been found, either directly in the trunk or -// within a sub-tree. For trunk nodes, parentTree will be nil. For sub-tree nodes, -// parentTree will point to the trunk node that contains the resolved node. -type resolvedNode struct { - node *tree_sitter.Node - parentNode *resolvedNode -} - -// FIXME: remove once all resolvedNode usage respects sub-trees -func (rn *resolvedNode) trunkNode() *tree_sitter.Node { - if rn.parentNode == nil { - return rn.node - } - return rn.parentNode.trunkNode() -} - -// parent moves up to the resolved node's nearest parent in the same tree. -// If the node has no parent, moves to the parent tree node instead. 
-func (rn *resolvedNode) parent() *resolvedNode { - if parent := rn.node.Parent(); parent != nil { - return &resolvedNode{parentNode: rn.parentNode, node: parent} - } - return rn.parentNode -} - -func (rn *resolvedNode) startPosition() tree_sitter.Point { - if rn.parentNode == nil { - return rn.node.StartPosition() - } - p := rn.parentNode.startPosition() - p.Row += rn.node.StartPosition().Row - return p -} - -func (rn *resolvedNode) child(i uint) *resolvedNode { - return &resolvedNode{parentNode: rn.parentNode, node: rn.node.Child(i)} -} - -func (rn *resolvedNode) utf8Text(src []byte) string { - if rn.parentNode == nil { - return rn.node.Utf8Text(src) - } - return rn.parentNode.utf8Text(src)[rn.node.StartByte():rn.node.EndByte()] -} - -// nodeAtPosition finds the deepest (most specific) node at the given position. -func (t *Tree) nodeAtPosition(node *tree_sitter.Node, line, col uint) *resolvedNode { - return t.nodeAtPositionInParent(nil, node, line, col) -} - -func (t *Tree) nodeAtPositionInParent(parent *resolvedNode, node *tree_sitter.Node, line, col uint) *resolvedNode { - // Check if position is within this node - if node == nil || !nodeContainsPosition(node, line, col) { - return nil - } - - // Try to find a child within a sub-tree - if branch := t.Branches[node.Id()]; branch != nil { - log.Printf("submerging from %d", node.Id()) - parent = &resolvedNode{node: node, parentNode: parent} - if resolved := branch.nodeAtPositionInParent(parent, branch.Trunk.RootNode(), line-node.StartPosition().Row, col); resolved != nil { - return resolved - } - return &resolvedNode{node: node, parentNode: parent} - } - - // Try to find a more specific child - for i := uint(0); i < uint(node.ChildCount()); i++ { - child := node.Child(i) - if found := t.nodeAtPositionInParent(parent, child, line, col); found != nil { - return found - } - } - - return &resolvedNode{node: node, parentNode: parent} -} - // isFunctionNameInCall returns true if the identifier is the function name // in 
a call expression (e.g., `foo` in `foo(args)`) or a function name being // defined (e.g., `foo` in `def foo(args) do`). -func isFunctionNameInCall(node *tree_sitter.Node, src []byte) bool { +func isFunctionNameInCall(node *TreeNode, src []byte) bool { parent := node.Parent() if parent == nil { return false @@ -379,16 +297,16 @@ func definesNestedScope(node *tree_sitter.Node, src []byte) bool { // isAssignmentTarget returns true if node is on the left-hand side of a `=` // binary operator, meaning it is unambiguously a variable binding. -func isAssignmentTarget(node *resolvedNode, src []byte) bool { - parent := node.parent() - if parent == nil || parent.node.Kind() != "binary_operator" || parent.node.ChildCount() < 3 { +func isAssignmentTarget(node *TreeNode, src []byte) bool { + parent := node.Parent() + if parent == nil || parent.Kind() != "binary_operator" || parent.ChildCount() < 3 { return false } - if parent.node.Child(1).Utf8Text(src) != "=" { + if parent.Child(1).Utf8Text(src) != "=" { return false } - left := parent.node.Child(0) - return node.node.StartByte() >= left.StartByte() && node.node.EndByte() <= left.EndByte() + left := parent.Child(0) + return node.StartByte() >= left.StartByte() && node.EndByte() <= left.EndByte() } // variableDefinedInScope returns true if varName is bound (defined) in the @@ -396,7 +314,7 @@ func isAssignmentTarget(node *resolvedNode, src []byte) bool { // at a position other than the cursor. A bare identifier that only appears // at the cursor position is ambiguous (could be a zero-arity function call) // and should not be treated as a variable. 
-func variableDefinedInScope(scope *resolvedNode, src []byte, varName string, cursorLine, cursorCol uint) bool { +func variableDefinedInScope(scope *TreeNode, src []byte, varName string, cursorLine, cursorCol uint) bool { return identifierExistsElsewhere(scope, src, varName, cursorLine, cursorCol, true) } @@ -407,21 +325,21 @@ func variableDefinedInScope(scope *resolvedNode, src []byte, varName string, cur // the chosen scope itself, which may be such a def call) — otherwise a bare // top-level call sharing a name with a function-local would be misread as a // variable. -func identifierExistsElsewhere(node *resolvedNode, src []byte, name string, line, col uint, isRoot bool) bool { +func identifierExistsElsewhere(node *TreeNode, src []byte, name string, line, col uint, isRoot bool) bool { if node == nil { return false } if !isRoot && definesNestedScope(node, src) { return false } - if node.node.Kind() == "identifier" && node.utf8Text(src) == name && !isFunctionNameInCall(node, src) { - pos := node.startPosition() + if node.Kind() == "identifier" && node.Utf8Text(src) == name && !isFunctionNameInCall(node, src) { + pos := node.StartPosition() if uint(pos.Row) != line || uint(pos.Column) != col { return true } } - for i := uint(0); i < uint(node.node.ChildCount()); i++ { - if identifierExistsElsewhere(node.child(i), src, name, line, col, false) { + for i := uint(0); i < uint(node.ChildCount()); i++ { + if identifierExistsElsewhere(node.Child(i), src, name, line, col, false) { return true } } @@ -436,12 +354,12 @@ func identifierExistsElsewhere(node *resolvedNode, src []byte, name string, line // boundary ONLY when the cursor is inside the do_block — not when it's on the // right side of a <- clause, which is evaluated in the outer scope. // Otherwise, the enclosing def/defp/defmacro/test call is the scope. 
-func findEnclosingScope(node *resolvedNode, src []byte, varName string) *resolvedNode { +func findEnclosingScope(node *TreeNode, src []byte, varName string) *TreeNode { prev := node - current := node.parent() + current := node.Parent() for current != nil { - if current.node.Kind() == "stab_clause" { - if stabBindsVariable(current.node, src, varName) { + if current.Kind() == "stab_clause" { + if stabBindsVariable(current, src, varName) { return current } // Body rebinds the variable (e.g. `fn ^x -> x = nil end`): the @@ -449,12 +367,12 @@ func findEnclosingScope(node *resolvedNode, src []byte, varName string) *resolve // Note: if the cursor is on a closure reference BEFORE the rebind // in the same body, it will be scoped to the fn rather than the // outer function. This is an acceptable limitation for a rare pattern. - if stabBodyRebindsVariable(current.node, src, varName) { + if stabBodyRebindsVariable(current, src, varName) { return current } } - if current.node.Kind() == "call" && current.node.ChildCount() > 0 { - firstChild := current.node.Child(0) + if current.Kind() == "call" && current.ChildCount() > 0 { + firstChild := current.Child(0) if firstChild.Kind() == "identifier" && functionKeywords[firstChild.Utf8Text(src)] { return current } @@ -465,8 +383,8 @@ func findEnclosingScope(node *resolvedNode, src []byte, varName string) *resolve return current } // with/for/etc.: scope boundary unless cursor is on clause 0's rhs (outer scope). 
- if callHasDoBlock(current.node) && callArgumentPatternsBindVariable(current.node, src, varName) { - if cursorNeedsWithScope(current.node, prev.node, node.node, src, varName) { + if callHasDoBlock(current) && callArgumentPatternsBindVariable(current, src, varName) { + if cursorNeedsWithScope(current, prev, node, src, varName) { return current } } @@ -477,13 +395,13 @@ func findEnclosingScope(node *resolvedNode, src []byte, varName string) *resolve return current } prev = current - current = current.parent() + current = current.Parent() } return nil } // nodeIsInsideDoBlock returns true if child is inside the do_block of callNode. -func nodeIsInsideDoBlock(callNode, child *tree_sitter.Node) bool { +func nodeIsInsideDoBlock(callNode, child *TreeNode) bool { for i := uint(0); i < uint(callNode.ChildCount()); i++ { block := callNode.Child(i) if block.Kind() == "do_block" && @@ -499,7 +417,7 @@ func nodeIsInsideDoBlock(callNode, child *tree_sitter.Node) bool { // given with/for call should act as a scope boundary: inside the do_block, // on a lvalue of <-/=, or on the rhs of clause N>0 (which references clause // N-1's binding, not the outer scope). -func cursorNeedsWithScope(callNode, prev, cursor *tree_sitter.Node, src []byte, varName string) bool { +func cursorNeedsWithScope(callNode, prev, cursor *TreeNode, src []byte, varName string) bool { if nodeIsInsideDoBlock(callNode, prev) { return true } @@ -527,7 +445,7 @@ func cursorNeedsWithScope(callNode, prev, cursor *tree_sitter.Node, src []byte, // varName within the given subtree, skipping function names in calls. // skipScopeCheck should be true when node is the scope root itself (so we // don't immediately bail out of the scope we chose). 
-func collectVariableOccurrences(node *tree_sitter.Node, src []byte, varName string, out *[]VariableOccurrence, skipScopeCheck bool) { +func collectVariableOccurrences(node *TreeNode, src []byte, varName string, out *[]VariableOccurrence, skipScopeCheck bool) { if node == nil { return } @@ -587,7 +505,7 @@ func collectVariableOccurrences(node *tree_sitter.Node, src []byte, varName stri // stabBodyRebindsVariable returns true if the body of the stab_clause contains // an assignment (=) whose left-hand side unpinnedly binds varName. -func stabBodyRebindsVariable(stabClause *tree_sitter.Node, src []byte, varName string) bool { +func stabBodyRebindsVariable(stabClause *TreeNode, src []byte, varName string) bool { for i := uint(0); i < uint(stabClause.ChildCount()); i++ { child := stabClause.Child(i) if child.Kind() == "arguments" { @@ -602,7 +520,7 @@ func stabBodyRebindsVariable(stabClause *tree_sitter.Node, src []byte, varName s // subtreeContainsAssignmentOf returns true if the subtree has a binary "=" // whose lvalue unpinnedly binds varName. -func subtreeContainsAssignmentOf(node *tree_sitter.Node, src []byte, varName string) bool { +func subtreeContainsAssignmentOf(node *TreeNode, src []byte, varName string) bool { if node == nil { return false } @@ -623,7 +541,7 @@ func subtreeContainsAssignmentOf(node *tree_sitter.Node, src []byte, varName str // collectStabArgs collects variable occurrences from the args of a stab_clause // only (not the body). Used when the body rebinds the variable. 
-func collectStabArgs(stabClause *tree_sitter.Node, src []byte, varName string, out *[]VariableOccurrence) { +func collectStabArgs(stabClause *TreeNode, src []byte, varName string, out *[]VariableOccurrence) { for i := uint(0); i < uint(stabClause.ChildCount()); i++ { child := stabClause.Child(i) if child.Kind() == "arguments" { @@ -637,7 +555,7 @@ func collectStabArgs(stabClause *tree_sitter.Node, src []byte, varName string, o // // @foo → unary_operator("@") → identifier("foo") // @foo value → unary_operator("@") → call → identifier("foo") … -func isModuleAttributeIdent(node *tree_sitter.Node, src []byte) bool { +func isModuleAttributeIdent(node *TreeNode, src []byte) bool { parent := node.Parent() if parent == nil { return false @@ -657,7 +575,7 @@ func isModuleAttributeIdent(node *tree_sitter.Node, src []byte) bool { } // isAtUnaryOp returns true if node is a unary_operator with the @ operator. -func isAtUnaryOp(node *tree_sitter.Node, src []byte) bool { +func isAtUnaryOp(node *TreeNode, src []byte) bool { if node.Kind() != "unary_operator" { return false } @@ -671,7 +589,7 @@ func isAtUnaryOp(node *tree_sitter.Node, src []byte) bool { } // findEnclosingModule walks up from node to find the nearest defmodule call. -func findEnclosingModule(node *tree_sitter.Node, src []byte) *tree_sitter.Node { +func findEnclosingModule(node *TreeNode, src []byte) *TreeNode { current := node.Parent() for current != nil { if current.Kind() == "call" && current.ChildCount() > 0 { @@ -688,7 +606,7 @@ func findEnclosingModule(node *tree_sitter.Node, src []byte) *tree_sitter.Node { // collectModuleAttributeOccurrences collects all @attrName occurrences within // the subtree — that is, identifier nodes named attrName that are part of a // module attribute expression (@attrName or @attrName value). 
-func collectModuleAttributeOccurrences(node *tree_sitter.Node, src []byte, attrName string, out *[]VariableOccurrence) { +func collectModuleAttributeOccurrences(node *TreeNode, src []byte, attrName string, out *[]VariableOccurrence) { if node == nil { return } @@ -721,11 +639,11 @@ func FindTokenOccurrences(src []byte, token string) []VariableOccurrence { // pre-parsed tree root. func (t *Tree) FindTokenOccurrences(src []byte, token string) []VariableOccurrence { var occurrences []VariableOccurrence - collectTokenOccurrences(t.Trunk.RootNode(), src, token, &occurrences) + collectTokenOccurrences(t.TrunkNode(), src, token, &occurrences) return occurrences } -func collectTokenOccurrences(node *tree_sitter.Node, src []byte, token string, out *[]VariableOccurrence) { +func collectTokenOccurrences(node *TreeNode, src []byte, token string, out *[]VariableOccurrence) { if node == nil { return } @@ -793,15 +711,15 @@ func FindVariablesInScope(src []byte, line, col uint) []string { // FindVariablesInScopeWithTree is like FindVariablesInScope but uses a // pre-parsed tree root. func (t *Tree) FindVariablesInScope(src []byte, line, col uint) []string { - cursorNode := t.nodeAtPosition(t.Trunk.RootNode(), line, col) + cursorNode := t.TrunkNode().ChildAtPosition(line, col) if cursorNode == nil && col > 0 { - cursorNode = t.nodeAtPosition(t.Trunk.RootNode(), line, col-1) + cursorNode = t.TrunkNode().ChildAtPosition(line, col-1) } if cursorNode == nil { return nil } - scope := findEnclosingFunction(cursorNode.trunkNode(), src) + scope := findEnclosingFunction(cursorNode, src) if scope == nil { return nil } @@ -813,7 +731,7 @@ func (t *Tree) FindVariablesInScope(src []byte, line, col uint) []string { } // findEnclosingFunction walks up from node to find the nearest def/defp/etc scope. 
-func findEnclosingFunction(node *tree_sitter.Node, src []byte) *tree_sitter.Node { +func findEnclosingFunction(node *TreeNode, src []byte) *TreeNode { current := node.Parent() for current != nil { if current.Kind() == "call" && current.ChildCount() > 0 { @@ -831,12 +749,12 @@ func findEnclosingFunction(node *tree_sitter.Node, src []byte) *tree_sitter.Node // excluding function names, definition keywords, and module attributes. // Skips stab_clauses and do..end calls that don't contain the cursor, // since variables don't leak out of those scopes in Elixir. -func collectVariableNames(node *tree_sitter.Node, src []byte, seen map[string]bool, out *[]string, cursorLine, cursorCol uint) { +func collectVariableNames(node *TreeNode, src []byte, seen map[string]bool, out *[]string, cursorLine, cursorCol uint) { if node == nil { return } - if !nodeContainsPosition(node, cursorLine, cursorCol) { + if !node.ContainsPosition(cursorLine, cursorCol) { // Variables in other case/fn clauses are not in scope. if node.Kind() == "stab_clause" { return @@ -867,8 +785,8 @@ func collectVariableNames(node *tree_sitter.Node, src []byte, seen map[string]bo // extractArrowClauses returns the binary_operator nodes for <- and = in the // call's arguments, in source order. -func extractArrowClauses(callNode *tree_sitter.Node, src []byte) []*tree_sitter.Node { - var clauses []*tree_sitter.Node +func extractArrowClauses(callNode *TreeNode, src []byte) []*TreeNode { + var clauses []*TreeNode for i := uint(0); i < uint(callNode.ChildCount()); i++ { child := callNode.Child(i) if child.Kind() != "arguments" { @@ -897,7 +815,7 @@ func extractArrowClauses(callNode *tree_sitter.Node, src []byte) []*tree_sitter. 
// - Cursor on rhs1: uses lhs0's binding — collect lhs0 + rhs1 (+ further rhs until rebind) + body // - Cursor on lhs1: collect lhs1 + body // - Cursor in body: uses last clause's binding — collect last lhs + body -func collectWithOccurrences(callNode, cursor *tree_sitter.Node, src []byte, varName string, out *[]VariableOccurrence) { +func collectWithOccurrences(callNode, cursor *TreeNode, src []byte, varName string, out *[]VariableOccurrence) { clauses := extractArrowClauses(callNode, src) // Find which clause and side the cursor is on @@ -920,7 +838,7 @@ func collectWithOccurrences(callNode, cursor *tree_sitter.Node, src []byte, varN } // Find the do_block - var doBlock *tree_sitter.Node + var doBlock *TreeNode for i := uint(0); i < uint(callNode.ChildCount()); i++ { child := callNode.Child(i) if child.Kind() == "do_block" { @@ -993,7 +911,7 @@ func collectWithOccurrences(callNode, cursor *tree_sitter.Node, src []byte, varN // of =/← binary operators in a call's arguments, processing clauses // sequentially. Once a clause's pattern (left side) rebinds varName, // subsequent clauses and the do_block use the new binding — so we stop. -func collectPatternExpressionOccurrences(callNode *tree_sitter.Node, src []byte, varName string, out *[]VariableOccurrence) { +func collectPatternExpressionOccurrences(callNode *TreeNode, src []byte, varName string, out *[]VariableOccurrence) { for i := uint(0); i < uint(callNode.ChildCount()); i++ { child := callNode.Child(i) if child.Kind() != "arguments" { @@ -1023,7 +941,7 @@ func collectPatternExpressionOccurrences(callNode *tree_sitter.Node, src []byte, // callArgumentPatternsBindVariable checks whether a call's argument patterns // (left side of = or <- operators) contain an unpinned binding of varName. 
-func callArgumentPatternsBindVariable(node *tree_sitter.Node, src []byte, varName string) bool { +func callArgumentPatternsBindVariable(node *TreeNode, src []byte, varName string) bool { for i := uint(0); i < uint(node.ChildCount()); i++ { child := node.Child(i) if child.Kind() != "arguments" { @@ -1044,7 +962,7 @@ func callArgumentPatternsBindVariable(node *tree_sitter.Node, src []byte, varNam return false } -func callHasDoBlock(node *tree_sitter.Node) bool { +func callHasDoBlock(node *TreeNode) bool { for i := uint(0); i < uint(node.ChildCount()); i++ { if node.Child(i).Kind() == "do_block" { return true @@ -1053,28 +971,11 @@ func callHasDoBlock(node *tree_sitter.Node) bool { return false } -// nodeContainsPosition returns true if the node's range includes the given position. -// Tree-sitter end positions are exclusive, consistent with nodeAtPosition. -func nodeContainsPosition(node *tree_sitter.Node, line, col uint) bool { - start := node.StartPosition() - end := node.EndPosition() - if line < uint(start.Row) || line > uint(end.Row) { - return false - } - if line == uint(start.Row) && col < uint(start.Column) { - return false - } - if line == uint(end.Row) && col >= uint(end.Column) { - return false - } - return true -} - // stabBindsVariable returns true if the stab_clause's arguments (pattern) // contain an unpinned identifier matching varName, meaning it creates a new // binding. Pinned variables (^varName) reference the outer scope and do NOT // create a new binding. 
-func stabBindsVariable(stabClause *tree_sitter.Node, src []byte, varName string) bool { +func stabBindsVariable(stabClause *TreeNode, src []byte, varName string) bool { for i := uint(0); i < uint(stabClause.ChildCount()); i++ { child := stabClause.Child(i) if child.Kind() == "arguments" { @@ -1087,7 +988,7 @@ func stabBindsVariable(stabClause *tree_sitter.Node, src []byte, varName string) // subtreeContainsUnpinnedIdentifier returns true if any identifier node in the // subtree has the given name AND is not pinned (^varName). Pinned variables // reference an outer binding and do not create a new one. -func subtreeContainsUnpinnedIdentifier(node *tree_sitter.Node, src []byte, name string) bool { +func subtreeContainsUnpinnedIdentifier(node *TreeNode, src []byte, name string) bool { if node == nil { return false } @@ -1107,7 +1008,7 @@ func subtreeContainsUnpinnedIdentifier(node *tree_sitter.Node, src []byte, name } // isPinOperator returns true if node is a unary_operator with the ^ operator. 
-func isPinOperator(node *tree_sitter.Node, src []byte) bool { +func isPinOperator(node *TreeNode, src []byte) bool { if node.Kind() != "unary_operator" { return false } From 7eb56c39dda10e801e63318ea0eaba9da9eaeb90 Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Thu, 11 Jun 2026 00:27:11 -0700 Subject: [PATCH 24/31] parse HEEX function components as references --- internal/lsp/server_test.go | 37 ++++++++++------------------- internal/parser/parser_tokenized.go | 9 +++++++ internal/parser/tokenizer.go | 6 ++++- internal/parser/tokenizer_test.go | 17 +++++++++---- internal/parser/tokenkind_string.go | 16 +++++++------ 5 files changed, 47 insertions(+), 38 deletions(-) diff --git a/internal/lsp/server_test.go b/internal/lsp/server_test.go index 54d74a7..65df317 100644 --- a/internal/lsp/server_test.go +++ b/internal/lsp/server_test.go @@ -3,7 +3,6 @@ package lsp import ( "context" "fmt" - "log" "os" "os/exec" "path/filepath" @@ -17,7 +16,6 @@ import ( "github.com/remoteoss/dexter/internal/parser" "github.com/remoteoss/dexter/internal/stdlib" "github.com/remoteoss/dexter/internal/store" - "github.com/remoteoss/dexter/internal/treesitter" ) func setupTestServer(t *testing.T) (*Server, func()) { @@ -2923,21 +2921,6 @@ end } } -func TestPlayground(t *testing.T) { - src := `~H""" -{@foo} -"""` - tree := treesitter.NewTree([]byte(src)) - defer tree.Close() - - for _, t := range tree.Branches { - log.Printf("t: %s", t.Trunk.RootNode().ToSexp()) - for _, tt := range t.Branches { - log.Printf("tt: %s", tt.Trunk.RootNode().ToSexp()) - } - } -} - func TestReferences_HEEXNestedReference(t *testing.T) { server, cleanup := setupTestServer(t) defer cleanup() @@ -2962,18 +2945,22 @@ end uri := "file://" + filepath.Join(server.projectRoot, "lib", "app.ex") server.docs.Set(uri, src) - // Go-to-references on "foo" in the <.foo /> component (line 8, col 6) - locs := referencesAt(t, server, uri, 7, 6) - log.Printf("%+v", locs) + // Go-to-references on "foo" in the component (line 9, col 
11) + locs := referencesAt(t, server, uri, 8, 11) if len(locs) == 0 { - t.Fatal("expected references for function foo") + t.Fatal("expected references for function MyApp.foo") + } + if locs[0].Range.Start.Line != 8 { + t.Fatalf("expected reference on line 8, got line %d", locs[0].Range.Start.Line) } - // Go-to-references on "foo" in the def line (line 4, col 6) - locs = referencesAt(t, server, uri, 3, 6) - log.Printf("%+v", locs) + // Go-to-references on "foo" in the <.foo /> line (line 8, col 6) + locs = referencesAt(t, server, uri, 7, 6) if len(locs) == 0 { - t.Fatal("expected references for function foo") + t.Fatal("expected references for function .foo") + } + if locs[0].Range.Start.Line != 7 { + t.Fatalf("expected reference on line 7, got line %d", locs[0].Range.Start.Line) } } diff --git a/internal/parser/parser_tokenized.go b/internal/parser/parser_tokenized.go index 04d6153..6ad7506 100644 --- a/internal/parser/parser_tokenized.go +++ b/internal/parser/parser_tokenized.go @@ -649,6 +649,15 @@ func parseTextFromTokens(path string, source []byte, tokens []Token) ([]Definiti case TokIdent: cm := currentModule() if cm != "" && len(injectors) > 0 { + isHEEXFunction := i > 1 && tokens[i-1].Kind == TokDot && + (tokens[i-2].Kind == TokHEEXOpenTag || tokens[i-2].Kind == TokHEEXCloseTag) + if isHEEXFunction { + name := tokenText(tok) + refs = append(refs, Reference{Module: cm, Function: name, Line: tok.Line, FilePath: path, Kind: "call"}) + i++ + continue + } + isStatementStart := i == 0 || tokens[i-1].Kind == TokEOL || tokens[i-1].Kind == TokComment if isStatementStart { name := tokenText(tok) diff --git a/internal/parser/tokenizer.go b/internal/parser/tokenizer.go index 8ec7522..e438820 100644 --- a/internal/parser/tokenizer.go +++ b/internal/parser/tokenizer.go @@ -65,6 +65,8 @@ const ( TokAssoc // => TokDoubleColon // :: TokPercent // % + TokHEEXOpenTag // < + TokHEEXCloseTag // ' { i++ @@ -1064,6 +1067,7 @@ func TokenizeHeex(source []byte) TokenResult { } else { 
// HTML tag "" TokEOF (24:24) `}, - {"
hello!
", `TokEOF (17:17) + {"
hello!
", `TokHEEXOpenTag (0:1) +TokHEEXCloseTag (11:13) +TokEOF (17:17) `}, - {"<.foo>", `TokDot (1:2) + {"<.foo>", `TokHEEXOpenTag (0:1) +TokDot (1:2) TokIdent (2:5) "foo" +TokHEEXCloseTag (6:8) TokDot (8:9) TokIdent (9:12) "foo" TokEOF (13:13) `}, - {"<.foo />", `TokDot (1:2) + {"<.foo />", `TokHEEXOpenTag (0:1) +TokDot (1:2) TokIdent (2:5) "foo" TokEOF (8:8) `}, - {"<.live_component id=\"foo\" module={Foo.Bar} no-value />", `TokDot (1:2) + {"<.live_component id=\"foo\" module={Foo.Bar} no-value />", `TokHEEXOpenTag (0:1) +TokDot (1:2) TokIdent (2:16) "live_component" TokModule (34:37) "Foo" TokDot (37:38) TokModule (38:41) "Bar" TokEOF (54:54) `}, - {"
", `TokString (12:16) "\"{}\"" + {"
", `TokHEEXOpenTag (0:1) +TokString (12:16) "\"{}\"" TokEOF (20:20) `}, } diff --git a/internal/parser/tokenkind_string.go b/internal/parser/tokenkind_string.go index 8de4041..f1f0ba0 100644 --- a/internal/parser/tokenkind_string.go +++ b/internal/parser/tokenkind_string.go @@ -59,16 +59,18 @@ func _() { _ = x[TokAssoc-48] _ = x[TokDoubleColon-49] _ = x[TokPercent-50] - _ = x[TokNumber-51] - _ = x[TokComment-52] - _ = x[TokEOL-53] - _ = x[TokEOF-54] - _ = x[TokOther-55] + _ = x[TokHEEXOpenTag-51] + _ = x[TokHEEXCloseTag-52] + _ = x[TokNumber-53] + _ = x[TokComment-54] + _ = x[TokEOL-55] + _ = x[TokEOF-56] + _ = x[TokOther-57] } -const _TokenKind_name = "TokDefmoduleTokDefTokDefpTokDefmacroTokDefmacropTokDefguardTokDefguardpTokDefdelegateTokDefprotocolTokDefimplTokDefstructTokDefexceptionTokAliasTokImportTokUseTokRequireTokDoTokEndTokFnTokWhenTokIdentTokModuleTokAttrTokAttrDocTokAttrSpecTokAttrTypeTokAttrBehaviourTokAttrCallbackTokStringTokHeredocTokSigilTokCharLiteralTokAtomTokDotTokCommaTokColonTokOpenParenTokCloseParenTokOpenBracketTokCloseBracketTokOpenBraceTokCloseBraceTokOpenAngleTokCloseAngleTokPipeTokBackslashTokRightArrowTokLeftArrowTokAssocTokDoubleColonTokPercentTokNumberTokCommentTokEOLTokEOFTokOther" +const _TokenKind_name = "TokDefmoduleTokDefTokDefpTokDefmacroTokDefmacropTokDefguardTokDefguardpTokDefdelegateTokDefprotocolTokDefimplTokDefstructTokDefexceptionTokAliasTokImportTokUseTokRequireTokDoTokEndTokFnTokWhenTokIdentTokModuleTokAttrTokAttrDocTokAttrSpecTokAttrTypeTokAttrBehaviourTokAttrCallbackTokStringTokHeredocTokSigilTokCharLiteralTokAtomTokDotTokCommaTokColonTokOpenParenTokCloseParenTokOpenBracketTokCloseBracketTokOpenBraceTokCloseBraceTokOpenAngleTokCloseAngleTokPipeTokBackslashTokRightArrowTokLeftArrowTokAssocTokDoubleColonTokPercentTokHEEXOpenTagTokHEEXCloseTagTokNumberTokCommentTokEOLTokEOFTokOther" -var _TokenKind_index = [...]uint16{0, 12, 18, 25, 36, 48, 59, 71, 85, 99, 109, 121, 136, 144, 153, 159, 169, 174, 180, 185, 192, 200, 209, 
216, 226, 237, 248, 264, 279, 288, 298, 306, 320, 327, 333, 341, 349, 361, 374, 388, 403, 415, 428, 440, 453, 460, 472, 485, 497, 505, 519, 529, 538, 548, 554, 560, 568} +var _TokenKind_index = [...]uint16{0, 12, 18, 25, 36, 48, 59, 71, 85, 99, 109, 121, 136, 144, 153, 159, 169, 174, 180, 185, 192, 200, 209, 216, 226, 237, 248, 264, 279, 288, 298, 306, 320, 327, 333, 341, 349, 361, 374, 388, 403, 415, 428, 440, 453, 460, 472, 485, 497, 505, 519, 529, 543, 558, 567, 577, 583, 589, 597} func (i TokenKind) String() string { idx := int(i) - 0 From 3fde08555bf2758752edcbfc9d3418c0d20ddb6e Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Thu, 11 Jun 2026 00:45:38 -0700 Subject: [PATCH 25/31] check variable occurrences within sigils --- internal/parser/tokenizer.go | 6 ++---- internal/treesitter/tree.go | 7 +++---- internal/treesitter/variables.go | 2 +- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/internal/parser/tokenizer.go b/internal/parser/tokenizer.go index e438820..86f4993 100644 --- a/internal/parser/tokenizer.go +++ b/internal/parser/tokenizer.go @@ -820,11 +820,11 @@ func scanSigil(source []byte, i, line int, lineStarts *[]int, tokens *[]Token) ( return i, line } -func scanSigilContents(sigilChars string, source []byte, start, end, contentsStart, contentsEnd, line int, lineStarts *[]int, tokens *[]Token) (int, int) { +func scanSigilContents(sigilChars string, source []byte, start, end, contentsStart, contentsEnd, line int, lineStarts *[]int, tokens *[]Token) { // only scan the contents of HEEX `~H` sigils if sigilChars != "H" { *tokens = append(*tokens, Token{Kind: TokSigil, Start: start, End: end, Line: line}) - return start, line + return } // lineStarts has already been updated by `scanHeredocContent` / `scanRawHeredocContent` @@ -834,8 +834,6 @@ func scanSigilContents(sigilChars string, source []byte, start, end, contentsSta *tokens = append(*tokens, Token{Kind: t.Kind, Start: t.Start + contentsStart, End: t.End + contentsStart, Line: t.Line + 
line - 1}) } } - - return start, line } func TokenizeHeex(source []byte) TokenResult { diff --git a/internal/treesitter/tree.go b/internal/treesitter/tree.go index 7196af6..f302f51 100644 --- a/internal/treesitter/tree.go +++ b/internal/treesitter/tree.go @@ -15,9 +15,8 @@ import ( // in the document tree to their corresponding Elixir sub-tree. Sub-trees may // be nested arbitrarily deep, though in practice it will typically be 1-3 levels. // -// For nested sub-trees, Root and RootNode point back to the parent tree branch -// and node that contains the sub-tree. Navigation is possible both up (using Parent()) -// and down (using ChildCount() and Child(i)). +// For nested sub-trees, Root points back to the parent tree that contains the +// sub-tree. Navigation is possible both up (using Parent()) and down (using Child(i)). // // Elixir->HEEX: (sigil (sigil_name) node: (quoted_content)) // HEEX->Elixir: (expression node: (expression_value)) @@ -33,7 +32,7 @@ func (t *Tree) TrunkNode() *TreeNode { return &TreeNode{Tree: t, Node: t.Trunk.RootNode()} } -// Close closes the trunk tree and any HEEX sub-trees. +// Close recursively closes the trunk tree and any branch sub-trees. 
func (t *Tree) Close() { for _, b := range t.Branches { b.Close() diff --git a/internal/treesitter/variables.go b/internal/treesitter/variables.go index 8e0e2f5..76a8e59 100644 --- a/internal/treesitter/variables.go +++ b/internal/treesitter/variables.go @@ -651,7 +651,7 @@ func collectTokenOccurrences(node *TreeNode, src []byte, token string, out *[]Va kind := node.Kind() // Skip subtrees that can't contain meaningful identifier references - if kind == "string" || kind == "comment" || kind == "sigil" || kind == "charlist" { + if kind == "string" || kind == "comment" || kind == "charlist" { return } From f8decb4d9a91f11b64e94b207a095b4459d3fc1f Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Thu, 11 Jun 2026 08:22:44 -0700 Subject: [PATCH 26/31] fix StartPosition/EndPosition on first line --- internal/treesitter/tree.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/treesitter/tree.go b/internal/treesitter/tree.go index f302f51..72adf50 100644 --- a/internal/treesitter/tree.go +++ b/internal/treesitter/tree.go @@ -110,7 +110,7 @@ func (tn *TreeNode) StartPosition() tree_sitter.Point { p := tn.Tree.Root.StartPosition() sp := tn.Node.StartPosition() p.Row += sp.Row - if sp.Row == p.Row { + if sp.Row == 0 { p.Column += sp.Column } else { p.Column = sp.Column @@ -127,7 +127,7 @@ func (tn *TreeNode) EndPosition() tree_sitter.Point { p := tn.Tree.Root.StartPosition() ep := tn.Node.EndPosition() p.Row += ep.Row - if ep.Row == p.Row { + if ep.Row == 0 { p.Column += ep.Column } else { p.Column = ep.Column From d446dbfea93d9d244fd6fac7401cafb17991910c Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Thu, 11 Jun 2026 11:43:32 -0700 Subject: [PATCH 27/31] fuzz TokenizeHeex, add tests for Tree/TreeNode --- .../fuzz/FuzzTokenizeHeex/4e129080bf679ec0 | 2 + .../fuzz/FuzzTokenizeHeex/d312599b9dbee58c | 2 + internal/parser/tokenizer.go | 14 ++-- internal/parser/tokenizer_test.go | 15 ++++ internal/treesitter/tree.go | 5 ++ 
internal/treesitter/tree_test.go | 75 +++++++++++++++++++ 6 files changed, 108 insertions(+), 5 deletions(-) create mode 100644 internal/parser/testdata/fuzz/FuzzTokenizeHeex/4e129080bf679ec0 create mode 100644 internal/parser/testdata/fuzz/FuzzTokenizeHeex/d312599b9dbee58c create mode 100644 internal/treesitter/tree_test.go diff --git a/internal/parser/testdata/fuzz/FuzzTokenizeHeex/4e129080bf679ec0 b/internal/parser/testdata/fuzz/FuzzTokenizeHeex/4e129080bf679ec0 new file mode 100644 index 0000000..2dee8a8 --- /dev/null +++ b/internal/parser/testdata/fuzz/FuzzTokenizeHeex/4e129080bf679ec0 @@ -0,0 +1,2 @@ +go test fuzz v1 +string("<%") diff --git a/internal/parser/testdata/fuzz/FuzzTokenizeHeex/d312599b9dbee58c b/internal/parser/testdata/fuzz/FuzzTokenizeHeex/d312599b9dbee58c new file mode 100644 index 0000000..fecbcf9 --- /dev/null +++ b/internal/parser/testdata/fuzz/FuzzTokenizeHeex/d312599b9dbee58c @@ -0,0 +1,2 @@ +go test fuzz v1 +string("<0/") diff --git a/internal/parser/tokenizer.go b/internal/parser/tokenizer.go index 86f4993..0d27cdc 100644 --- a/internal/parser/tokenizer.go +++ b/internal/parser/tokenizer.go @@ -950,11 +950,14 @@ func TokenizeHeex(source []byte) TokenResult { i++ } + case quoteChar != 0: + i++ + case ch == '{': i++ return scanInterpolation(i, line, "}", tokens) - case ch == '>': + case ch == '>' || ch == '/': return i, line default: @@ -994,10 +997,11 @@ func TokenizeHeex(source []byte) TokenResult { // self-closing tag case source[i] == '/': - if i+1 < len(source) && source[i+1] == '>' { - i += 2 - return i, line + i++ + if i < len(source) && source[i] == '>' { + i++ } + return i, line // finish open tag case source[i] == '>': @@ -1046,7 +1050,7 @@ func TokenizeHeex(source []byte) TokenResult { startLine := line i, line = scanComment("--%>", i, line, &lineStarts) tokens = append(tokens, Token{Kind: TokComment, Start: start, End: i, Line: startLine}) - } else { + } else if i < len(source) { // consume "=" output indicator from "<%=" 
special form prefix if source[i] == '=' { i++ diff --git a/internal/parser/tokenizer_test.go b/internal/parser/tokenizer_test.go index d4536bf..24515b1 100644 --- a/internal/parser/tokenizer_test.go +++ b/internal/parser/tokenizer_test.go @@ -2215,6 +2215,21 @@ TokEOF (20:20) } } +func FuzzTokenizeHeex(f *testing.F) { + f.Fuzz(func(t *testing.T, src string) { + err := withTimeout(2_000, func() { + result := TokenizeHeex([]byte(src)) + // should always output at least TokEOF + if len(result.Tokens) == 0 { + t.Errorf("TokenizeHeex(src) empty output\n\n%.512s", src) + } + }) + if err == context.DeadlineExceeded { + t.Errorf("TokenizeHeex(src) timeout after 2s\n\n%.512s", src) + } + }) +} + func withTimeout(ms time.Duration, cb func()) error { ctx, cancel := context.WithTimeout(context.Background(), ms*time.Millisecond) defer cancel() diff --git a/internal/treesitter/tree.go b/internal/treesitter/tree.go index 72adf50..a6619f5 100644 --- a/internal/treesitter/tree.go +++ b/internal/treesitter/tree.go @@ -57,6 +57,11 @@ func (tn *TreeNode) IsNamed() bool { return tn.Node.IsNamed() } +// See tree_sitter.Node.ToSexp(). +func (tn *TreeNode) ToSexp() string { + return tn.Node.ToSexp() +} + // See tree_sitter.Node.StartByte(). func (tn *TreeNode) StartByte() uint { if tn.Tree.Root == nil { diff --git a/internal/treesitter/tree_test.go b/internal/treesitter/tree_test.go new file mode 100644 index 0000000..b6dbec2 --- /dev/null +++ b/internal/treesitter/tree_test.go @@ -0,0 +1,75 @@ +package treesitter + +import ( + "maps" + "slices" + "testing" +) + +func TestNewTree(t *testing.T) { + src := `def render(assigns) do + ~H""" +
+ <%= bar() %> +
+ """ +end` + tree := NewTree([]byte(src)) + if tree.Language != LangElixir { + t.Errorf("expected Elixir root tree, got %#v", tree.Language) + } + + heexNodeIds := slices.Collect(maps.Keys(tree.Branches)) + if len(heexNodeIds) != 1 { + t.Errorf("expected 1 Heex branch, got %d", len(heexNodeIds)) + } + heexTree := tree.Branches[heexNodeIds[0]] + if heexTree.Language != LangHeex { + t.Errorf("expected Heex branch sub-tree, got %#v", heexTree.Language) + } + if rootId := heexTree.Root.Node.Id(); rootId != heexNodeIds[0] { + t.Errorf("expected Heex root to match branch node ID %d, got %d", heexNodeIds[0], rootId) + } + wantHeex := "
\n <%= bar() %>\n
\n " + if heexText := heexTree.TrunkNode().Utf8Text([]byte(src)); heexText != wantHeex { + t.Errorf("unexpected Heex text (-want, +got)\n- %#v\n+ %#v", wantHeex, heexText) + } + + exNodeIds := slices.Collect(maps.Keys(heexTree.Branches)) + if len(exNodeIds) != 2 { + t.Errorf("expected 2 Elixir branch, got %d", len(exNodeIds)) + } + for _, branch := range heexTree.Branches { + if exText := branch.TrunkNode().Utf8Text([]byte(src)); !slices.Contains([]string{"foo()", "bar()"}, exText) { + t.Errorf("unexpected nested Elixir text, got %#v", exText) + } + } +} + +func TestTreeNode_ByteAndPosition(t *testing.T) { + src := `def render(assigns) do + ~H""" +
+ <%= bar() %> +
+ """ +end` + + tree := NewTree([]byte(src)) + // bar() on line 4 col 8 + node := tree.TrunkNode().ChildAtPosition(3, 8) + text := node.Utf8Text([]byte(src)) + if node.StartByte() != 61 { + t.Errorf("expected %#v to start at byte %d, got %d", text, 61, node.StartByte()) + } + if node.EndByte() != 64 { + t.Errorf("expected %#v to end at byte %d, got %d", text, 64, node.EndByte()) + } + + if sp := node.StartPosition(); sp.Row != 3 || sp.Column != 8 { + t.Errorf("expected %#v to start at position (Row: %d, Col: %d), got (Row: %d, Col: %d)", text, 0, 0, sp.Row, sp.Column) + } + if ep := node.EndPosition(); ep.Row != 3 || ep.Column != 11 { + t.Errorf("expected %#v to end at position (Row: %d, Col: %d), got (Row: %d, Col: %d)", text, 0, 0, ep.Row, ep.Column) + } +} From 31467ad178ebf33332222a6bf9ef3c1b9e8b50b4 Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Thu, 11 Jun 2026 11:57:49 -0700 Subject: [PATCH 28/31] fix tag attr quote handling --- internal/parser/tokenizer.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/internal/parser/tokenizer.go b/internal/parser/tokenizer.go index 0d27cdc..ac24d0f 100644 --- a/internal/parser/tokenizer.go +++ b/internal/parser/tokenizer.go @@ -950,6 +950,10 @@ func TokenizeHeex(source []byte) TokenResult { i++ } + case quoteChar != 0 && ch == quoteChar: + i++ + return i, line + case quoteChar != 0: i++ @@ -957,7 +961,7 @@ func TokenizeHeex(source []byte) TokenResult { i++ return scanInterpolation(i, line, "}", tokens) - case ch == '>' || ch == '/': + case ch == '>': return i, line default: @@ -1000,8 +1004,8 @@ func TokenizeHeex(source []byte) TokenResult { i++ if i < len(source) && source[i] == '>' { i++ + return i, line } - return i, line // finish open tag case source[i] == '>': From 4216225584f182314fab0d771bd5465a22fecf53 Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Thu, 11 Jun 2026 12:08:02 -0700 Subject: [PATCH 29/31] bump index version Parsing HEEX sigils will introduce new references. 
--- internal/version/version.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/version/version.go b/internal/version/version.go index 2e2c620..416e634 100644 --- a/internal/version/version.go +++ b/internal/version/version.go @@ -5,4 +5,4 @@ const Version = "0.7.0" // IndexVersion is incremented whenever the index schema or parser changes in a // way that requires a full rebuild. Bump this alongside Version when releasing // a change that makes existing indexes stale. -const IndexVersion = 12 +const IndexVersion = 13 From 429f6bd7a85a437f70a98afb852ec62148892a48 Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Thu, 11 Jun 2026 12:14:28 -0700 Subject: [PATCH 30/31] *tree_sitter.Node -> *TreeNode after rebase --- internal/treesitter/variables.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/internal/treesitter/variables.go b/internal/treesitter/variables.go index 76a8e59..ba16da6 100644 --- a/internal/treesitter/variables.go +++ b/internal/treesitter/variables.go @@ -2,8 +2,6 @@ package treesitter import ( "strings" - - tree_sitter "github.com/tree-sitter/go-tree-sitter" ) // VariableOccurrence is a position where a variable name appears. @@ -269,7 +267,7 @@ var moduleKeywords = map[string]bool{ // function definition do not leak to (and cannot reference) an enclosing // module/script scope, so traversals rooted at an outer scope must not descend // into these. -func isFunctionDefinitionCall(node *tree_sitter.Node, src []byte) bool { +func isFunctionDefinitionCall(node *TreeNode, src []byte) bool { if node.Kind() != "call" || node.ChildCount() == 0 { return false } @@ -279,7 +277,7 @@ func isFunctionDefinitionCall(node *tree_sitter.Node, src []byte) bool { // isModuleDefinitionCall reports whether node is a defmodule/defprotocol/defimpl // call, which opens a module-body scope. 
-func isModuleDefinitionCall(node *tree_sitter.Node, src []byte) bool { +func isModuleDefinitionCall(node *TreeNode, src []byte) bool { if node.Kind() != "call" || node.ChildCount() == 0 { return false } @@ -291,7 +289,7 @@ func isModuleDefinitionCall(node *tree_sitter.Node, src []byte) bool { // variable scope — a function or module definition. A traversal rooted at an // outer scope (a module body, or the whole file) must not descend into these, // or a rename/collision check would wrongly reach into an unrelated scope. -func definesNestedScope(node *tree_sitter.Node, src []byte) bool { +func definesNestedScope(node *TreeNode, src []byte) bool { return isFunctionDefinitionCall(node, src) || isModuleDefinitionCall(node, src) } From 713f6f37a8fe1ec1c1277da794e25de6b8b129be Mon Sep 17 00:00:00 2001 From: Aaron Ross Date: Thu, 11 Jun 2026 12:28:42 -0700 Subject: [PATCH 31/31] findEnclosingScope traverse up through multiple trees --- internal/treesitter/variables.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/treesitter/variables.go b/internal/treesitter/variables.go index ba16da6..4d563a0 100644 --- a/internal/treesitter/variables.go +++ b/internal/treesitter/variables.go @@ -389,7 +389,7 @@ func findEnclosingScope(node *TreeNode, src []byte, varName string) *TreeNode { } // Reached the file root without an inner scope: top-level script // bindings (e.g. config/runtime.exs) are scoped to the whole file. - if current.Kind() == "source" { + if current.Kind() == "source" && current.Parent() == nil { return current } prev = current