From d2f6e6dd6ef2e12f31149914bb390982547e2b82 Mon Sep 17 00:00:00 2001 From: Matheus Martins Date: Sat, 13 Jun 2026 20:25:33 +0000 Subject: [PATCH 1/2] fix(semantic-tokens): classify turbofish type args by resolved kind Concrete type arguments at a turbofish call site were tagged `typeParameter`, which per LSP semantics denotes a *formal* type variable, not a concrete type supplied at a use site. The token scan now tracks each generic clause's kind (declaration vs call-site) and classifies in-clause identifiers accordingly: - declaration clause (`class Box`, `fn`): formal params stay `typeParameter`; - call-site turbofish (`Box::`, `Util::identity::`): each concrete arg paints by resolved kind -- builtin/scalar -> `type`, a forwarded in-scope type variable -> `typeParameter`, otherwise a named user type -> `class`. Forwarded-T detection reuses the AST walk, which now also exports a file-wide set of declared type-param names for the token pass to consult (threaded like the existing reclassify-variable map). Also emit `operator` tokens for the turbofish punctuation `::`, `<`, `>` so the delimiters are no longer rendered uncolored, scoped to generic clauses only (a bare `Foo::BAR` never colors its `::`). Co-Authored-By: Claude Opus 4.8 (1M context) --- features/understand/semantic_tokens.feature | 22 ++- src/Handler/SemanticTokens/AstVisitor.php | 125 +++++++++++++---- .../Handler/SemanticTokens/AstVisitorTest.php | 131 +++++++++++++++--- 3 files changed, 224 insertions(+), 54 deletions(-) diff --git a/features/understand/semantic_tokens.feature b/features/understand/semantic_tokens.feature index 6533432..9a6d838 100644 --- a/features/understand/semantic_tokens.feature +++ b/features/understand/semantic_tokens.feature @@ -27,7 +27,7 @@ Feature: Semantic tokens When I request "textDocument/semanticTokens/full" for "/closure.xphp" Then a "typeParameter" token covers "T" in "/closure.xphp" - Scenario: Highlight every type argument of a turbofish call, lowercase scalar included + Scenario: Highlight every type argument of a turbofish call by its resolved kind Given the file at "/turbofish.xphp" contains the following lines: """ " in "/turbofish.xphp" + + Scenario: Forward a type parameter through a turbofish inside a generic body + Given the file at "/forward.xphp" contains the following lines: + """ + { + public function make(): mixed { return Inner::create::(); } + } + """ + And the FQN index has been warmed on initialize + When I request "textDocument/semanticTokens/full" for "/forward.xphp" + Then a "typeParameter" token covers "T" in "/forward.xphp" Scenario: Multiline block comment highlights on every physical line Given the file at "/doc.xphp" contains the following lines: diff --git a/src/Handler/SemanticTokens/AstVisitor.php b/src/Handler/SemanticTokens/AstVisitor.php index 68e0ebd..0b24456 100644 --- a/src/Handler/SemanticTokens/AstVisitor.php +++ b/src/Handler/SemanticTokens/AstVisitor.php @@ -95,14 +95,21 @@ public function visit(array $stmts): array // half the response size at every parameter. $specs = []; $reclassifyVariableAt = []; + // File-wide set of every type-param name declared anywhere in the file + // (`name => true`), accumulated by the AST walk from ATTR_GENERIC_PARAMS / + // ATTR_METHOD_GENERIC_PARAMS. The token pass consults it to tell a + // forwarded in-scope type variable (`Foo::create::()` inside a + // generic body -- `typeParameter`) from a concrete named type argument + // (`Foo::create::()` -- `class`). + $declaredTypeParamNames = []; if ($stmts !== []) { $traverser = new NodeTraverser(); - $traverser->addVisitor($this->newAstWalker($specs, $reclassifyVariableAt)); + $traverser->addVisitor($this->newAstWalker($specs, $reclassifyVariableAt, $declaredTypeParamNames)); $traverser->traverse($stmts); } - $this->collectFromTokens($specs, $reclassifyVariableAt); + $this->collectFromTokens($specs, $reclassifyVariableAt, $declaredTypeParamNames); return $specs; } @@ -119,9 +126,13 @@ public function visit(array $stmts): array * T_VARIABLE starts at that offset * we emit the alternative type * INSTEAD of `variable`. + * @param array $declaredTypeParamNames set of type-param names declared + * anywhere in the file; used to + * paint a forwarded `T` inside a + * turbofish as `typeParameter`. * @param list $out */ - private function collectFromTokens(array &$out, array $reclassifyVariableAt = []): void + private function collectFromTokens(array &$out, array $reclassifyVariableAt = [], array $declaredTypeParamNames = []): void { // Non-strict tokenization (flags=0). TOKEN_PARSE turns // PhpToken into a strict-mode tokenizer that throws ParseError @@ -139,11 +150,19 @@ private function collectFromTokens(array &$out, array $reclassifyVariableAt = [] // identifier or backslash (FQN start). This rejects // `$size < count($items)` (LHS is T_VARIABLE, not T_STRING) // and `Foo::BAR < 5` (RHS is a number, not uppercase ident). - // Inside a clause: T_STRING tokens emit as `typeParameter`, - // backslashes as part of FQNs (left unclassified -- the - // surrounding T_STRING segments paint). Depth-counted so - // nested `Box>` still classifies T. - $genericDepth = 0; + // + // `$clauseKindStack` is a stack of clause KINDS parallel to the + // clause depth (`count($clauseKindStack)`): `'decl'` for a + // declaration clause (`class Box`, `fn`) and `'call'` for a + // call-site turbofish (`Box::`). The kind decides how an + // in-clause identifier is classified: in a `'decl'` clause the + // names are formal type params (`typeParameter`); in a `'call'` + // clause they are concrete type arguments (`type` for builtins, + // `class` for named user types, `typeParameter` only for a + // forwarded in-scope type var). Nested clauses inherit the + // parent kind so `Box::>` keeps the inner `Lst` as + // call-site args. + $clauseKindStack = []; $lastSignificantTokenId = null; $tokenCount = count($tokens); @@ -164,8 +183,12 @@ private function collectFromTokens(array &$out, array $reclassifyVariableAt = [] // Open / close angle-clause state on single-char tokens. if (!$isNamedToken && $token->text === '<') { - if ($genericDepth > 0) { - $genericDepth++; + $openedKind = null; + if ($clauseKindStack !== []) { + // Nested clause: inherit the enclosing kind so + // `Box::>` keeps the inner `Lst` as call-site + // args. + $openedKind = end($clauseKindStack); } elseif ($lastSignificantTokenId === T_STRING && self::peekIsUppercaseIdent($tokens, $i + 1) && self::nameBeforeAngleIsDeclaration($tokens, $i) @@ -177,14 +200,14 @@ private function collectFromTokens(array &$out, array $reclassifyVariableAt = [] // bareword comparison whose left side ends in a name -- // `Foo::CONST < Bar`, `MY_CONST < Other` -- from being // mistaken for a generic declaration. - $genericDepth = 1; + $openedKind = 'decl'; } elseif (($lastSignificantTokenId === T_FN || $lastSignificantTokenId === T_FUNCTION) && self::peekIsUppercaseIdent($tokens, $i + 1) ) { // Anonymous generic closure / arrow declaration clause: // `fn(…)`, `function(…)` -- the `<` follows the // `fn` / `function` keyword (no name between). - $genericDepth = 1; + $openedKind = 'decl'; } elseif ($lastSignificantTokenId === T_DOUBLE_COLON) { // Call-site turbofish: `Foo::`, `static::`, // `$obj->m::` -- the `<` follows the `::` of `::<`. A `::` @@ -198,10 +221,25 @@ private function collectFromTokens(array &$out, array $reclassifyVariableAt = [] // dropping every arg's token. Opening on the empty // `Foo::<>` is harmless: the next `>` closes it immediately // with nothing classified inside. - $genericDepth = 1; + $openedKind = 'call'; + // P2: the turbofish `::` is an operator delimiter. Paint it + // here -- only when the `<` actually opens a clause -- so a + // bare `Foo::BAR` (no following `<`) never colors its `::`. + $colonIdx = self::previousSignificant($tokens, $i - 1); + if ($colonIdx >= 0 && $tokens[$colonIdx]->id === T_DOUBLE_COLON) { + $this->emit($out, $tokens[$colonIdx]->pos, strlen($tokens[$colonIdx]->text), 'operator'); + } + } + if ($openedKind !== null) { + $clauseKindStack[] = $openedKind; + // P2: the opening `<` delimiter. Only painted when a clause + // was actually pushed, so a comparison `<` stays uncolored. + $this->emit($out, $token->pos, 1, 'operator'); } - } elseif (!$isNamedToken && $token->text === '>' && $genericDepth > 0) { - $genericDepth--; + } elseif (!$isNamedToken && $token->text === '>' && $clauseKindStack !== []) { + array_pop($clauseKindStack); + // P2: the closing `>` delimiter. + $this->emit($out, $token->pos, 1, 'operator'); } // Classify the token. @@ -213,15 +251,37 @@ private function collectFromTokens(array &$out, array $reclassifyVariableAt = [] // (single spec, half the response size). $type = $reclassifyVariableAt[$token->pos]; } - if ($type === null && $genericDepth > 0 && self::isIdentInGenericClause($token->id)) { - // Inside a generic clause an identifier is a type - // name -- emit as `typeParameter` for the LSP-spec - // standard classification. Covers bare T_STRING - // (`T`) and qualified-name tokens - // (T_NAME_FULLY_QUALIFIED `\Stringable`, - // T_NAME_QUALIFIED `App\Foo`, T_NAME_RELATIVE - // `namespace\Foo`). - $type = 'typeParameter'; + if ($type === null && $clauseKindStack !== [] && self::isIdentInGenericClause($token->id)) { + // Inside a generic clause an identifier is a type name. + // How it's classified depends on the clause KIND: + // + // - Declaration clause (`class Box`, `fn`): the + // names are formal type parameters -> `typeParameter`. + // Covers bare `T` and bound refs (``). + // + // - Call-site turbofish (`Box::`, + // `Util::identity::`): the names are concrete + // type ARGUMENTS, not formal params. Per LSP semantics + // `typeParameter` denotes a formal variable, so a + // concrete arg must be tokenized as the kind it + // resolves to: + // * a builtin/scalar (`int`, `string`, `void`, ...) + // -> `type`; + // * a forwarded in-scope type variable (`T` passed + // along inside a generic body) -> `typeParameter`; + // * otherwise a named user type -> `class` (the plugin + // colors class/interface/enum identically, so a flat + // `class` is visually correct without resolving the + // exact kind on every keystroke). + if (end($clauseKindStack) === 'decl') { + $type = 'typeParameter'; + } elseif ($token->id === T_STRING && self::isReservedWordIdent($token->text)) { + $type = 'type'; + } elseif ($token->id === T_STRING && isset($declaredTypeParamNames[$token->text])) { + $type = 'typeParameter'; + } else { + $type = 'class'; + } } if ($type === null && $token->id === T_STRING && self::isReservedWordIdent($token->text)) { // PHP tokenizes `null`, `true`, `false`, `void`, @@ -388,10 +448,15 @@ private static function previousSignificant(array $tokens, int $from): int * alternative type * for the T_VARIABLE * that starts there. + * @param array &$declaredTypeParamNames set of every type-param + * name declared in the file; + * lets the token pass paint a + * forwarded `T` in a turbofish + * as `typeParameter`. */ - private function newAstWalker(array &$out, array &$reclassifyVariableAt): NodeVisitorAbstract + private function newAstWalker(array &$out, array &$reclassifyVariableAt, array &$declaredTypeParamNames): NodeVisitorAbstract { - $visitor = new class($out, $reclassifyVariableAt, $this) extends NodeVisitorAbstract { + $visitor = new class($out, $reclassifyVariableAt, $declaredTypeParamNames, $this) extends NodeVisitorAbstract { /** * Stack of in-scope type-param name sets. Each frame is the * set of names declared on an enclosing ClassLike via @@ -403,12 +468,14 @@ private function newAstWalker(array &$out, array &$reclassifyVariableAt): NodeVi private array $typeParamStack = []; /** - * @param list $out - * @param array $reclassifyVariableAt + * @param list $out + * @param array $reclassifyVariableAt + * @param array $declaredTypeParamNames */ public function __construct( private array &$out, private array &$reclassifyVariableAt, + private array &$declaredTypeParamNames, private AstVisitor $emitter, ) { } @@ -422,6 +489,7 @@ public function enterNode(Node $node) foreach ($params as $param) { if ($param instanceof \XPHP\Transpiler\Monomorphize\TypeParam) { $frame[$param->name] = true; + $this->declaredTypeParamNames[$param->name] = true; } } $this->typeParamStack[] = $frame; @@ -451,6 +519,7 @@ public function enterNode(Node $node) foreach ($params as $param) { if ($param instanceof \XPHP\Transpiler\Monomorphize\TypeParam) { $frame[$param->name] = true; + $this->declaredTypeParamNames[$param->name] = true; } } } diff --git a/test/Handler/SemanticTokens/AstVisitorTest.php b/test/Handler/SemanticTokens/AstVisitorTest.php index 18f6cc9..738da78 100644 --- a/test/Handler/SemanticTokens/AstVisitorTest.php +++ b/test/Handler/SemanticTokens/AstVisitorTest.php @@ -288,57 +288,60 @@ public function testBoundTypeParamPaintsAsTypeParameter(): void public function testTypeArgClausePaintsInsideBoxOfPlastic(): void { - // Form 6: new Box::() -- `Plastic` inside the turbofish is - // typeParameter. The clause opens on `<` preceded by `::`. + // Form 6: new Box::() -- `Plastic` inside the turbofish is a + // concrete type ARGUMENT, not a formal param: it's a named user type, + // so it paints as `class`. The clause opens on `<` preceded by `::`. $source = "();"; $specs = $this->collect($source); - $this->assertTokenSubstring($specs, $source, 'Plastic', 'typeParameter'); + $this->assertTokenSubstring($specs, $source, 'Plastic', 'class'); } public function testNestedTypeArgClause(): void { // Nested: Box::> -- the outer clause is a turbofish; the inner - // `Lst` is a bare nested type-arg. Both `Lst` and `T` are - // typeParameter. + // `Lst` is a nested call-site type-arg (inherits the call kind). + // Both `Lst` and `T` are concrete type args here (no enclosing generic + // declaration puts `T` in scope), so both paint as `class`. $source = ">();"; $specs = $this->collect($source); - $this->assertTokenSubstring($specs, $source, 'Lst', 'typeParameter'); - $this->assertTokenSubstring($specs, $source, 'T', 'typeParameter'); + $this->assertTokenSubstring($specs, $source, 'Lst', 'class'); + $this->assertTokenSubstring($specs, $source, 'T', 'class'); } public function testStaticCallTurbofishPaintsTypeArg(): void { // Util::identity::(42) -- the call-site turbofish opens on the - // `<` after `::`; the lowercase scalar `int` inside is typeParameter - // (the `::` makes the clause unambiguous against `<` comparison). + // `<` after `::`; the lowercase scalar `int` inside is a concrete + // builtin type arg, so it paints as `type` (not `typeParameter`). $source = "(\$x);"; $specs = $this->collect($source); - $this->assertTokenSubstring($specs, $source, 'int', 'typeParameter'); + $this->assertTokenSubstring($specs, $source, 'int', 'type'); } public function testStaticReceiverTurbofishPaintsTypeArg(): void { - // static::() -- the receiver before `::` is the T_STATIC keyword; - // the clause still opens on the `<` after `::`. + // static::() -- the receiver before `::` is the T_STATIC + // keyword; the clause still opens on the `<` after `::`. `Plastic` is + // a named concrete type arg -> `class`. $source = "();"; $specs = $this->collect($source); - $this->assertTokenSubstring($specs, $source, 'Plastic', 'typeParameter'); + $this->assertTokenSubstring($specs, $source, 'Plastic', 'class'); } public function testSelfTurbofishPaintsTypeArg(): void { // `self::()` -- the pseudo-type turbofish opens a clause after - // the `::`; `Plastic` inside is a typeParameter. + // the `::`; `Plastic` inside is a named concrete type arg -> `class`. $source = "();"; $specs = $this->collect($source); - $this->assertTokenSubstring($specs, $source, 'Plastic', 'typeParameter'); + $this->assertTokenSubstring($specs, $source, 'Plastic', 'class'); } public function testParentTurbofishPaintsTypeArg(): void { $source = "();"; $specs = $this->collect($source); - $this->assertTokenSubstring($specs, $source, 'Plastic', 'typeParameter'); + $this->assertTokenSubstring($specs, $source, 'Plastic', 'class'); } public function testStaticReceiverIsClassifiedAsKeyword(): void @@ -347,7 +350,7 @@ public function testStaticReceiverIsClassifiedAsKeyword(): void $source = "();"; $specs = $this->collect($source); $this->assertTokenSubstring($specs, $source, 'static', 'keyword'); - $this->assertTokenSubstring($specs, $source, 'Plastic', 'typeParameter'); + $this->assertTokenSubstring($specs, $source, 'Plastic', 'class'); } public function testArrowClosureDeclarationClausePaintsTypeParam(): void @@ -418,21 +421,22 @@ public function testTurbofishWithLowercaseScalarFirstArgOpensClause(): void // `Box::()` -- the `::` anchor makes the clause unambiguous, so a // lowercase scalar first arg MUST open it and be painted. (The // uppercase-ident guard belongs to the bare-`<` declaration branch, not - // the turbofish branch, where it would drop the whole clause.) + // the turbofish branch, where it would drop the whole clause.) `int` is + // a builtin scalar arg -> `type`. $source = "();"; $specs = $this->collect($source); - $this->assertTokenSubstring($specs, $source, 'int', 'typeParameter'); + $this->assertTokenSubstring($specs, $source, 'int', 'type'); } public function testTurbofishMultipleArgsLowercaseFirstHighlightsAll(): void { // `Map::()` -- a lowercase first arg must not suppress the - // whole clause: both the scalar `int` and the class `User` in the later - // slot are type arguments and must be painted. + // whole clause: both args are painted, each by its resolved kind -- + // the scalar `int` -> `type`, the named user type `User` -> `class`. $source = "();"; $specs = $this->collect($source); - $this->assertTokenSubstring($specs, $source, 'int', 'typeParameter'); - $this->assertTokenSubstring($specs, $source, 'User', 'typeParameter'); + $this->assertTokenSubstring($specs, $source, 'int', 'type'); + $this->assertTokenSubstring($specs, $source, 'User', 'class'); } public function testTurbofishClauseClosesSoTrailingNameIsNotTypeParam(): void @@ -449,6 +453,87 @@ public function testTurbofishClauseClosesSoTrailingNameIsNotTypeParam(): void self::assertEmpty($otherSpecs, 'identifier after a closed turbofish must not be a type parameter'); } + public function testTurbofishForwardedTypeParamPaintsAsTypeParameter(): void + { + // A generic body forwarding its own `T` through a turbofish keeps that + // `T` as `typeParameter` -- it's a formal type variable being passed + // along, not a concrete arg. The file-wide declared-type-param set + // (built from the enclosing class's ATTR_GENERIC_PARAMS) drives this. + $source = <<<'XPHP' + { + public function make(): mixed { return Inner::create::(); } + } + XPHP; + $specs = $this->collect($source); + + // Assert the `T` inside `::` SPECIFICALLY (not the decl-clause `T`, + // which the token pass classifies independently). + $forwardedTOffset = strpos($source, 'create::<') + strlen('create::<'); + $forwarded = array_filter( + $specs, + fn (TokenSpec $s) => self::substring($source, $s) === 'T' + && $s->type === 'typeParameter' + && self::specByteOffset($source, $s) === $forwardedTOffset, + ); + self::assertCount(1, $forwarded, 'forwarded `T` in a turbofish must be a type parameter'); + } + + public function testTurbofishConcreteArgNotMatchingTypeParamIsClass(): void + { + // Same shape as the forwarded-T test, but the arg `User` is not a + // declared type param -> it's a concrete named type arg -> `class`. + $source = <<<'XPHP' + { + public function make(): mixed { return Inner::create::(); } + } + XPHP; + $specs = $this->collect($source); + $this->assertTokenSubstring($specs, $source, 'User', 'class'); + } + + public function testTurbofishBuiltinArgPaintsAsType(): void + { + // `Box::()` -- `string` is a builtin scalar arg -> `type`. + $source = "();"; + $specs = $this->collect($source); + $this->assertTokenSubstring($specs, $source, 'string', 'type'); + } + + public function testTurbofishPunctuationEmitsOperatorTokens(): void + { + // P2: the turbofish delimiters `::`, `<`, `>` paint as `operator` so + // they don't render uncolored. + $source = "();"; + $specs = $this->collect($source); + $this->assertTokenSubstring($specs, $source, '::', 'operator'); + $this->assertTokenSubstring($specs, $source, '<', 'operator'); + $this->assertTokenSubstring($specs, $source, '>', 'operator'); + } + + public function testDeclarationClauseAnglesEmitOperatorTokens(): void + { + // P2: the `<` / `>` of a declaration clause also paint as `operator`. + $source = " {}"; + $specs = $this->collect($source); + $this->assertTokenSubstring($specs, $source, '<', 'operator'); + $this->assertTokenSubstring($specs, $source, '>', 'operator'); + } + + public function testBareDoubleColonEmitsNoOperatorToken(): void + { + // A plain `Foo::BAR` member access (no following `<`) is not a turbofish, + // so its `::` must NOT be painted as an operator -- the operator + // emission is scoped to generic clauses only. + $source = "collect($source); + $operatorSpecs = array_filter($specs, fn (TokenSpec $s) => $s->type === 'operator'); + self::assertEmpty($operatorSpecs, 'bare `::` must not emit an operator token'); + } + public function testMultipleTypeArgsSeparatedByComma(): void { // Form 9: Pair -- both K and V are typeParameter. From 5a3dcaac0d970da13fa4e5209cb6dc7d3df6eb8f Mon Sep 17 00:00:00 2001 From: Matheus Martins Date: Sat, 13 Jun 2026 20:25:52 +0000 Subject: [PATCH 2/2] ci(release): bump version in server info sent to clients Release 0.2.3. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/LspDispatcherFactory.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/LspDispatcherFactory.php b/src/LspDispatcherFactory.php index f028ad8..9c2aa58 100644 --- a/src/LspDispatcherFactory.php +++ b/src/LspDispatcherFactory.php @@ -376,7 +376,7 @@ public function create(MessageTransmitter $transmitter, InitializeParams $initia new ErrorHandlingMiddleware($this->logger), new InitializeMiddleware($handlers, $eventDispatcher, [ 'name' => 'xphp-lsp', - 'version' => '0.2.2', + 'version' => '0.2.3', ]), new ShutdownMiddleware($eventDispatcher), new ResponseHandlingMiddleware($responseWatcher),