diff --git a/php-transformer/src/HtmlToBlocks/Diagnostics/FallbackEmitter.php b/php-transformer/src/HtmlToBlocks/Diagnostics/FallbackEmitter.php index 5cc6075d..1362df5a 100644 --- a/php-transformer/src/HtmlToBlocks/Diagnostics/FallbackEmitter.php +++ b/php-transformer/src/HtmlToBlocks/Diagnostics/FallbackEmitter.php @@ -3,6 +3,8 @@ namespace Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\Diagnostics; +use Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\Classification\ClassificationContext; +use Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\Classification\SubtreeClassifier; use Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\FallbackDiagnostic; use Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\Support\DomHelpersTrait; use Automattic\BlocksEngine\PhpTransformer\WordPress\Runtime; @@ -63,6 +65,8 @@ final class FallbackEmitter */ private array $runtimeCanvasSelectors = array(); + private readonly SubtreeClassifier $classifier; + /** * @param Closure(DOMElement): array $sourceContextResolver * Resolves the shared `sourceContext` enrichment for an element. The @@ -74,6 +78,7 @@ public function __construct( private readonly Runtime $runtime, private readonly Closure $sourceContextResolver ) { + $this->classifier = new SubtreeClassifier(); } /** @@ -110,6 +115,7 @@ public function captureInlineSvgFallback(DOMElement $element, array &$fallbacks) 'selector' => $this->elementSelector($element), 'attributes' => $this->safeSvgAttributes($element), 'context' => $this->sourceContext($element), + 'classification' => $this->classifyFallbackSubtree($element), 'events' => $this->eventMetadata($element), 'text_length' => strlen(trim($element->textContent ?? '')), 'child_count' => $this->childElementCount($element), @@ -148,6 +154,7 @@ public function captureCanvasFallback(DOMElement $element, array &$fallbacks, ar 'selector' => $this->elementSelector($element), 'attributes' => $this->safeCanvasAttributes($element), 'context' => $this->sourceContext($element), + 'classification' => $this->classifyFallbackSubtree($element), 'events' => $this->eventMetadata($element), 'script_dependency_hint' => '' !== $id ? 'Scripts may target #' . $id . ' and call canvas APIs such as getContext(); replacing it with a wrapper block changes runtime behavior.' @@ -184,6 +191,7 @@ public function captureScriptFallback(DOMElement $element, array &$fallbacks, ar 'selector' => $this->elementSelector($element), 'attributes' => $this->safeScriptAttributes($element), 'context' => $this->sourceContext($element), + 'classification' => $this->classifyFallbackSubtree($element), 'events' => $this->eventMetadata($element), 'script_role' => $scriptRole, 'script_source_kind' => '' !== trim($this->attr($element, 'src')) ? 'external' : 'inline', @@ -268,6 +276,7 @@ public function captureTemplateFallback(DOMElement $element, array &$fallbacks, 'selector' => $this->elementSelector($element), 'attributes' => $attributes, 'context' => $this->sourceContext($element), + 'classification' => $this->classifyFallbackSubtree($element), 'template_role' => $this->templateRole($element), 'text_length' => strlen(trim($element->textContent ?? '')), 'child_count' => $this->childElementCount($element), @@ -374,6 +383,128 @@ private function sourceContext(DOMElement $element): array return ( $this->sourceContextResolver )($element); } + /** + * Project the structural subtree-classifier verdict for a subtree that fell + * back to raw `core/html`. This is MEASUREMENT-ONLY diagnostic metadata + * (issue #497): it surfaces which raw-HTML fallbacks the classifier believes + * should have become native blocks, across the whole corpus. It does NOT + * change routing or block output — the verdict is attached to the existing + * fallback diagnostic and nothing else consumes it yet. + * + * Guarded to the core/html fallback path: it only runs when a fallback + * diagnostic is actually being emitted, never per converted element. + * + * @return array{bucket: string, confidence: float, signals: array} + */ + public function classifyFallbackSubtree(DOMElement $element): array + { + $result = $this->classifier->classify($element, $this->classificationContext($element)); + + return array( + 'bucket' => $result->bucket(), + 'confidence' => $result->confidence(), + 'signals' => $this->topClassificationSignals($result->signals()), + ); + } + + private function classificationContext(DOMElement $element): ClassificationContext + { + return new ClassificationContext( + $this->subtreeInlineCss($element), + $this->subtreeJsText($element) + ); + } + + /** + * Inline CSS declared on the subtree (`style` attributes), which is the CSS + * association cheaply available at the fallback emission point. + */ + private function subtreeInlineCss(DOMElement $element): string + { + $parts = array(); + foreach ( $this->subtreeElements($element) as $node ) { + $style = trim($this->attr($node, 'style')); + if ( '' !== $style ) { + $parts[] = $style; + } + } + + return implode("\n", $parts); + } + + /** + * JavaScript associated with the subtree: inline `