From f658101c8374cc817b55d5a7daea2074f8f290f1 Mon Sep 17 00:00:00 2001 From: Chris Huber Date: Sat, 27 Jun 2026 20:43:49 -0400 Subject: [PATCH] Wire SubtreeClassifier into core/html fallback as measurement (#497) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Run the standalone SubtreeClassifier (#254) on subtrees that fall back to raw core/html and attach its verdict (bucket + confidence + top signals) to the existing fallback diagnostic. This surfaces, across the corpus, which core/html dumps the classifier believes should have been native blocks — the data the pattern-recognition swarm optimizes against. Measurement only: routing and block output are unchanged. The classifier runs solely on the core/html fallback emission path (script, canvas, template, inline SVG, form, iframe, unsupported-element), never per converted element, and only adds a `classification` key to the diagnostic. Parity 128 -> 128 with byte-identical block output; full composer test green. Adds a contract assertion that the canvas core/html fallback now carries a classifier verdict. Refs #497 Co-Authored-By: Claude Opus 4.8 --- .../Diagnostics/FallbackEmitter.php | 131 ++++++++++++++++++ .../src/HtmlToBlocks/HtmlTransformer.php | 3 + php-transformer/tests/contract/run.php | 10 ++ 3 files changed, 144 insertions(+) diff --git a/php-transformer/src/HtmlToBlocks/Diagnostics/FallbackEmitter.php b/php-transformer/src/HtmlToBlocks/Diagnostics/FallbackEmitter.php index 5cc6075d..1362df5a 100644 --- a/php-transformer/src/HtmlToBlocks/Diagnostics/FallbackEmitter.php +++ b/php-transformer/src/HtmlToBlocks/Diagnostics/FallbackEmitter.php @@ -3,6 +3,8 @@ namespace Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\Diagnostics; +use Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\Classification\ClassificationContext; +use Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\Classification\SubtreeClassifier; use Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\FallbackDiagnostic; use Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\Support\DomHelpersTrait; use Automattic\BlocksEngine\PhpTransformer\WordPress\Runtime; @@ -63,6 +65,8 @@ final class FallbackEmitter */ private array $runtimeCanvasSelectors = array(); + private readonly SubtreeClassifier $classifier; + /** * @param Closure(DOMElement): array $sourceContextResolver * Resolves the shared `sourceContext` enrichment for an element. The @@ -74,6 +78,7 @@ public function __construct( private readonly Runtime $runtime, private readonly Closure $sourceContextResolver ) { + $this->classifier = new SubtreeClassifier(); } /** @@ -110,6 +115,7 @@ public function captureInlineSvgFallback(DOMElement $element, array &$fallbacks) 'selector' => $this->elementSelector($element), 'attributes' => $this->safeSvgAttributes($element), 'context' => $this->sourceContext($element), + 'classification' => $this->classifyFallbackSubtree($element), 'events' => $this->eventMetadata($element), 'text_length' => strlen(trim($element->textContent ?? '')), 'child_count' => $this->childElementCount($element), @@ -148,6 +154,7 @@ public function captureCanvasFallback(DOMElement $element, array &$fallbacks, ar 'selector' => $this->elementSelector($element), 'attributes' => $this->safeCanvasAttributes($element), 'context' => $this->sourceContext($element), + 'classification' => $this->classifyFallbackSubtree($element), 'events' => $this->eventMetadata($element), 'script_dependency_hint' => '' !== $id ? 'Scripts may target #' . $id . ' and call canvas APIs such as getContext(); replacing it with a wrapper block changes runtime behavior.' @@ -184,6 +191,7 @@ public function captureScriptFallback(DOMElement $element, array &$fallbacks, ar 'selector' => $this->elementSelector($element), 'attributes' => $this->safeScriptAttributes($element), 'context' => $this->sourceContext($element), + 'classification' => $this->classifyFallbackSubtree($element), 'events' => $this->eventMetadata($element), 'script_role' => $scriptRole, 'script_source_kind' => '' !== trim($this->attr($element, 'src')) ? 'external' : 'inline', @@ -268,6 +276,7 @@ public function captureTemplateFallback(DOMElement $element, array &$fallbacks, 'selector' => $this->elementSelector($element), 'attributes' => $attributes, 'context' => $this->sourceContext($element), + 'classification' => $this->classifyFallbackSubtree($element), 'template_role' => $this->templateRole($element), 'text_length' => strlen(trim($element->textContent ?? '')), 'child_count' => $this->childElementCount($element), @@ -374,6 +383,128 @@ private function sourceContext(DOMElement $element): array return ( $this->sourceContextResolver )($element); } + /** + * Project the structural subtree-classifier verdict for a subtree that fell + * back to raw `core/html`. This is MEASUREMENT-ONLY diagnostic metadata + * (issue #497): it surfaces which raw-HTML fallbacks the classifier believes + * should have become native blocks, across the whole corpus. It does NOT + * change routing or block output — the verdict is attached to the existing + * fallback diagnostic and nothing else consumes it yet. + * + * Guarded to the core/html fallback path: it only runs when a fallback + * diagnostic is actually being emitted, never per converted element. + * + * @return array{bucket: string, confidence: float, signals: array} + */ + public function classifyFallbackSubtree(DOMElement $element): array + { + $result = $this->classifier->classify($element, $this->classificationContext($element)); + + return array( + 'bucket' => $result->bucket(), + 'confidence' => $result->confidence(), + 'signals' => $this->topClassificationSignals($result->signals()), + ); + } + + private function classificationContext(DOMElement $element): ClassificationContext + { + return new ClassificationContext( + $this->subtreeInlineCss($element), + $this->subtreeJsText($element) + ); + } + + /** + * Inline CSS declared on the subtree (`style` attributes), which is the CSS + * association cheaply available at the fallback emission point. + */ + private function subtreeInlineCss(DOMElement $element): string + { + $parts = array(); + foreach ( $this->subtreeElements($element) as $node ) { + $style = trim($this->attr($node, 'style')); + if ( '' !== $style ) { + $parts[] = $style; + } + } + + return implode("\n", $parts); + } + + /** + * JavaScript associated with the subtree: inline `