Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 131 additions & 0 deletions php-transformer/src/HtmlToBlocks/Diagnostics/FallbackEmitter.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

namespace Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\Diagnostics;

use Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\Classification\ClassificationContext;
use Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\Classification\SubtreeClassifier;
use Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\FallbackDiagnostic;
use Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\Support\DomHelpersTrait;
use Automattic\BlocksEngine\PhpTransformer\WordPress\Runtime;
Expand Down Expand Up @@ -63,6 +65,8 @@ final class FallbackEmitter
*/
private array $runtimeCanvasSelectors = array();

private readonly SubtreeClassifier $classifier;

/**
* @param Closure(DOMElement): array<string, mixed> $sourceContextResolver
* Resolves the shared `sourceContext` enrichment for an element. The
Expand All @@ -74,6 +78,7 @@ public function __construct(
private readonly Runtime $runtime,
private readonly Closure $sourceContextResolver
) {
$this->classifier = new SubtreeClassifier();
}

/**
Expand Down Expand Up @@ -110,6 +115,7 @@ public function captureInlineSvgFallback(DOMElement $element, array &$fallbacks)
'selector' => $this->elementSelector($element),
'attributes' => $this->safeSvgAttributes($element),
'context' => $this->sourceContext($element),
'classification' => $this->classifyFallbackSubtree($element),
'events' => $this->eventMetadata($element),
'text_length' => strlen(trim($element->textContent ?? '')),
'child_count' => $this->childElementCount($element),
Expand Down Expand Up @@ -148,6 +154,7 @@ public function captureCanvasFallback(DOMElement $element, array &$fallbacks, ar
'selector' => $this->elementSelector($element),
'attributes' => $this->safeCanvasAttributes($element),
'context' => $this->sourceContext($element),
'classification' => $this->classifyFallbackSubtree($element),
'events' => $this->eventMetadata($element),
'script_dependency_hint' => '' !== $id
? 'Scripts may target #' . $id . ' and call canvas APIs such as getContext(); replacing it with a wrapper block changes runtime behavior.'
Expand Down Expand Up @@ -184,6 +191,7 @@ public function captureScriptFallback(DOMElement $element, array &$fallbacks, ar
'selector' => $this->elementSelector($element),
'attributes' => $this->safeScriptAttributes($element),
'context' => $this->sourceContext($element),
'classification' => $this->classifyFallbackSubtree($element),
'events' => $this->eventMetadata($element),
'script_role' => $scriptRole,
'script_source_kind' => '' !== trim($this->attr($element, 'src')) ? 'external' : 'inline',
Expand Down Expand Up @@ -268,6 +276,7 @@ public function captureTemplateFallback(DOMElement $element, array &$fallbacks,
'selector' => $this->elementSelector($element),
'attributes' => $attributes,
'context' => $this->sourceContext($element),
'classification' => $this->classifyFallbackSubtree($element),
'template_role' => $this->templateRole($element),
'text_length' => strlen(trim($element->textContent ?? '')),
'child_count' => $this->childElementCount($element),
Expand Down Expand Up @@ -374,6 +383,128 @@ private function sourceContext(DOMElement $element): array
return ( $this->sourceContextResolver )($element);
}

/**
* Project the structural subtree-classifier verdict for a subtree that fell
* back to raw `core/html`. This is MEASUREMENT-ONLY diagnostic metadata
* (issue #497): it surfaces which raw-HTML fallbacks the classifier believes
* should have become native blocks, across the whole corpus. It does NOT
* change routing or block output — the verdict is attached to the existing
* fallback diagnostic and nothing else consumes it yet.
*
* Guarded to the core/html fallback path: it only runs when a fallback
* diagnostic is actually being emitted, never per converted element.
*
* @return array{bucket: string, confidence: float, signals: array<string, mixed>}
*/
public function classifyFallbackSubtree(DOMElement $element): array
{
    // Gather the subtree's inline CSS/JS first; the classifier takes it as context.
    $context = $this->classificationContext($element);
    $verdict = $this->classifier->classify($element, $context);

    // Read the verdict fields in a fixed order, then condense the raw signals
    // so only the diagnostic-relevant subset lands in the payload.
    $bucket     = $verdict->bucket();
    $confidence = $verdict->confidence();
    $signals    = $this->topClassificationSignals($verdict->signals());

    return array(
        'bucket'     => $bucket,
        'confidence' => $confidence,
        'signals'    => $signals,
    );
}

private function classificationContext(DOMElement $element): ClassificationContext
{
    // The context is built positionally: subtree inline CSS first, then subtree JS text.
    $inlineCss = $this->subtreeInlineCss($element);
    $jsText    = $this->subtreeJsText($element);

    return new ClassificationContext($inlineCss, $jsText);
}

/**
* Inline CSS declared on the subtree (`style` attributes), which is the CSS
* association cheaply available at the fallback emission point.
*/
private function subtreeInlineCss(DOMElement $element): string
{
    // Trimmed `style` attribute value for the element itself and every descendant.
    $declarations = array_map(
        fn (DOMElement $node): string => trim($this->attr($node, 'style')),
        $this->subtreeElements($element)
    );

    // Drop elements that carry no inline style so the joined text has no blank lines.
    $nonEmpty = array_filter(
        $declarations,
        static fn (string $css): bool => '' !== $css
    );

    return implode("\n", $nonEmpty);
}

/**
* JavaScript associated with the subtree: inline `<script>` bodies plus
* inline `on*` event-handler attribute source within the subtree.
*/
private function subtreeJsText(DOMElement $element): string
{
    $chunks = array();

    foreach ( $this->subtreeElements($element) as $current ) {
        // Only inline scripts contribute a body: a <script> with a non-blank
        // `src` attribute is skipped here.
        $isScript = 'script' === strtolower($current->tagName);
        if ( $isScript && '' === trim($this->attr($current, 'src')) ) {
            $source = trim($current->textContent ?? '');
            if ( '' !== $source ) {
                $chunks[] = $source;
            }
        }

        // Collect the source of every non-empty `on*` attribute on this element.
        foreach ( $current->attributes ?? array() as $attr ) {
            $attrName = strtolower($attr->nodeName);

            // The name must start with `on` and have at least one more character.
            if ( strlen($attrName) <= 2 || ! str_starts_with($attrName, 'on') ) {
                continue;
            }

            $handler = trim($attr->nodeValue ?? '');
            if ( '' === $handler ) {
                continue;
            }

            $chunks[] = $handler;
        }
    }

    return implode("\n", $chunks);
}

/**
* @return array<int, DOMElement>
*/
private function subtreeElements(DOMElement $element): array
{
    // Snapshot every descendant matched by the `*` wildcard into a plain list.
    $matched = iterator_to_array($element->getElementsByTagName('*'), false);

    // Keep only real element nodes.
    $descendants = array_filter(
        $matched,
        static fn ($node): bool => $node instanceof DOMElement
    );

    // The root element always comes first, followed by its descendants in list order;
    // array_merge renumbers the integer keys so the result is a contiguous list.
    return array_merge(array( $element ), $descendants);
}

/**
* Condense the raw classifier signals to the diagnostic-relevant top signals:
* the active boolean flags, the positive structural counts, and the per-bucket
* scores that drove the verdict.
*
* @param array<string, mixed> $signals
* @return array<string, mixed>
*/
private function topClassificationSignals(array $signals): array
{
    // The per-bucket scores are reported separately, so exclude that entry
    // from the flag/count scan.
    $scalarSignals = $signals;
    unset($scalarSignals['scores']);

    // Flags: names of signals whose value is strictly `true`.
    $flags = array_keys($scalarSignals, true, true);

    // Counts: integer-valued signals greater than zero, keyed by signal name.
    $counts = array_filter(
        $scalarSignals,
        static fn (mixed $value): bool => is_int($value) && $value > 0
    );

    // Scores pass through unchanged when they are an array; anything else is treated as absent.
    $rawScores = $signals['scores'] ?? null;
    $scores    = is_array($rawScores) ? $rawScores : array();

    // Emit only the non-empty sections, always in flags, counts, scores order.
    $summary = array();
    if ( array() !== $flags ) {
        $summary['flags'] = $flags;
    }
    if ( array() !== $counts ) {
        $summary['counts'] = $counts;
    }
    if ( array() !== $scores ) {
        $summary['scores'] = $scores;
    }

    return $summary;
}

private function runtimeIslandSelector(DOMElement $element): string
{
$id = trim($this->attr($element, 'id'));
Expand Down
3 changes: 3 additions & 0 deletions php-transformer/src/HtmlToBlocks/HtmlTransformer.php
Original file line number Diff line number Diff line change
Expand Up @@ -1185,6 +1185,7 @@ private function convertElement(DOMElement $element, array &$fallbacks, bool $ca
'attributes' => $this->htmlAttributes($element),
'form' => $this->formMetadata($element),
'context' => $this->sourceContext($element),
'classification' => $this->fallbackEmitter->classifyFallbackSubtree($element),
'events' => $this->eventMetadata($element),
'readable_blocks' => null !== $readableFormBlock ? array( $readableFormBlock ) : array(),
'controls' => $controls,
Expand Down Expand Up @@ -1321,6 +1322,7 @@ private function convertElement(DOMElement $element, array &$fallbacks, bool $ca
'selector' => $this->elementSelector($element),
'attributes' => $this->htmlAttributes($element),
'context' => $this->sourceContext($element),
'classification' => $this->fallbackEmitter->classifyFallbackSubtree($element),
'events' => $this->eventMetadata($element),
'text_length' => strlen(trim($element->textContent ?? '')),
'child_count' => $this->childElementCount($element),
Expand Down Expand Up @@ -4677,6 +4679,7 @@ private function convertIframeElement(DOMElement $iframe, array &$fallbacks): ?a
'selector' => $this->elementSelector($iframe),
'attributes' => $this->safeEmbedAttributes($iframe),
'context' => $this->sourceContext($iframe),
'classification' => $this->fallbackEmitter->classifyFallbackSubtree($iframe),
'events' => $this->eventMetadata($iframe),
'html' => $boundedHtml['html'],
'html_bytes' => $boundedHtml['bytes'],
Expand Down
10 changes: 10 additions & 0 deletions php-transformer/tests/contract/run.php
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,16 @@ public function match(DOMElement $element, PatternContext $context): ?array
$assert('preserve_runtime_island' === ($runtimeCanvasResult['source_reports']['runtime_islands'][0]['suggested_generic_repair_class'] ?? ''), 'runtime island exposes generic repair class metadata');
$assert($runtimeCanvasResult['source_reports']['runtime_islands'] === ($runtimeCanvasResult['source_reports']['conversion_report']['runtime_islands'] ?? array()), 'conversion report projects runtime islands');

// Measurement wiring (#497): the core/html canvas fallback carries the subtree
// classifier verdict so the corpus surfaces which raw-HTML dumps the classifier
// believes should have been native blocks. Measurement only — block output is
// unchanged (the canvas still falls back to a runtime island, not a block).
$canvasFallbackClassification = $runtimeCanvasResult['fallbacks'][0]['classification'] ?? array();
$assert('custom_application' === ($canvasFallbackClassification['bucket'] ?? ''), 'core/html canvas fallback carries subtree classifier bucket verdict');
$assert(is_float($canvasFallbackClassification['confidence'] ?? null) && ($canvasFallbackClassification['confidence'] ?? 0.0) > 0.0, 'core/html canvas fallback carries classifier confidence');
$assert(in_array('canvas', $canvasFallbackClassification['signals']['flags'] ?? array(), true), 'core/html canvas fallback exposes top classifier signals');
$assert(array() === ($runtimeCanvasResult['blocks'] ?? array()), 'classifier measurement wiring leaves canvas block output unchanged');

$invalidStatus = $result;
$invalidStatus['status'] = 'ok';
$assertInvalidCanonicalEnvelope($invalidStatus, 'unsupported status', 'canonical validation rejects unsupported status values');
Expand Down
Loading