From 1ff491e6ad89fbf1b41c6daf3056ac0d0b49ef95 Mon Sep 17 00:00:00 2001 From: Chris Huber Date: Sun, 28 Jun 2026 07:30:45 -0400 Subject: [PATCH] Dedupe transformer helpers duplicated by decomposition slices 3/4 Refactor slices 3 (#246, SemanticParityReporter) and 4 (#249, FallbackEmitter) were file-scoped, so they copied shared leaf helpers instead of promoting them. Each of the following was byte-identical across its copies; consolidate the single definition into Support/DomHelpersTrait (already used by all three classes) and remove the duplicates: - hasAncestorTag (HtmlTransformer + SemanticParityReporter) - hasSourceNavigationSignal (HtmlTransformer + SemanticParityReporter) - safeNavigationUrl (HtmlTransformer + SemanticParityReporter) - runtimeIslandSelector (HtmlTransformer + FallbackEmitter) - eventMetadata (HtmlTransformer + FallbackEmitter) - isSafeSvgContent (HtmlTransformer + FallbackEmitter) - dedupeArrayRows (HtmlTransformer + FallbackEmitter) Behavior-preserving pure dedupe: no logic or signature changes. Canonical and parity (144 fixtures) identical before and after; full composer test green. 
Refs #242 Co-Authored-By: Claude Opus 4.8 --- .../Diagnostics/FallbackEmitter.php | 64 ---------- .../Diagnostics/SemanticParityReporter.php | 45 -------- .../src/HtmlToBlocks/HtmlTransformer.php | 108 ----------------- .../HtmlToBlocks/Support/DomHelpersTrait.php | 109 ++++++++++++++++++ 4 files changed, 109 insertions(+), 217 deletions(-) diff --git a/php-transformer/src/HtmlToBlocks/Diagnostics/FallbackEmitter.php b/php-transformer/src/HtmlToBlocks/Diagnostics/FallbackEmitter.php index 1fa685f0..6aa70398 100644 --- a/php-transformer/src/HtmlToBlocks/Diagnostics/FallbackEmitter.php +++ b/php-transformer/src/HtmlToBlocks/Diagnostics/FallbackEmitter.php @@ -708,42 +708,6 @@ private function topClassificationSignals(array $signals): array ), static fn (mixed $value): bool => array() !== $value); } - private function runtimeIslandSelector(DOMElement $element): string - { - $id = trim($this->attr($element, 'id')); - if ( '' !== $id ) { - return '#' . $id; - } - - foreach ( preg_split('/\s+/', trim($this->attr($element, 'class'))) ?: array() as $class ) { - if ( '' !== $class ) { - return '.' . $class; - } - } - - return $this->elementSelector($element); - } - - /** - * @param array> $rows - * @return array> - */ - private function dedupeArrayRows(array $rows): array - { - $seen = array(); - $deduped = array(); - foreach ( $rows as $row ) { - $key = json_encode($row, JSON_UNESCAPED_SLASHES); - if ( ! 
is_string($key) || isset($seen[$key]) ) { - continue; - } - $seen[$key] = true; - $deduped[] = $row; - } - - return $deduped; - } - private function templateRequiresRuntimePreservation(DOMElement $element): bool { foreach ( $this->htmlAttributes($element) as $name => $value ) { @@ -897,32 +861,4 @@ private function safeSvgAttributes(DOMElement $element): array return $attributes; } - /** - * @return array> - */ - private function eventMetadata(DOMElement $element): array - { - $events = array(); - foreach ( $this->htmlAttributes($element) as $name => $value ) { - if ( preg_match('/^on([a-z]+)$/i', $name, $matches) ) { - $events[] = array( - 'type' => strtolower($matches[1]), - 'attribute' => strtolower($name), - ); - } - if ( preg_match('/^data-(?:action|on|event)$/i', $name) && '' !== trim($value) ) { - $events[] = array( - 'type' => 'declared', - 'attribute' => $name, - ); - } - } - - return $events; - } - - private function isSafeSvgContent(string $content): bool - { - return '' !== trim($content) && preg_match('/<svg(?:\s|>)/i', $content) && ! 
preg_match('/<\s*script\b|\son[a-z]+\s*=|javascript\s*:/i', $content); - } } diff --git a/php-transformer/src/HtmlToBlocks/Diagnostics/SemanticParityReporter.php b/php-transformer/src/HtmlToBlocks/Diagnostics/SemanticParityReporter.php index 13992aa5..03ff9aa8 100644 --- a/php-transformer/src/HtmlToBlocks/Diagnostics/SemanticParityReporter.php +++ b/php-transformer/src/HtmlToBlocks/Diagnostics/SemanticParityReporter.php @@ -138,20 +138,6 @@ private function landmarkKindForElement(DOMElement $element): string }; } - /** - * @param array $tagNames - */ - private function hasAncestorTag(DOMElement $element, array $tagNames): bool - { - for ( $node = $element->parentNode; $node instanceof DOMElement && 'body' !== strtolower($node->tagName); $node = $node->parentNode ) { - if ( in_array(strtolower($node->tagName), $tagNames, true) ) { - return true; - } - } - - return false; - } - /** * @param array> $blocks * @param array> $sourceProvenance @@ -367,23 +353,6 @@ private function isSourceMenuToggleControl(DOMElement $element): bool return (bool) preg_match('/(?:^|[^a-z0-9])(?:hamburger|menu|toggle)(?:[^a-z0-9]|$)/', strtolower($this->attr($element, 'class') . ' ' . $this->attr($element, 'aria-label'))); } - private function hasSourceNavigationSignal(DOMElement $element): bool - { - if ( 'navigation' === strtolower($this->attr($element, 'role')) ) { - return true; - } - - foreach ( array( 'class', 'id' ) as $attribute ) { - foreach ( preg_split('/[^a-z0-9]+/', strtolower($this->attr($element, $attribute))) ?: array() as $token ) { - if ( in_array($token, array( 'nav', 'navbar', 'navigation', 'menu', 'links' ), true) ) { - return true; - } - } - } - - return false; - } - /** * @param array $tagNames */ @@ -840,18 +809,4 @@ private function navigationItemsSignature(array $items): string return implode('|', $parts); } - /** - * Sanitize a navigation URL, dropping control characters and javascript: - * schemes. 
Copied from HtmlTransformer (which retains it for non-parity - * callers) so the parity reporter has no dependency on transformer state. - */ - private function safeNavigationUrl(string $url): string - { - $url = trim($url); - if ( '' === $url || preg_match('/[\x00-\x1f\x7f]|javascript\s*:/i', $url) ) { - return ''; - } - - return $url; - } } diff --git a/php-transformer/src/HtmlToBlocks/HtmlTransformer.php b/php-transformer/src/HtmlToBlocks/HtmlTransformer.php index 9beb5556..446baa5d 100644 --- a/php-transformer/src/HtmlToBlocks/HtmlTransformer.php +++ b/php-transformer/src/HtmlToBlocks/HtmlTransformer.php @@ -421,36 +421,6 @@ private function deduplicateNavigationBlocks(array $blocks): array return $this->deduplicateNavigationBlocksRecursive($blocks, $seen); } - /** - * @param array $tagNames - */ - private function hasAncestorTag(DOMElement $element, array $tagNames): bool - { - for ( $node = $element->parentNode; $node instanceof DOMElement && 'body' !== strtolower($node->tagName); $node = $node->parentNode ) { - if ( in_array(strtolower($node->tagName), $tagNames, true) ) { - return true; - } - } - - return false; - } - private function hasSourceNavigationSignal(DOMElement $element): bool - { - if ( 'navigation' === strtolower($this->attr($element, 'role')) ) { - return true; - } - - foreach ( array( 'class', 'id' ) as $attribute ) { - foreach ( preg_split('/[^a-z0-9]+/', strtolower($this->attr($element, $attribute))) ?: array() as $token ) { - if ( in_array($token, array( 'nav', 'navbar', 'navigation', 'menu', 'links' ), true) ) { - return true; - } - } - } - - return false; - } - /** * @param array> $blocks * @param array $seen @@ -3031,22 +3001,6 @@ private function recordRuntimeIsland(DOMElement $element, string $kind, string $ $this->fallbackEmitter->recordRuntimeIsland($element, $kind, $reason, $runtimeRequirement, $metadata, $this->runtimeIslands); } - private function runtimeIslandSelector(DOMElement $element): string - { - $id = 
trim($this->attr($element, 'id')); - if ( '' !== $id ) { - return '#' . $id; - } - - foreach ( preg_split('/\s+/', trim($this->attr($element, 'class'))) ?: array() as $class ) { - if ( '' !== $class ) { - return '.' . $class; - } - } - - return $this->elementSelector($element); - } - /** * @return array> */ @@ -3092,26 +3046,6 @@ private function generatedBlockNamespaceFromOptions(array $options): string return '' !== $namespace ? $namespace : 'custom'; } - /** - * @param array> $rows - * @return array> - */ - private function dedupeArrayRows(array $rows): array - { - $seen = array(); - $deduped = array(); - foreach ( $rows as $row ) { - $key = json_encode($row, JSON_UNESCAPED_SLASHES); - if ( ! is_string($key) || isset($seen[$key]) ) { - continue; - } - $seen[$key] = true; - $deduped[] = $row; - } - - return $deduped; - } - /** * @param array $options * @return array @@ -4153,16 +4087,6 @@ private function hasNavigationContainerSignal(DOMElement $element): bool return (bool) preg_match('/(?:^|[\s_-])(?:nav|navbar|navigation|menu|links)(?:$|[\s_-])/', $name); } - private function safeNavigationUrl(string $url): string - { - $url = trim($url); - if ( '' === $url || preg_match('/[\x00-\x1f\x7f]|javascript\s*:/i', $url) ) { - return ''; - } - - return $url; - } - private function hasDirectChildElement(DOMElement $element, string $tagName): bool { foreach ( $element->childNodes as $child ) { @@ -4684,11 +4608,6 @@ private function safeResolvedAssetImageUrl(string $url): string return $this->safeImageUrl($url); } - private function isSafeSvgContent(string $content): bool - { - return '' !== trim($content) && preg_match('/<svg(?:\s|>)/i', $content) && ! 
preg_match('/<\s*script\b|\son[a-z]+\s*=|javascript\s*:/i', $content); - } - /** * @return array */ @@ -4789,31 +4708,4 @@ private function sanitizedSyntaxHtml(DOMElement $element): string return $html; } - /** - * @param array $attrs - */ - /** - * @return array> - */ - private function eventMetadata(DOMElement $element): array - { - $events = array(); - foreach ( $this->htmlAttributes($element) as $name => $value ) { - if ( preg_match('/^on([a-z]+)$/i', $name, $matches) ) { - $events[] = array( - 'type' => strtolower($matches[1]), - 'attribute' => strtolower($name), - ); - } - if ( preg_match('/^data-(?:action|on|event)$/i', $name) && '' !== trim($value) ) { - $events[] = array( - 'type' => 'declared', - 'attribute' => $name, - ); - } - } - - return $events; - } - } diff --git a/php-transformer/src/HtmlToBlocks/Support/DomHelpersTrait.php b/php-transformer/src/HtmlToBlocks/Support/DomHelpersTrait.php index 8fc1e149..e7de5521 100644 --- a/php-transformer/src/HtmlToBlocks/Support/DomHelpersTrait.php +++ b/php-transformer/src/HtmlToBlocks/Support/DomHelpersTrait.php @@ -266,4 +266,113 @@ private function htmlAttributeString(array $attrs): string } return $html; } + + /** + * @param array $tagNames + */ + private function hasAncestorTag(DOMElement $element, array $tagNames): bool + { + for ( $node = $element->parentNode; $node instanceof DOMElement && 'body' !== strtolower($node->tagName); $node = $node->parentNode ) { + if ( in_array(strtolower($node->tagName), $tagNames, true) ) { + return true; + } + } + + return false; + } + + private function hasSourceNavigationSignal(DOMElement $element): bool + { + if ( 'navigation' === strtolower($this->attr($element, 'role')) ) { + return true; + } + + foreach ( array( 'class', 'id' ) as $attribute ) { + foreach ( preg_split('/[^a-z0-9]+/', strtolower($this->attr($element, $attribute))) ?: array() as $token ) { + if ( in_array($token, array( 'nav', 'navbar', 'navigation', 'menu', 'links' ), true) ) { + return true; + } + } + 
} + + return false; + } + + /** + * Sanitize a navigation URL, dropping control characters and javascript: schemes. + */ + private function safeNavigationUrl(string $url): string + { + $url = trim($url); + if ( '' === $url || preg_match('/[\x00-\x1f\x7f]|javascript\s*:/i', $url) ) { + return ''; + } + + return $url; + } + + private function runtimeIslandSelector(DOMElement $element): string + { + $id = trim($this->attr($element, 'id')); + if ( '' !== $id ) { + return '#' . $id; + } + + foreach ( preg_split('/\s+/', trim($this->attr($element, 'class'))) ?: array() as $class ) { + if ( '' !== $class ) { + return '.' . $class; + } + } + + return $this->elementSelector($element); + } + + /** + * @return array> + */ + private function eventMetadata(DOMElement $element): array + { + $events = array(); + foreach ( $this->htmlAttributes($element) as $name => $value ) { + if ( preg_match('/^on([a-z]+)$/i', $name, $matches) ) { + $events[] = array( + 'type' => strtolower($matches[1]), + 'attribute' => strtolower($name), + ); + } + if ( preg_match('/^data-(?:action|on|event)$/i', $name) && '' !== trim($value) ) { + $events[] = array( + 'type' => 'declared', + 'attribute' => $name, + ); + } + } + + return $events; + } + + private function isSafeSvgContent(string $content): bool + { + return '' !== trim($content) && preg_match('/<svg(?:\s|>)/i', $content) && ! preg_match('/<\s*script\b|\son[a-z]+\s*=|javascript\s*:/i', $content); + } + + /** + * @param array> $rows + * @return array> + */ + private function dedupeArrayRows(array $rows): array + { + $seen = array(); + $deduped = array(); + foreach ( $rows as $row ) { + $key = json_encode($row, JSON_UNESCAPED_SLASHES); + if ( ! is_string($key) || isset($seen[$key]) ) { + continue; + } + $seen[$key] = true; + $deduped[] = $row; + } + + return $deduped; + } }