Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
254 changes: 3 additions & 251 deletions php-transformer/src/HtmlToBlocks/HtmlTransformer.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,16 @@
use Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\Patterns\NavigationPattern;
use Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\Patterns\PatternContext;
use Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\Patterns\PatternRecognizerRegistry;
use Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\Support\DomHelpersTrait;
use Automattic\BlocksEngine\PhpTransformer\WordPress\Runtime;
use DOMDocument;
use DOMElement;
use DOMNode;

final class HtmlTransformer
{
use DomHelpersTrait;

private const MAX_INTERACTION_CANDIDATES = 100;

/**
Expand Down Expand Up @@ -808,11 +811,6 @@ private function collectBlockNavigationItems(array $blocks, array &$items): void
}
}

/**
 * Reduces a navigation label to comparable plain text.
 *
 * The label is passed through the runtime's tag stripper, HTML entities are
 * decoded (ENT_QUOTES | ENT_HTML5, UTF-8), every whitespace run is collapsed
 * to a single space, and the result is trimmed.
 *
 * If the whitespace regex fails (preg_replace() returns null) the original,
 * unprocessed label is trimmed and returned instead.
 */
private function normalizedNavigationLabel(string $label): string
{
    $withoutTags = $this->runtime->stripAllTags($label);
    $decoded     = html_entity_decode($withoutTags, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    $collapsed   = preg_replace('/\s+/', ' ', $decoded);

    if ( null === $collapsed ) {
        return trim($label);
    }

    return trim($collapsed);
}

private function sourceNavigationAnchorLabel(DOMElement $anchor): string
{
$label = $this->normalizedNavigationLabel($anchor->textContent ?? '');
Expand Down Expand Up @@ -2224,36 +2222,6 @@ private function sourceConversionMetadata(string $blockName, DOMElement $element
);
}

/**
 * Serializes the element's child nodes to HTML and trims the result.
 *
 * Each child is serialized through the owning document's saveHTML(); the
 * element's own tag is not included.
 */
private function innerHtml(DOMElement $element): string
{
    $fragments = array();
    foreach ( $element->childNodes as $node ) {
        $fragments[] = $element->ownerDocument->saveHTML($node);
    }

    return trim(implode('', $fragments));
}

/**
 * Serializes the element's child nodes to HTML without trimming.
 *
 * Same output as concatenating saveHTML() for every child, so leading and
 * trailing whitespace inside the element is kept exactly as serialized.
 */
private function innerHtmlPreservingWhitespace(DOMElement $element): string
{
    $markup = '';
    for ( $node = $element->firstChild; null !== $node; $node = $node->nextSibling ) {
        $markup .= $element->ownerDocument->saveHTML($node);
    }

    return $markup;
}

/**
 * Serializes the element itself (tag, attributes and children) to HTML.
 *
 * Returns the trimmed markup, or an empty string when saveHTML() yields a
 * falsy result.
 */
private function outerHtml(DOMElement $element): string
{
    $serialized = $element->ownerDocument->saveHTML($element);

    return $serialized ? trim($serialized) : '';
}

/**
 * Reads an attribute value, yielding an empty string when it is absent.
 */
private function attr(DOMElement $element, string $name): string
{
    if ( ! $element->hasAttribute($name) ) {
        return '';
    }

    return $element->getAttribute($name);
}

/**
* @return array<string, mixed>
*/
Expand All @@ -2269,16 +2237,6 @@ private function presentationAttributes(DOMElement $element): array
), static fn ($value): bool => is_array($value) ? array() !== $value : '' !== trim((string) $value));
}

/**
 * Validates an id for use as an anchor.
 *
 * The trimmed id is accepted only when it starts with an ASCII letter and
 * otherwise contains just ASCII letters, digits, underscores or hyphens.
 * Anything else (including an empty id) yields an empty string.
 */
private function safeAnchor(string $id): string
{
    $candidate = trim($id);
    $isValid   = '' !== $candidate
        && 1 === preg_match('/^[A-Za-z][A-Za-z0-9_-]*$/', $candidate);

    return $isValid ? $candidate : '';
}

private function mergedPresentationStyle(DOMElement $element): string
{
$inlineStyle = $this->attr($element, 'style');
Expand Down Expand Up @@ -2949,44 +2907,6 @@ private function dynamicTextContent(DOMElement $element): ?string
return $this->attr($element, 'data-prefix') . $value . $this->attr($element, 'data-suffix');
}

/**
 * Tells whether the element's class attribute contains the exact token.
 *
 * The class attribute is trimmed and split on whitespace runs; the lookup
 * is a strict string comparison against each resulting token.
 */
private function hasClass(DOMElement $element, string $className): bool
{
    $tokens = preg_split('/\s+/', trim($this->attr($element, 'class')));
    if ( ! $tokens ) {
        return false;
    }

    return in_array($className, $tokens, true);
}

/**
 * Builds a "tag:nth-of-type(n) > ..." path from below <body> to the element.
 *
 * Walks from the element up through its element ancestors, stopping at the
 * first node whose tag is "body" (or when the parent is not an element).
 * For each step the 1-based position counts preceding siblings with the
 * same lowercase tag name. Segments are joined outermost-first with " > ".
 */
private function elementSelector(DOMElement $element): string
{
    $segments = array();

    for ( $node = $element; $node instanceof DOMElement; $node = $node->parentNode ) {
        $tag = strtolower($node->tagName);
        if ( 'body' === $tag ) {
            break;
        }

        $position = 1;
        $previous = $node->previousSibling;
        while ( $previous instanceof DOMNode ) {
            if ( $previous instanceof DOMElement && strtolower($previous->tagName) === $tag ) {
                ++$position;
            }
            $previous = $previous->previousSibling;
        }

        $segments[] = $tag . ':nth-of-type(' . $position . ')';
    }

    // Segments were collected innermost-first; the selector reads outermost-first.
    return implode(' > ', array_reverse($segments));
}

/**
 * Collects the element's attributes as a name => value map sorted by name.
 *
 * A null attribute value is stored as an empty string.
 *
 * @return array<string, string>
 */
private function htmlAttributes(DOMElement $element): array
{
    $pairs = array();

    if ( null !== $element->attributes ) {
        foreach ( $element->attributes as $node ) {
            $pairs[$node->nodeName] = null === $node->nodeValue ? '' : $node->nodeValue;
        }
    }

    ksort($pairs);

    return $pairs;
}

/**
* @return array<string, string>
*/
Expand Down Expand Up @@ -3022,19 +2942,6 @@ private function sourceContext(DOMElement $element): array
), static fn (mixed $value): bool => '' !== $value && array() !== $value);
}

/**
 * Lists the lowercase tag names of the element's ancestors, nearest first.
 *
 * The walk stops at the first ancestor whose tag is "body" (which is not
 * included) or when the parent is no longer an element.
 *
 * @return array<int, string>
 */
private function ancestorTags(DOMElement $element): array
{
    $chain    = array();
    $ancestor = $element->parentNode;

    while ( $ancestor instanceof DOMElement ) {
        $name = strtolower($ancestor->tagName);
        if ( 'body' === $name ) {
            break;
        }

        $chain[]  = $name;
        $ancestor = $ancestor->parentNode;
    }

    return $chain;
}

private function nearestPreviousHeadingText(DOMElement $element): string
{
for ( $node = $element->previousSibling; $node instanceof DOMNode; $node = $node->previousSibling ) {
Expand All @@ -3046,14 +2953,6 @@ private function nearestPreviousHeadingText(DOMElement $element): string
return '';
}

/**
 * Lists the tokens of the element's class attribute in source order.
 *
 * The attribute is trimmed and split on whitespace runs. Duplicates are
 * kept; only empty tokens are discarded.
 *
 * @return array<int, string>
 */
private function classNames(DOMElement $element): array
{
    // PREG_SPLIT_NO_EMPTY removes only genuinely empty tokens. Filtering with
    // array_filter() and no callback would also drop the valid class "0",
    // because PHP treats the string '0' as falsy.
    return preg_split('/\s+/', trim($this->attr($element, 'class')), -1, PREG_SPLIT_NO_EMPTY) ?: array();
}

/**
* @return array<string, string>
*/
Expand Down Expand Up @@ -3200,18 +3099,6 @@ private function safeSourceFragment(DOMElement $element): string
return $html;
}

/**
 * Counts the direct children of the element that are themselves elements.
 *
 * Text, comment and other non-element child nodes are ignored.
 */
private function childElementCount(DOMElement $element): int
{
    $total = 0;
    for ( $node = $element->firstChild; null !== $node; $node = $node->nextSibling ) {
        if ( $node instanceof DOMElement ) {
            ++$total;
        }
    }

    return $total;
}

/**
* @return array<int, array<string, mixed>>
*/
Expand Down Expand Up @@ -3567,21 +3454,6 @@ private function materializationHintForInteractionKind(string $kind): string
};
}

/**
 * Returns the lowercase tag name of the element's direct parent.
 *
 * Yields null when the parent node is not an element (for example when the
 * element is detached or sits directly under the document).
 */
private function closestTagName(DOMElement $element): ?string
{
    $parent = $element->parentNode;
    if ( ! $parent instanceof DOMElement ) {
        return null;
    }

    return strtolower($parent->tagName);
}

/**
 * Finds the first direct child element with the given tag name.
 *
 * The child's tag name is lowercased before comparison, so $tagName is
 * expected in lowercase. Returns null when no direct child matches.
 */
private function firstChildElement(DOMElement $element, string $tagName): ?DOMElement
{
    for ( $node = $element->firstChild; null !== $node; $node = $node->nextSibling ) {
        if ( ! $node instanceof DOMElement ) {
            continue;
        }

        if ( $tagName === strtolower($node->tagName) ) {
            return $node;
        }
    }

    return null;
}

private function figureMediaElement(DOMElement $figure, string $tagName): ?DOMElement
{
$direct = $this->firstChildElement($figure, $tagName);
Expand Down Expand Up @@ -3635,39 +3507,6 @@ private function figureLinkedMediaAnchor(DOMElement $figure): ?DOMElement
return $anchor instanceof DOMElement && $this->isImageOnlyAnchor($anchor) ? $anchor : null;
}

/**
 * Returns the element's sole meaningful child when it is a $tagName element.
 *
 * Whitespace-only text nodes are ignored. Any other child that is not an
 * element with the given (lowercase) tag name, or a second matching element,
 * makes the result null. An element with no meaningful children also yields
 * null.
 */
private function onlyChildElement(DOMElement $element, string $tagName): ?DOMElement
{
    $found = null;

    foreach ( $element->childNodes as $node ) {
        $isBlankText = XML_TEXT_NODE === $node->nodeType && '' === trim($node->textContent ?? '');
        if ( $isBlankText ) {
            continue;
        }

        $isWantedElement = $node instanceof DOMElement && strtolower($node->tagName) === $tagName;
        if ( ! $isWantedElement || null !== $found ) {
            return null;
        }

        $found = $node;
    }

    return $found;
}

/**
 * Serializes the element's children to HTML, skipping direct child elements
 * whose lowercase tag name is listed in $excludedTags. The result is trimmed.
 *
 * Only direct children are filtered; excluded tags nested deeper are
 * serialized as part of their parent.
 *
 * @param array<int, string> $excludedTags
 */
private function innerHtmlWithoutTags(DOMElement $element, array $excludedTags): string
{
    $kept = array();

    foreach ( $element->childNodes as $node ) {
        $isExcluded = $node instanceof DOMElement
            && in_array(strtolower($node->tagName), $excludedTags, true);

        if ( ! $isExcluded ) {
            $kept[] = $element->ownerDocument->saveHTML($node);
        }
    }

    return trim(implode('', $kept));
}

private function citationFromElement(DOMElement $element): string
{
foreach ( $element->childNodes as $child ) {
Expand Down Expand Up @@ -3914,16 +3753,6 @@ private function listItems(DOMElement $list, array &$fallbacks): array
return $items;
}

/**
 * Produces the element's outer HTML with obviously active content removed.
 *
 * The patterns are applied in order to the serialized markup:
 *  1. <script>/<style> elements together with their content;
 *  2. attributes whose name starts with "on" (inline event handlers);
 *  3. href/src/xlink:href attributes whose value starts with "javascript:";
 *  4. srcdoc attributes.
 *
 * If any replacement fails (preg_replace() returns null) the working string
 * becomes empty. The result is trimmed.
 *
 * NOTE(review): this is regex-based stripping, not a full HTML sanitizer;
 * confirm callers do not rely on it as the only XSS defence.
 */
private function safeFallbackHtml(DOMElement $element): string
{
    $strippers = array(
        '@<(script|style)[^>]*?>.*?</\\1>@si',
        '/\s+on[a-z]+\s*=\s*("[^"]*"|\'[^\']*\'|[^\s>]+)/i',
        '/\s+(?:href|src|xlink:href)\s*=\s*("\s*javascript:[^"]*"|\'\s*javascript:[^\']*\'|javascript:[^\s>]+)/i',
        '/\s+srcdoc\s*=\s*("[^"]*"|\'[^\']*\'|[^\s>]+)/i',
    );

    $markup = $this->outerHtml($element);
    foreach ( $strippers as $pattern ) {
        $markup = preg_replace($pattern, '', $markup) ?? '';
    }

    return trim($markup);
}

/**
* @return array<string, mixed>|null
*/
Expand Down Expand Up @@ -4600,48 +4429,6 @@ private function safeCanvasAttributes(DOMElement $element): array
return $safe;
}

/**
 * Caps fallback HTML at 2000 bytes and reports whether it was cut.
 *
 * 'bytes' is always the byte length of the original input. When the input
 * exceeds 2000 bytes, 'html' holds at most the first 2000 bytes followed by
 * '...' and 'truncated' is true; otherwise the input is returned unchanged.
 *
 * The cut never ends inside a multibyte UTF-8 character: if the byte limit
 * lands mid-character, the partial character is dropped so the truncated
 * string stays valid UTF-8. Input that is not valid UTF-8 to begin with is
 * cut at exactly 2000 bytes.
 *
 * @return array{html: string, bytes: int, truncated: bool}
 */
private function boundedFallbackHtml(string $html): array
{
    $bytes = strlen($html);
    if ( $bytes <= 2000 ) {
        return array(
            'html' => $html,
            'bytes' => $bytes,
            'truncated' => false,
        );
    }

    $head = substr($html, 0, 2000);

    // A UTF-8 character is at most 4 bytes long, so a split character leaves
    // at most 3 stray bytes at the end. Try the full cut first, then back off
    // one byte at a time until the prefix validates as UTF-8.
    for ( $drop = 0; $drop <= 3; ++$drop ) {
        $candidate = substr($head, 0, 2000 - $drop);
        if ( 1 === preg_match('//u', $candidate) ) {
            $head = $candidate;
            break;
        }
    }

    return array(
        'html' => $head . '...',
        'bytes' => $bytes,
        'truncated' => true,
    );
}

/**
 * Caps fallback text at 2000 bytes and reports whether it was cut.
 *
 * 'bytes' is always the byte length of the original input. When the input
 * exceeds 2000 bytes, 'text' holds at most the first 2000 bytes followed by
 * '...' and 'truncated' is true; otherwise the input is returned unchanged.
 *
 * The cut never ends inside a multibyte UTF-8 character: if the byte limit
 * lands mid-character, the partial character is dropped so the truncated
 * string stays valid UTF-8. Input that is not valid UTF-8 to begin with is
 * cut at exactly 2000 bytes.
 *
 * @return array{text: string, bytes: int, truncated: bool}
 */
private function boundedFallbackText(string $text): array
{
    $bytes = strlen($text);
    if ( $bytes <= 2000 ) {
        return array(
            'text' => $text,
            'bytes' => $bytes,
            'truncated' => false,
        );
    }

    $head = substr($text, 0, 2000);

    // A UTF-8 character is at most 4 bytes long, so a split character leaves
    // at most 3 stray bytes at the end. Try the full cut first, then back off
    // one byte at a time until the prefix validates as UTF-8.
    for ( $drop = 0; $drop <= 3; ++$drop ) {
        $candidate = substr($head, 0, 2000 - $drop);
        if ( 1 === preg_match('//u', $candidate) ) {
            $head = $candidate;
            break;
        }
    }

    return array(
        'text' => $head . '...',
        'bytes' => $bytes,
        'truncated' => true,
    );
}

/**
* @return array<string, string>
*/
Expand Down Expand Up @@ -5766,18 +5553,6 @@ private function looksLikeSplitLayout(DOMElement $element): bool
return (bool) preg_match('/(?:^|[\s_-])(?:split|two[\s_-]?col|media[\s_-]?text|text[\s_-]?media|feature[\s_-]?row|hero[\s_-]?(?:inner|grid|content|layout)|content[\s_-]?grid)(?:$|[\s_-])/', $name);
}

/**
 * Counts how many direct child nodes of the element are elements.
 *
 * Non-element children (text, comments, ...) do not contribute.
 */
private function directElementChildCount(DOMElement $element): int
{
    $elementChildren = array_filter(
        iterator_to_array($element->childNodes, false),
        static fn ($node): bool => $node instanceof DOMElement
    );

    return count($elementChildren);
}

private function looksLikeDocumentationLayout(DOMElement $element): bool
{
$name = strtolower(trim($this->attr($element, 'class') . ' ' . $this->attr($element, 'id')));
Expand Down Expand Up @@ -6565,20 +6340,6 @@ private function nonCoreImageFigureClassName(DOMElement $figure): string
return implode(' ', $classes);
}

/**
 * Merges several class-attribute strings into one space-separated list.
 *
 * Each argument is trimmed and split on whitespace runs. Empty tokens are
 * skipped and a token is kept only the first time it appears, so the
 * result preserves first-occurrence order without duplicates.
 */
private function mergeClassNames(string ...$classNames): string
{
    $unique = array();

    foreach ( $classNames as $group ) {
        $tokens = preg_split('/\s+/', trim($group));
        if ( false === $tokens ) {
            continue;
        }

        foreach ( $tokens as $token ) {
            if ( '' === $token || isset($unique[$token]) ) {
                continue;
            }

            // Keyed by token for the duplicate check; the value keeps the original string.
            $unique[$token] = $token;
        }
    }

    return implode(' ', $unique);
}

/**
* @return array<string, mixed>
*/
Expand Down Expand Up @@ -6687,15 +6448,6 @@ private function sanitizedSyntaxHtml(DOMElement $element): string
/**
 * Renders a name => value map as an HTML attribute string.
 *
 * Every pair becomes ` name="value"` (note the leading space), in array
 * order. Values are escaped with htmlspecialchars() using
 * ENT_QUOTES | ENT_SUBSTITUTE and UTF-8; attribute names are emitted as-is.
 * An empty map yields an empty string.
 *
 * NOTE(review): names are not escaped or validated here — confirm callers
 * only pass trusted attribute names.
 *
 * @param array<string, string> $attrs
 */
private function htmlAttributeString(array $attrs): string
{
    $rendered = array();

    foreach ( $attrs as $name => $value ) {
        $escaped    = htmlspecialchars($value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        $rendered[] = sprintf(' %s="%s"', $name, $escaped);
    }

    return implode('', $rendered);
}

/**
* @return array<int, array<string, string>>
*/
Expand Down
Loading
Loading