diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php
index 10f3ee3e2dd0f..bd940eb8ac47d 100644
--- a/src/wp-includes/html-api/class-wp-html-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-processor.php
@@ -44,6 +44,39 @@
* $processor->add_class( 'responsive-image' );
* }
*
+ * #### Reading ordinary text from a subtree
+ *
+ * Ordinary text extraction is usually a tree-aware operation, so use the HTML
+ * Processor and walk the subtree. Append only `#text` tokens unless the caller
+ * intentionally asks for another token type. Do not use
+ * {@see WP_HTML_Tag_Processor::get_modifiable_text} itself as a test for
+ * ordinary text, because comments and special elements can also carry
+ * modifiable text.
+ *
+ * Example:
+ *
+ * $processor = WP_HTML_Processor::create_fragment( $html );
+ * if ( $processor->next_tag( 'ARTICLE' ) ) {
+ * $article_depth = $processor->get_current_depth();
+ * $text = '';
+ *
+ * while ( $processor->next_token() && $processor->get_current_depth() >= $article_depth ) {
+ * if ( '#text' === $processor->get_token_type() ) {
+ * $text .= $processor->get_modifiable_text();
+ * }
+ * }
+ * }
+ *
+ * HTML elements whose contents cannot contain markup, such as SCRIPT, STYLE,
+ * TITLE, and TEXTAREA, do not expose their contents as child `#text` tokens.
+ * Their contents are available on the element token itself and should be read
+ * only when the caller specifically asks for that element's own contents.
+ *
+ * For read-only extraction, parser state such as
+ * {@see WP_HTML_Tag_Processor::paused_at_incomplete_token} or
+ * {@see WP_HTML_Processor::get_last_error} reports how the scan ended. Whether
+ * already-collected text is acceptable is the caller's policy.
+ *
* #### Breadcrumbs
*
* Breadcrumbs represent the stack of open elements from the root
@@ -5577,6 +5610,13 @@ public function class_list() {
* that a token has modifiable text, and a token with modifiable text may
* have an empty string (e.g. a comment with no contents).
*
+ * This method is not a predicate for ordinary text. For ordinary subtree
+ * text extraction, first require `get_token_type() === '#text'`, then read
+ * this method. HTML SCRIPT, STYLE, TITLE, and TEXTAREA contents are carried
+ * on the element token itself, with no child `#text` token; read that
+ * opener-carried text only when the caller specifically asks for the
+ * element's own contents.
+ *
* @since 6.6.0 Subclassed for the HTML Processor.
*
* @return string
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index e41e1120550b5..6ab7369eb0ba6 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -3639,6 +3639,15 @@ public function subdivide_text_appropriately(): bool {
* that a token has modifiable text, and a token with modifiable text may
* have an empty string (e.g. a comment with no contents).
*
+ * The returned string is already decoded where HTML decodes text: `#text`
+ * nodes, TITLE contents, and TEXTAREA contents return character references
+ * as the characters they represent, so `&amp;` is returned as `&`. Do not
+ * decode the returned string again. SCRIPT, STYLE, and comment contents are
+ * returned verbatim because HTML does not decode character references there.
+ *
+ * The returned string is UTF-8. When measuring or slicing it by code points,
+ * pass an explicit encoding, for example `mb_strlen( $text, 'UTF-8' )`.
+ *
* Limitations:
*
* - This function will not strip the leading newline appropriately