From 54e8d0258167bce7efe55b50e2a91c316044231f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Tue, 19 May 2026 09:33:57 +0200
Subject: [PATCH 1/6] perf: literal-prefix capture-extraction fast path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For anchored patterns of the shape

    ^<literal-prefix-set>([^X]+)X.*$    with replacement `${1}` (or `$1`)

capture 1's bounds are structurally trivial — skip the prefix, find the
terminator with memchr — so the engine doesn't need to track captures
at all.

Two changes work together:

1. A new `LiteralPrefixCapture` strategy in `regex-automata`'s meta
   engine recognizes the shape via HIR walking (single-pattern only,
   anchored at both ends, default flags, ASCII terminator, finite
   literal-alternation prefix set capped at 32 variants). Strategy
   methods extract the match and capture-1 slots directly with memchr,
   bypassing PikeVM / BoundedBacktracker. The strategy is tried in the
   meta strategy constructor just before the existing reverse strategies.

2. `Regex::replacen` gets a borrowed-output fast path for replacements
   that are exactly `$N` / `${N}`. Detected via a new
   `Replacer::single_capture_ref` method (default `None`, opted into
   for `&str`/`String`/`Cow<str>`). For `limit == 1` with a match
   covering the whole haystack, returns `Cow::Borrowed` of the
   captured slice — no `Captures::expand`, no output string
   allocation.

Bench (500k synthetic Referer rows, 5-iter mean, on the same machine):

    Regex::replacen, q28 pattern, 80% match
        before:  281 ms
        after:    39 ms   (7.3x)

    Regex::replacen, ^key=([^,]+),.*$, 100% match
        before:  113 ms
        after:    27 ms   (4.2x)

Tests: 257 / 257 pass (regex-automata --lib + --test integration, regex
--test integration). No regressions.
---
 regex-automata/src/meta/strategy.rs | 367 +++++++++++++++++++++++++++-
 src/regex/string.rs                 | 103 ++++++++
 2 files changed, 468 insertions(+), 2 deletions(-)
diff --git a/regex-automata/src/meta/strategy.rs b/regex-automata/src/meta/strategy.rs
index ebb876b2b8..297eb3d674 100644
--- a/regex-automata/src/meta/strategy.rs
+++ b/regex-automata/src/meta/strategy.rs
@@ -3,9 +3,9 @@ use core::{
     panic::{RefUnwindSafe, UnwindSafe},
 };
 
-use alloc::sync::Arc;
+use alloc::{boxed::Box, sync::Arc, vec, vec::Vec};
 
-use regex_syntax::hir::{literal, Hir};
+use regex_syntax::hir::{literal, Class, Hir, HirKind, Literal, Look};
 
 use crate::{
     meta::{
@@ -160,6 +160,13 @@ pub(super) fn new(
     // might give up or quit for reasons. If we had, e.g., a PikeVM that
     // supported reverse searching, then we could avoid building a full Core
     // engine for this case.
+    core = match LiteralPrefixCapture::new(core, hirs) {
+        Err(core) => core,
+        Ok(lpc) => {
+            debug!("using literal-prefix-capture strategy");
+            return Ok(Arc::new(lpc));
+        }
+    };
     core = match ReverseAnchored::new(core) {
         Err(core) => core,
         Ok(ra) => {
@@ -1903,3 +1910,359 @@ fn copy_match_to_slots(m: Match, slots: &mut [Option<NonMaxUsize>]) {
         *slot = NonMaxUsize::new(m.end());
     }
 }
+
+/// A specialized strategy for anchored, fully-bounded regexes of the form
+///
+/// ```text
+///     ^<literal-prefix-set>([^X]+)X.*$
+/// ```
+///
+/// where the prefix reduces to a finite set of literal byte alternatives,
+/// the capture is a greedy `[^X]+` for a single ASCII byte X, and the trailing
+/// `.*$` is the standard "rest of line, then end of haystack" tail. The
+/// motivating instance is the ClickBench Q28 pattern
+/// `^https?://(?:www\.)?([^/]+)/.*$` -> `${1}`, but the recognizer applies to
+/// any pattern of this shape (single-literal prefixes, alternation, and
+/// `?`-optional segments).
+///
+/// For inputs that match, capture 1's bounds are structurally trivial — skip
+/// the prefix, find the terminator with `memchr` — so we can avoid the full
+/// engine's capture-tracking entirely. For inputs that don't match (e.g., a
+/// newline in the tail breaks `.*$`, or no prefix matches), we report no
+/// match: that result is identical to what the full engine would compute, so
+/// no fallback is required.
+#[derive(Debug)]
+struct LiteralPrefixCapture {
+    core: Core,
+    /// Distinct literal byte prefixes, longest-first so the runtime probe
+    /// is greedy. Bounded to `MAX_PREFIX_VARIANTS` at construction time.
+    prefixes: Box<[Box<[u8]>]>,
+    /// Single ASCII byte ending the capture (also the literal that must
+    /// follow the capture in the original regex).
+    terminator: u8,
+}
+
+/// Each `(?:...)?` doubles the count and each `(a|b|c)` multiplies it,
+/// so this caps the explosion for adversarial patterns. 32 leaves room
+/// for 3 more optional segments past Q28's 4 variants (4 * 2^3 = 32)
+/// while keeping the linear probe over `prefixes` short.
+const MAX_PREFIX_VARIANTS: usize = 32;
+
+impl LiteralPrefixCapture {
+    fn new(core: Core, hirs: &[&Hir]) -> Result<Self, Core> {
+        if hirs.len() != 1 {
+            return Err(core);
+        }
+        if !core.info.is_always_anchored_start()
+            || !core.info.is_always_anchored_end()
+        {
+            return Err(core);
+        }
+        // `.*$` excludes the line terminator; the runtime newline check
+        // hard-codes `b'\n'`, so reject non-default line terminators.
+        if core.info.config().get_line_terminator() != b'\n' {
+            return Err(core);
+        }
+        let Some((prefixes, terminator)) =
+            try_recognize_prefix_capture(hirs[0])
+        else {
+            return Err(core);
+        };
+        Ok(LiteralPrefixCapture { core, prefixes, terminator })
+    }
+
+    /// Returns capture 1's byte offsets if the input matches, else `None`.
+    /// The overall match always spans `0..input.haystack().len()` because
+    /// the regex is `^...$`.
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn try_fast_match(&self, input: &Input<'_>) -> Option<(usize, usize)> {
+        if input.start() != 0 || input.end() != input.haystack().len() {
+            return None;
+        }
+        let bytes = input.haystack();
+        for prefix in self.prefixes.iter() {
+            if !bytes.starts_with(prefix) {
+                continue;
+            }
+            let cap_start = prefix.len();
+            // Fused scan: the first byte that matters in the tail is either
+            // the terminator (success) or `\n` (failure for `.*$`).
+            let off = crate::util::memchr::memchr2(
+                self.terminator,
+                b'\n',
+                &bytes[cap_start..],
+            )?;
+            if bytes[cap_start + off] != self.terminator {
+                return None;
+            }
+            if off == 0 {
+                // `[^X]+` requires >= 1 byte; try a shorter prefix.
+                continue;
+            }
+            let cap_end = cap_start + off;
+            // Anything past the terminator must also be `\n`-free for
+            // `.*$` to reach end-of-haystack.
+            if crate::util::memchr::memchr(b'\n', &bytes[cap_end + 1..])
+                .is_some()
+            {
+                return None;
+            }
+            return Some((cap_start, cap_end));
+        }
+        None
+    }
+}
+
+impl Strategy for LiteralPrefixCapture {
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn group_info(&self) -> &GroupInfo {
+        self.core.group_info()
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn create_cache(&self) -> Cache {
+        self.core.create_cache()
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn reset_cache(&self, cache: &mut Cache) {
+        self.core.reset_cache(cache);
+    }
+
+    fn is_accelerated(&self) -> bool {
+        true
+    }
+
+    fn memory_usage(&self) -> usize {
+        let prefix_bytes: usize = self.prefixes.iter().map(|p| p.len()).sum();
+        self.core.memory_usage()
+            + self.prefixes.len() * core::mem::size_of::<Box<[u8]>>()
+            + prefix_bytes
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn search(&self, _cache: &mut Cache, input: &Input<'_>) -> Option<Match> {
+        self.try_fast_match(input)?;
+        Some(Match::new(PatternID::ZERO, 0..input.haystack().len()))
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn search_half(
+        &self,
+        _cache: &mut Cache,
+        input: &Input<'_>,
+    ) -> Option<HalfMatch> {
+        self.try_fast_match(input)?;
+        Some(HalfMatch::new(PatternID::ZERO, input.haystack().len()))
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn is_match(&self, _cache: &mut Cache, input: &Input<'_>) -> bool {
+        self.try_fast_match(input).is_some()
+    }
+
+    #[cfg_attr(feature = "perf-inline", inline(always))]
+    fn search_slots(
+        &self,
+        _cache: &mut Cache,
+        input: &Input<'_>,
+        slots: &mut [Option<NonMaxUsize>],
+    ) -> Option<PatternID> {
+        let (cap_start, cap_end) = self.try_fast_match(input)?;
+        let match_end = input.haystack().len();
+        if let Some(slot) = slots.get_mut(0) {
+            *slot = NonMaxUsize::new(0);
+        }
+        if let Some(slot) = slots.get_mut(1) {
+            *slot = NonMaxUsize::new(match_end);
+        }
+        if let Some(slot) = slots.get_mut(2) {
+            *slot = NonMaxUsize::new(cap_start);
+        }
+        if let Some(slot) = slots.get_mut(3) {
+            *slot = NonMaxUsize::new(cap_end);
+        }
+        Some(PatternID::ZERO)
+    }
+
+    fn which_overlapping_matches(
+        &self,
+        cache: &mut Cache,
+        input: &Input<'_>,
+        patset: &mut PatternSet,
+    ) {
+        self.core.which_overlapping_matches(cache, input, patset)
+    }
+}
+
+/// Recognizes `^<prefix-set>([^X]+)X.*$` (default flags) and returns the
+/// enumerated prefix set together with the terminator byte X.
+fn try_recognize_prefix_capture(hir: &Hir) -> Option<(Box<[Box<[u8]>]>, u8)> {
+    let HirKind::Concat(parts) = hir.kind() else {
+        return None;
+    };
+    let mut iter = parts.iter();
+
+    // Multiline `(?m)` lowers `^` to `Look::StartLF`, which would break
+    // the byte-level fast path; require text-start specifically.
+    if !matches!(iter.next()?.kind(), HirKind::Look(Look::Start)) {
+        return None;
+    }
+
+    let mut prefixes: Vec<Vec<u8>> = vec![Vec::new()];
+    let capture = loop {
+        let part = iter.next()?;
+        if matches!(part.kind(), HirKind::Capture(_)) {
+            break part;
+        }
+        extend_prefix(&mut prefixes, part)?;
+        if prefixes.len() > MAX_PREFIX_VARIANTS {
+            return None;
+        }
+    };
+
+    let HirKind::Capture(cap) = capture.kind() else { unreachable!() };
+    if cap.index != 1 {
+        return None;
+    }
+    let terminator = capture_terminator_byte(&cap.sub)?;
+
+    let HirKind::Literal(Literal(lit)) = iter.next()?.kind() else {
+        return None;
+    };
+    if lit.as_ref() != [terminator] {
+        return None;
+    }
+
+    if !is_dot_star(iter.next()?) {
+        return None;
+    }
+
+    if !matches!(iter.next()?.kind(), HirKind::Look(Look::End)) {
+        return None;
+    }
+    if iter.next().is_some() {
+        return None;
+    }
+
+    prefixes.sort_unstable();
+    prefixes.dedup();
+    let mut prefixes: Vec<Box<[u8]>> =
+        prefixes.into_iter().map(Vec::into_boxed_slice).collect();
+    prefixes.sort_unstable_by_key(|p| core::cmp::Reverse(p.len()));
+
+    Some((prefixes.into_boxed_slice(), terminator))
+}
+
+/// Extend the accumulator with one prefix segment. Returns `None` if the
+/// segment isn't a finite literal shape (literal / concat / alternation /
+/// `?`-optional combination of those).
+fn extend_prefix(variants: &mut Vec<Vec<u8>>, hir: &Hir) -> Option<()> {
+    match hir.kind() {
+        HirKind::Literal(Literal(bytes)) => {
+            for v in variants.iter_mut() {
+                v.extend_from_slice(bytes);
+            }
+            Some(())
+        }
+        HirKind::Concat(parts) => {
+            for part in parts {
+                extend_prefix(variants, part)?;
+                if variants.len() > MAX_PREFIX_VARIANTS {
+                    return None;
+                }
+            }
+            Some(())
+        }
+        HirKind::Repetition(rep) if rep.min == 0 && rep.max == Some(1) => {
+            let mut with = variants.clone();
+            extend_prefix(&mut with, &rep.sub)?;
+            if variants.len() + with.len() > MAX_PREFIX_VARIANTS {
+                return None;
+            }
+            variants.extend(with);
+            Some(())
+        }
+        HirKind::Alternation(branches) => {
+            let base = core::mem::take(variants);
+            for branch in branches {
+                let mut local = base.clone();
+                extend_prefix(&mut local, branch)?;
+                if variants.len() + local.len() > MAX_PREFIX_VARIANTS {
+                    return None;
+                }
+                variants.extend(local);
+            }
+            Some(())
+        }
+        _ => None,
+    }
+}
+
+/// Capture must be a greedy `[^X]+` over a single ASCII byte X.
+fn capture_terminator_byte(hir: &Hir) -> Option<u8> {
+    let HirKind::Repetition(rep) = hir.kind() else {
+        return None;
+    };
+    if rep.min < 1 || rep.max.is_some() || !rep.greedy {
+        return None;
+    }
+    let HirKind::Class(class) = rep.sub.kind() else {
+        return None;
+    };
+    single_excluded_ascii_byte(class)
+}
+
+/// `.*` for default-flag regexes: any byte except `\n`, zero or more, greedy.
+fn is_dot_star(hir: &Hir) -> bool {
+    let HirKind::Repetition(rep) = hir.kind() else {
+        return false;
+    };
+    if rep.min != 0 || rep.max.is_some() || !rep.greedy {
+        return false;
+    }
+    let HirKind::Class(class) = rep.sub.kind() else {
+        return false;
+    };
+    single_excluded_ascii_byte(class) == Some(b'\n')
+}
+
+/// Returns `Some(b)` iff `class` matches every codepoint or byte except a
+/// single ASCII byte `b`. ASCII-only because the runtime matcher uses
+/// `memchr` over byte slices.
+fn single_excluded_ascii_byte(class: &Class) -> Option<u8> {
+    match class {
+        Class::Unicode(uc) => {
+            let ranges = uc.ranges();
+            if ranges.len() != 2 {
+                return None;
+            }
+            let (r0, r1) = (&ranges[0], &ranges[1]);
+            if (r0.start() as u32) != 0 || (r1.end() as u32) != 0x10FFFF {
+                return None;
+            }
+            let gap_start = r0.end() as u32 + 1;
+            let gap_end = r1.start() as u32 - 1;
+            if gap_start != gap_end || gap_start > 0x7F {
+                return None;
+            }
+            Some(gap_start as u8)
+        }
+        Class::Bytes(bc) => {
+            let ranges = bc.ranges();
+            if ranges.len() != 2 {
+                return None;
+            }
+            let (r0, r1) = (&ranges[0], &ranges[1]);
+            if r0.start() != 0 || r1.end() != 0xFF {
+                return None;
+            }
+            let gap_start = r0.end() as u16 + 1;
+            let gap_end = r1.start() as u16 - 1;
+            if gap_start != gap_end || gap_start > 0x7F {
+                return None;
+            }
+            Some(gap_start as u8)
+        }
+    }
+}
diff --git a/src/regex/string.rs b/src/regex/string.rs
index e066d7630c..ba8acea961 100644
--- a/src/regex/string.rs
+++ b/src/regex/string.rs
@@ -938,6 +938,55 @@ impl Regex {
             return Cow::Owned(new);
         }
 
+        // When the replacement is exactly a single capture reference
+        // (`$N` / `${N}`), each match's output is just the captured slice
+        // — no `Captures::expand`. For a single match covering the whole
+        // haystack (common with anchored regexes), this returns a
+        // `Cow::Borrowed` with no output allocation at all.
+        if let Some(group_idx) = rep.single_capture_ref() {
+            if limit == 1 {
+                let Some(cap) = self.captures(haystack) else {
+                    return Cow::Borrowed(haystack);
+                };
+                let m = cap.get(0).unwrap();
+                let g = cap.get(group_idx);
+                if m.start() == 0 && m.end() == haystack.len() {
+                    return match g {
+                        Some(g) => {
+                            Cow::Borrowed(&haystack[g.start()..g.end()])
+                        }
+                        None => Cow::Borrowed(""),
+                    };
+                }
+                let mut new = String::with_capacity(haystack.len());
+                new.push_str(&haystack[..m.start()]);
+                if let Some(g) = g {
+                    new.push_str(&haystack[g.start()..g.end()]);
+                }
+                new.push_str(&haystack[m.end()..]);
+                return Cow::Owned(new);
+            }
+            let mut it = self.captures_iter(haystack).enumerate().peekable();
+            if it.peek().is_none() {
+                return Cow::Borrowed(haystack);
+            }
+            let mut new = String::with_capacity(haystack.len());
+            let mut last_match = 0;
+            for (i, cap) in it {
+                let m = cap.get(0).unwrap();
+                new.push_str(&haystack[last_match..m.start()]);
+                if let Some(g) = cap.get(group_idx) {
+                    new.push_str(&haystack[g.start()..g.end()]);
+                }
+                last_match = m.end();
+                if limit > 0 && i + 1 >= limit {
+                    break;
+                }
+            }
+            new.push_str(&haystack[last_match..]);
+            return Cow::Owned(new);
+        }
+
         // The slower path, which we use if the replacement may need access to
         // capture groups.
         let mut it = self.captures_iter(haystack).enumerate().peekable();
@@ -2470,6 +2519,17 @@ pub trait Replacer {
         None
     }
 
+    /// Returns `Some(group_index)` if this replacement is *exactly* a single
+    /// capture reference (`$N` or `${N}`) with no surrounding text.
+    ///
+    /// Replacement routines use this to skip [`Captures::expand`] entirely
+    /// — each match's output is just the captured slice, and when the
+    /// match covers the whole haystack the result is a `Cow::Borrowed`
+    /// with no output allocation.
+    fn single_capture_ref(&mut self) -> Option<usize> {
+        None
+    }
+
     /// Returns a type that implements `Replacer`, but that borrows and wraps
     /// this `Replacer`.
     ///
@@ -2505,6 +2565,10 @@ impl<'a> Replacer for &'a str {
     fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
         no_expansion(self)
     }
+
+    fn single_capture_ref(&mut self) -> Option<usize> {
+        single_capture_ref(self)
+    }
 }
 
 impl<'a> Replacer for &'a String {
@@ -2515,6 +2579,10 @@ impl<'a> Replacer for &'a String {
     fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
         no_expansion(self)
     }
+
+    fn single_capture_ref(&mut self) -> Option<usize> {
+        single_capture_ref(self)
+    }
 }
 
 impl Replacer for String {
@@ -2525,6 +2593,10 @@ impl Replacer for String {
     fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
         no_expansion(self)
     }
+
+    fn single_capture_ref(&mut self) -> Option<usize> {
+        single_capture_ref(self)
+    }
 }
 
 impl<'a> Replacer for Cow<'a, str> {
@@ -2535,6 +2607,10 @@ impl<'a> Replacer for Cow<'a, str> {
     fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
         no_expansion(self)
     }
+
+    fn single_capture_ref(&mut self) -> Option<usize> {
+        single_capture_ref(self)
+    }
 }
 
 impl<'a> Replacer for &'a Cow<'a, str> {
@@ -2545,6 +2621,10 @@ impl<'a> Replacer for &'a Cow<'a, str> {
     fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
         no_expansion(self)
     }
+
+    fn single_capture_ref(&mut self) -> Option<usize> {
+        single_capture_ref(self)
+    }
 }
 
 impl<F, T> Replacer for F
@@ -2623,3 +2703,26 @@ fn no_expansion<T: AsRef<str>>(replacement: &T) -> Option<Cow<'_, str>> {
         None => Some(Cow::Borrowed(replacement)),
     }
 }
+
+/// Returns `Some(N)` iff the replacement is *exactly* a single capture
+/// reference of the form `$N` or `${N}` for a numeric group index. Named
+/// refs (`${name}`) and `$$` (escaped `$`) are rejected.
+///
+/// This is meant to be used to implement the [`Replacer::single_capture_ref`]
+/// method in its various trait impls.
+fn single_capture_ref<T: AsRef<str>>(replacement: &T) -> Option<usize> {
+    let rest = replacement.as_ref().strip_prefix('$')?;
+    if rest.starts_with('$') {
+        return None;
+    }
+    let digits = match rest.strip_prefix('{') {
+        Some(inner) => inner.strip_suffix('}')?,
+        None => rest,
+    };
+    // `parse::<usize>` accepts a leading `+`, which would let `${+1}`
+    // masquerade as group 1; check explicitly.
+    if digits.is_empty() || digits.bytes().any(|b| !b.is_ascii_digit()) {
+        return None;
+    }
+    digits.parse().ok()
+}

From 73262c9df12b501b553e7889727ed4df3bbd33e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Thu, 28 May 2026 12:12:45 +0200
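Subject: [PATCH 2/6] Fix literal prefix capture fast path correctness

The fast path added in the previous patch could disagree with the
regular engines. This patch:

- only enables the strategy for `MatchKind::LeftmostFirst`;
- keeps the enumerated prefixes in regex priority order (a greedy `?`
  tries the longer variant first, a lazy `??` the shorter one) instead
  of sorting them longest-first, and moves on to the next prefix when
  one fails rather than reporting no match;
- no longer treats a `\n` inside the capture as a failure, since
  `[^X]+` may match a newline;
- requires the capture repetition to be exactly `+` (`min == 1`), so
  `{2,}` is not accepted;
- accepts Unicode classes only when the `utf8_empty` option is set;
- makes the `$N` replacement fast path return `Cow::Owned` for a
  captured sub-slice (borrowing only when the result is the entire
  haystack), and forwards `single_capture_ref` through `ReplacerRef`.

Regression tests are added to tests/misc.rs and tests/replace.rs.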
---
 regex-automata/src/meta/strategy.rs | 76 ++++++++++++++++++-----------
 src/regex/string.rs                 | 26 ++++++----
 tests/misc.rs                       | 18 +++++++
 tests/replace.rs                    | 38 +++++++++++++++
 4 files changed, 120 insertions(+), 38 deletions(-)

diff --git a/regex-automata/src/meta/strategy.rs b/regex-automata/src/meta/strategy.rs
index 297eb3d674..95e3315ed5 100644
--- a/regex-automata/src/meta/strategy.rs
+++ b/regex-automata/src/meta/strategy.rs
@@ -1934,8 +1934,8 @@ fn copy_match_to_slots(m: Match, slots: &mut [Option<NonMaxUsize>]) {
 #[derive(Debug)]
 struct LiteralPrefixCapture {
     core: Core,
-    /// Distinct literal byte prefixes, longest-first so the runtime probe
-    /// is greedy. Bounded to `MAX_PREFIX_VARIANTS` at construction time.
+    /// Distinct literal byte prefixes in regex-priority order. Bounded to
+    /// `MAX_PREFIX_VARIANTS` at construction time.
     prefixes: Box<[Box<[u8]>]>,
     /// Single ASCII byte ending the capture (also the literal that must
     /// follow the capture in the original regex).
@@ -1953,6 +1953,9 @@ impl LiteralPrefixCapture {
         if hirs.len() != 1 {
             return Err(core);
         }
+        if core.info.config().get_match_kind() != MatchKind::LeftmostFirst {
+            return Err(core);
+        }
         if !core.info.is_always_anchored_start()
             || !core.info.is_always_anchored_end()
         {
@@ -1963,8 +1966,9 @@ impl LiteralPrefixCapture {
         if core.info.config().get_line_terminator() != b'\n' {
             return Err(core);
         }
+        let allow_unicode_classes = core.info.config().get_utf8_empty();
         let Some((prefixes, terminator)) =
-            try_recognize_prefix_capture(hirs[0])
+            try_recognize_prefix_capture(hirs[0], allow_unicode_classes)
         else {
             return Err(core);
         };
@@ -1985,18 +1989,14 @@ impl LiteralPrefixCapture {
                 continue;
             }
             let cap_start = prefix.len();
-            // Fused scan: the first byte that matters in the tail is either
-            // the terminator (success) or `\n` (failure for `.*$`).
-            let off = crate::util::memchr::memchr2(
+            let Some(off) = crate::util::memchr::memchr(
                 self.terminator,
-                b'\n',
                 &bytes[cap_start..],
-            )?;
-            if bytes[cap_start + off] != self.terminator {
-                return None;
-            }
+            ) else {
+                continue;
+            };
             if off == 0 {
-                // `[^X]+` requires >= 1 byte; try a shorter prefix.
+                // `[^X]+` requires >= 1 byte; try the next possible prefix.
                 continue;
             }
             let cap_end = cap_start + off;
@@ -2005,7 +2005,7 @@ impl LiteralPrefixCapture {
             if crate::util::memchr::memchr(b'\n', &bytes[cap_end + 1..])
                 .is_some()
             {
-                return None;
+                continue;
             }
             return Some((cap_start, cap_end));
         }
@@ -2097,7 +2097,10 @@ impl Strategy for LiteralPrefixCapture {
 
 /// Recognizes `^<prefix-set>([^X]+)X.*$` (default flags) and returns the
 /// enumerated prefix set together with the terminator byte X.
-fn try_recognize_prefix_capture(hir: &Hir) -> Option<(Box<[Box<[u8]>]>, u8)> {
+fn try_recognize_prefix_capture(
+    hir: &Hir,
+    allow_unicode_classes: bool,
+) -> Option<(Box<[Box<[u8]>]>, u8)> {
     let HirKind::Concat(parts) = hir.kind() else {
         return None;
     };
@@ -2125,7 +2128,7 @@ fn try_recognize_prefix_capture(hir: &Hir) -> Option<(Box<[Box<[u8]>]>, u8)> {
     if cap.index != 1 {
         return None;
     }
-    let terminator = capture_terminator_byte(&cap.sub)?;
+    let terminator = capture_terminator_byte(&cap.sub, allow_unicode_classes)?;
 
     let HirKind::Literal(Literal(lit)) = iter.next()?.kind() else {
         return None;
@@ -2134,7 +2137,7 @@ fn try_recognize_prefix_capture(hir: &Hir) -> Option<(Box<[Box<[u8]>]>, u8)> {
         return None;
     }
 
-    if !is_dot_star(iter.next()?) {
+    if !is_dot_star(iter.next()?, allow_unicode_classes) {
         return None;
     }
 
@@ -2145,11 +2148,14 @@ fn try_recognize_prefix_capture(hir: &Hir) -> Option<(Box<[Box<[u8]>]>, u8)> {
         return None;
     }
 
-    prefixes.sort_unstable();
-    prefixes.dedup();
-    let mut prefixes: Vec<Box<[u8]>> =
-        prefixes.into_iter().map(Vec::into_boxed_slice).collect();
-    prefixes.sort_unstable_by_key(|p| core::cmp::Reverse(p.len()));
+    let mut deduped = Vec::with_capacity(prefixes.len());
+    for prefix in prefixes {
+        if !deduped.iter().any(|seen| seen == &prefix) {
+            deduped.push(prefix);
+        }
+    }
+    let prefixes: Vec<Box<[u8]>> =
+        deduped.into_iter().map(Vec::into_boxed_slice).collect();
 
     Some((prefixes.into_boxed_slice(), terminator))
 }
@@ -2180,7 +2186,12 @@ fn extend_prefix(variants: &mut Vec<Vec<u8>>, hir: &Hir) -> Option<()> {
             if variants.len() + with.len() > MAX_PREFIX_VARIANTS {
                 return None;
             }
-            variants.extend(with);
+            if rep.greedy {
+                let without = core::mem::replace(variants, with);
+                variants.extend(without);
+            } else {
+                variants.extend(with);
+            }
             Some(())
         }
         HirKind::Alternation(branches) => {
@@ -2200,21 +2211,24 @@ fn extend_prefix(variants: &mut Vec<Vec<u8>>, hir: &Hir) -> Option<()> {
 }
 
 /// Capture must be a greedy `[^X]+` over a single ASCII byte X.
-fn capture_terminator_byte(hir: &Hir) -> Option<u8> {
+fn capture_terminator_byte(
+    hir: &Hir,
+    allow_unicode_classes: bool,
+) -> Option<u8> {
     let HirKind::Repetition(rep) = hir.kind() else {
         return None;
     };
-    if rep.min < 1 || rep.max.is_some() || !rep.greedy {
+    if rep.min != 1 || rep.max.is_some() || !rep.greedy {
         return None;
     }
     let HirKind::Class(class) = rep.sub.kind() else {
         return None;
     };
-    single_excluded_ascii_byte(class)
+    single_excluded_ascii_byte(class, allow_unicode_classes)
 }
 
 /// `.*` for default-flag regexes: any byte except `\n`, zero or more, greedy.
-fn is_dot_star(hir: &Hir) -> bool {
+fn is_dot_star(hir: &Hir, allow_unicode_classes: bool) -> bool {
     let HirKind::Repetition(rep) = hir.kind() else {
         return false;
     };
@@ -2224,15 +2238,21 @@ fn is_dot_star(hir: &Hir) -> bool {
     let HirKind::Class(class) = rep.sub.kind() else {
         return false;
     };
-    single_excluded_ascii_byte(class) == Some(b'\n')
+    single_excluded_ascii_byte(class, allow_unicode_classes) == Some(b'\n')
 }
 
 /// Returns `Some(b)` iff `class` matches every codepoint or byte except a
 /// single ASCII byte `b`. ASCII-only because the runtime matcher uses
 /// `memchr` over byte slices.
-fn single_excluded_ascii_byte(class: &Class) -> Option<u8> {
+fn single_excluded_ascii_byte(
+    class: &Class,
+    allow_unicode_classes: bool,
+) -> Option<u8> {
     match class {
         Class::Unicode(uc) => {
+            if !allow_unicode_classes {
+                return None;
+            }
             let ranges = uc.ranges();
             if ranges.len() != 2 {
                 return None;
diff --git a/src/regex/string.rs b/src/regex/string.rs
index ba8acea961..6b3aa0527e 100644
--- a/src/regex/string.rs
+++ b/src/regex/string.rs
@@ -939,10 +939,8 @@ impl Regex {
         }
 
         // When the replacement is exactly a single capture reference
-        // (`$N` / `${N}`), each match's output is just the captured slice
-        // — no `Captures::expand`. For a single match covering the whole
-        // haystack (common with anchored regexes), this returns a
-        // `Cow::Borrowed` with no output allocation at all.
+        // (`$N` / `${N}`), each match's output is just the captured slice,
+        // so we can skip `Captures::expand`.
         if let Some(group_idx) = rep.single_capture_ref() {
             if limit == 1 {
                 let Some(cap) = self.captures(haystack) else {
@@ -953,9 +951,15 @@ impl Regex {
                 if m.start() == 0 && m.end() == haystack.len() {
                     return match g {
                         Some(g) => {
-                            Cow::Borrowed(&haystack[g.start()..g.end()])
+                            if g.start() == 0 && g.end() == haystack.len() {
+                                Cow::Borrowed(haystack)
+                            } else {
+                                Cow::Owned(String::from(
+                                    &haystack[g.start()..g.end()],
+                                ))
+                            }
                         }
-                        None => Cow::Borrowed(""),
+                        None => Cow::Owned(String::new()),
                     };
                 }
                 let mut new = String::with_capacity(haystack.len());
@@ -2522,10 +2526,8 @@ pub trait Replacer {
     /// Returns `Some(group_index)` if this replacement is *exactly* a single
     /// capture reference (`$N` or `${N}`) with no surrounding text.
     ///
-    /// Replacement routines use this to skip [`Captures::expand`] entirely
-    /// — each match's output is just the captured slice, and when the
-    /// match covers the whole haystack the result is a `Cow::Borrowed`
-    /// with no output allocation.
+    /// Replacement routines use this to skip [`Captures::expand`] entirely:
+    /// each match's output is just the captured slice.
     fn single_capture_ref(&mut self) -> Option<usize> {
         None
     }
@@ -2654,6 +2656,10 @@ impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> {
     fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
         self.0.no_expansion()
     }
+
+    fn single_capture_ref(&mut self) -> Option<usize> {
+        self.0.single_capture_ref()
+    }
 }
 
 /// A helper type for forcing literal string replacement.
diff --git a/tests/misc.rs b/tests/misc.rs
index c04c9c9fe2..1b3329eee0 100644
--- a/tests/misc.rs
+++ b/tests/misc.rs
@@ -141,3 +141,21 @@ fn dfa_handles_pathological_case() {
     };
     assert!(re.is_match(&text));
 }
+
+#[test]
+fn literal_prefix_capture_requires_exact_plus() {
+    let re = regex!(r"^a([^/]{2,})/.*$");
+    assert!(!re.is_match("ab/x"));
+}
+
+#[test]
+fn literal_prefix_capture_bytes_unicode_rejects_invalid_utf8() {
+    let re = regex::bytes::Regex::new(r"^a([^/]+)/.*$").unwrap();
+    assert!(!re.is_match(b"a\xFF/x"));
+
+    let re = regex::bytes::RegexBuilder::new(r"^a([^/]+)/.*$")
+        .unicode(false)
+        .build()
+        .unwrap();
+    assert!(re.is_match(b"a\xFF/x"));
+}
diff --git a/tests/replace.rs b/tests/replace.rs
index f26ae46030..b656decf63 100644
--- a/tests/replace.rs
+++ b/tests/replace.rs
@@ -181,3 +181,41 @@ fn replacen_with_captures() {
     let re = regex::Regex::new(r"([0-9])").unwrap();
     assert_eq!(re.replacen("age: 1234", 2, "${1}Z"), "age: 1Z2Z34");
 }
+
+#[test]
+fn replace_single_capture_ref_borrow_contract() {
+    let re = regex::Regex::new(r"^a([^/]+)/.*$").unwrap();
+    let got = re.replace("abc/x", "$1");
+    assert_eq!(got, "bc");
+    assert!(matches!(got, std::borrow::Cow::Owned(_)));
+}
+
+#[test]
+fn replace_literal_prefix_capture_allows_newline_in_capture() {
+    let re = regex::Regex::new(r"^a([^/]+)/.*$").unwrap();
+    assert_eq!(re.replace("a\nb/x", "$1"), "\nb");
+}
+
+#[test]
+fn replace_literal_prefix_capture_respects_prefix_priority() {
+    let re = regex::Regex::new(r"^(?:a|ab)([^/]+)/.*$").unwrap();
+    assert_eq!(re.replace("abc/x", "$1"), "bc");
+}
+
+#[test]
+fn replace_literal_prefix_capture_respects_ungreedy_optional() {
+    let re = regex::Regex::new(r"^a??([^/]+)/.*$").unwrap();
+    assert_eq!(re.replace("abc/x", "$1"), "abc");
+}
+
+#[test]
+fn replace_literal_prefix_capture_backtracks_greedy_optional() {
+    let re = regex::Regex::new(r"^a?([^/]+)/.*$").unwrap();
+    assert_eq!(re.replace("a/x", "$1"), "a");
+}
+
+#[test]
+fn replace_literal_prefix_capture_backtracks_after_tail_newline() {
+    let re = regex::Regex::new(r"^(?:a|ab/b\nc)([^/]+)/.*$").unwrap();
+    assert_eq!(re.replace("ab/b\ncd/e", "$1"), "d");
+}

From b73bfb8ac41b09ecc0099ae97e378c68059b6ea4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Thu, 28 May 2026 12:27:18 +0200
Subject: [PATCH 3/6] Recover literal prefix capture scan speed

---
 regex-automata/src/meta/strategy.rs | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/regex-automata/src/meta/strategy.rs b/regex-automata/src/meta/strategy.rs
index 95e3315ed5..9b130a2d76 100644
--- a/regex-automata/src/meta/strategy.rs
+++ b/regex-automata/src/meta/strategy.rs
@@ -1984,16 +1984,25 @@ impl LiteralPrefixCapture {
             return None;
         }
         let bytes = input.haystack();
-        for prefix in self.prefixes.iter() {
+        'prefix: for prefix in self.prefixes.iter() {
             if !bytes.starts_with(prefix) {
                 continue;
             }
             let cap_start = prefix.len();
-            let Some(off) = crate::util::memchr::memchr(
-                self.terminator,
-                &bytes[cap_start..],
-            ) else {
-                continue;
+            let mut scan_start = cap_start;
+            let off = loop {
+                let Some(next) = crate::util::memchr::memchr2(
+                    self.terminator,
+                    b'\n',
+                    &bytes[scan_start..],
+                ) else {
+                    continue 'prefix;
+                };
+                let found = scan_start + next;
+                if bytes[found] == self.terminator {
+                    break found - cap_start;
+                }
+                scan_start = found + 1;
             };
             if off == 0 {
                 // `[^X]+` requires >= 1 byte; try the next possible prefix.

From 87dcb2ca2c68d44c6c62d9365e70bc8ab4b1c3fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Thu, 28 May 2026 12:46:36 +0200
Subject: [PATCH 4/6] Fix literal prefix capture fast path correctness

---
 regex-automata/src/meta/strategy.rs | 75 +++++++++++++++++------------
 tests/misc.rs                       | 10 ++++
 tests/replace.rs                    |  6 +++
 3 files changed, 61 insertions(+), 30 deletions(-)

diff --git a/regex-automata/src/meta/strategy.rs b/regex-automata/src/meta/strategy.rs
index 9b130a2d76..66344ab389 100644
--- a/regex-automata/src/meta/strategy.rs
+++ b/regex-automata/src/meta/strategy.rs
@@ -1980,6 +1980,11 @@ impl LiteralPrefixCapture {
     /// the regex is `^...$`.
     #[cfg_attr(feature = "perf-inline", inline(always))]
     fn try_fast_match(&self, input: &Input<'_>) -> Option<(usize, usize)> {
+        if let Some(pid) = input.get_anchored().pattern() {
+            if pid != PatternID::ZERO {
+                return None;
+            }
+        }
         if input.start() != 0 || input.end() != input.haystack().len() {
             return None;
         }
@@ -2127,10 +2132,7 @@ fn try_recognize_prefix_capture(
         if matches!(part.kind(), HirKind::Capture(_)) {
             break part;
         }
-        extend_prefix(&mut prefixes, part)?;
-        if prefixes.len() > MAX_PREFIX_VARIANTS {
-            return None;
-        }
+        prefixes = concat_prefix_variants(prefixes, prefix_variants(part)?)?;
     };
 
     let HirKind::Capture(cap) = capture.kind() else { unreachable!() };
@@ -2169,56 +2171,69 @@ fn try_recognize_prefix_capture(
     Some((prefixes.into_boxed_slice(), terminator))
 }
 
-/// Extend the accumulator with one prefix segment. Returns `None` if the
-/// segment isn't a finite literal shape (literal / concat / alternation /
-/// `?`-optional combination of those).
-fn extend_prefix(variants: &mut Vec<Vec<u8>>, hir: &Hir) -> Option<()> {
+/// Return all literal variants for one prefix segment in regex-priority
+/// order. Returns `None` if the segment isn't a finite literal shape
+/// (literal / concat / alternation / `?`-optional combination of those).
+fn prefix_variants(hir: &Hir) -> Option<Vec<Vec<u8>>> {
     match hir.kind() {
-        HirKind::Literal(Literal(bytes)) => {
-            for v in variants.iter_mut() {
-                v.extend_from_slice(bytes);
-            }
-            Some(())
-        }
+        HirKind::Literal(Literal(bytes)) => Some(vec![bytes.to_vec()]),
         HirKind::Concat(parts) => {
+            let mut variants = vec![Vec::new()];
             for part in parts {
-                extend_prefix(variants, part)?;
-                if variants.len() > MAX_PREFIX_VARIANTS {
-                    return None;
-                }
+                variants =
+                    concat_prefix_variants(variants, prefix_variants(part)?)?;
             }
-            Some(())
+            Some(variants)
         }
         HirKind::Repetition(rep) if rep.min == 0 && rep.max == Some(1) => {
-            let mut with = variants.clone();
-            extend_prefix(&mut with, &rep.sub)?;
-            if variants.len() + with.len() > MAX_PREFIX_VARIANTS {
+            let mut variants = prefix_variants(&rep.sub)?;
+            if variants.len() + 1 > MAX_PREFIX_VARIANTS {
                 return None;
             }
             if rep.greedy {
-                let without = core::mem::replace(variants, with);
-                variants.extend(without);
+                variants.push(Vec::new());
             } else {
-                variants.extend(with);
+                variants.insert(0, Vec::new());
             }
-            Some(())
+            Some(variants)
         }
         HirKind::Alternation(branches) => {
-            let base = core::mem::take(variants);
+            let mut variants = Vec::new();
             for branch in branches {
-                let mut local = base.clone();
-                extend_prefix(&mut local, branch)?;
+                let local = prefix_variants(branch)?;
                 if variants.len() + local.len() > MAX_PREFIX_VARIANTS {
                     return None;
                 }
                 variants.extend(local);
             }
-            Some(())
+            Some(variants)
         }
         _ => None,
     }
 }
 
+/// Concatenate two already-prioritized prefix variant lists. For regex
+/// concatenation, every suffix priority is exhausted before backtracking to
+/// the next prefix priority.
+fn concat_prefix_variants(
+    prefixes: Vec<Vec<u8>>,
+    suffixes: Vec<Vec<u8>>,
+) -> Option<Vec<Vec<u8>>> {
+    if prefixes.len().checked_mul(suffixes.len())? > MAX_PREFIX_VARIANTS {
+        return None;
+    }
+    let mut variants = Vec::with_capacity(prefixes.len() * suffixes.len());
+    for prefix in prefixes {
+        for suffix in &suffixes {
+            let mut variant = Vec::with_capacity(prefix.len() + suffix.len());
+            variant.extend_from_slice(&prefix);
+            variant.extend_from_slice(suffix);
+            variants.push(variant);
+        }
+    }
+    Some(variants)
+}
+
 /// Capture must be a greedy `[^X]+` over a single ASCII byte X.
 fn capture_terminator_byte(
     hir: &Hir,
diff --git a/tests/misc.rs b/tests/misc.rs
index 1b3329eee0..f6f46e5948 100644
--- a/tests/misc.rs
+++ b/tests/misc.rs
@@ -148,6 +148,16 @@ fn literal_prefix_capture_requires_exact_plus() {
     assert!(!re.is_match("ab/x"));
 }
 
+#[test]
+fn literal_prefix_capture_respects_invalid_pattern_id() {
+    let re = regex_automata::meta::Regex::new(r"^a([^/]+)/.*$").unwrap();
+    let input = regex_automata::Input::new("abc/x").anchored(
+        regex_automata::Anchored::Pattern(regex_automata::PatternID::must(1)),
+    );
+    let mut cache = re.create_cache();
+    assert_eq!(None, re.search_with(&mut cache, &input));
+}
+
 #[test]
 fn literal_prefix_capture_bytes_unicode_rejects_invalid_utf8() {
     let re = regex::bytes::Regex::new(r"^a([^/]+)/.*$").unwrap();
diff --git a/tests/replace.rs b/tests/replace.rs
index b656decf63..8faa5ff42e 100644
--- a/tests/replace.rs
+++ b/tests/replace.rs
@@ -214,6 +214,12 @@ fn replace_literal_prefix_capture_backtracks_greedy_optional() {
     assert_eq!(re.replace("a/x", "$1"), "a");
 }
 
+#[test]
+fn replace_literal_prefix_capture_concat_optional_priority() {
+    let re = regex::Regex::new(r"^a?(?:ab)?([^/]+)/.*$").unwrap();
+    assert_eq!(re.replace("abx/y", "$1"), "bx");
+}
+
 #[test]
 fn replace_literal_prefix_capture_backtracks_after_tail_newline() {
     let re = regex::Regex::new(r"^(?:a|ab/b\nc)([^/]+)/.*$").unwrap();

From 254c3d51f7800f24f073327e4df07dc58c59adda Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Thu, 28 May 2026 13:01:59 +0200
Subject: [PATCH 5/6] Validate UTF-8 in literal prefix capture fast path

---
 regex-automata/src/meta/strategy.rs | 56 ++++++++++++++++++++---------
 tests/misc.rs                       |  6 ++++
 2 files changed, 45 insertions(+), 17 deletions(-)

diff --git a/regex-automata/src/meta/strategy.rs b/regex-automata/src/meta/strategy.rs
index 66344ab389..b8843f4aa5 100644
--- a/regex-automata/src/meta/strategy.rs
+++ b/regex-automata/src/meta/strategy.rs
@@ -1940,6 +1940,9 @@ struct LiteralPrefixCapture {
     /// Single ASCII byte ending the capture (also the literal that must
     /// follow the capture in the original regex).
     terminator: u8,
+    /// Whether the capture class or the tail class was a Unicode class. When
+    /// true, the byte fast path must still reject invalid UTF-8 haystacks.
+    requires_valid_utf8: bool,
 }
 
 /// Each `(?:...)?` doubles the count and each `(a|b|c)` multiplies it,
@@ -1967,12 +1970,17 @@ impl LiteralPrefixCapture {
             return Err(core);
         }
         let allow_unicode_classes = core.info.config().get_utf8_empty();
-        let Some((prefixes, terminator)) =
+        let Some((prefixes, terminator, requires_valid_utf8)) =
             try_recognize_prefix_capture(hirs[0], allow_unicode_classes)
         else {
             return Err(core);
         };
-        Ok(LiteralPrefixCapture { core, prefixes, terminator })
+        Ok(LiteralPrefixCapture {
+            core,
+            prefixes,
+            terminator,
+            requires_valid_utf8,
+        })
     }
 
     /// Returns capture 1's byte offsets if the input matches, else `None`.
@@ -2021,6 +2029,10 @@ impl LiteralPrefixCapture {
             {
                 continue;
             }
+            if self.requires_valid_utf8 && core::str::from_utf8(bytes).is_err()
+            {
+                return None;
+            }
             return Some((cap_start, cap_end));
         }
         None
@@ -2114,7 +2126,7 @@ impl Strategy for LiteralPrefixCapture {
 fn try_recognize_prefix_capture(
     hir: &Hir,
     allow_unicode_classes: bool,
-) -> Option<(Box<[Box<[u8]>]>, u8)> {
+) -> Option<(Box<[Box<[u8]>]>, u8, bool)> {
     let HirKind::Concat(parts) = hir.kind() else {
         return None;
     };
@@ -2139,7 +2151,8 @@ fn try_recognize_prefix_capture(
     if cap.index != 1 {
         return None;
     }
-    let terminator = capture_terminator_byte(&cap.sub, allow_unicode_classes)?;
+    let (terminator, capture_requires_utf8) =
+        capture_terminator_byte(&cap.sub, allow_unicode_classes)?;
 
     let HirKind::Literal(Literal(lit)) = iter.next()?.kind() else {
         return None;
@@ -2148,9 +2161,8 @@ fn try_recognize_prefix_capture(
         return None;
     }
 
-    if !is_dot_star(iter.next()?, allow_unicode_classes) {
-        return None;
-    }
+    let dot_star_requires_utf8 =
+        dot_star_requires_valid_utf8(iter.next()?, allow_unicode_classes)?;
 
     if !matches!(iter.next()?.kind(), HirKind::Look(Look::End)) {
         return None;
@@ -2168,7 +2180,8 @@ fn try_recognize_prefix_capture(
     let prefixes: Vec<Box<[u8]>> =
         deduped.into_iter().map(Vec::into_boxed_slice).collect();
 
-    Some((prefixes.into_boxed_slice(), terminator))
+    let requires_valid_utf8 = capture_requires_utf8 || dot_star_requires_utf8;
+    Some((prefixes.into_boxed_slice(), terminator, requires_valid_utf8))
 }
 
 /// Return all literal variants for one prefix segment in regex-priority
@@ -2238,7 +2251,7 @@ fn concat_prefix_variants(
 fn capture_terminator_byte(
     hir: &Hir,
     allow_unicode_classes: bool,
-) -> Option<u8> {
+) -> Option<(u8, bool)> {
     let HirKind::Repetition(rep) = hir.kind() else {
         return None;
     };
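@@ -2252,17 +2265,29 @@ fn capture_terminator_byte(
 }
 
 /// `.*` for default-flag regexes: any byte except `\n`, zero or more, greedy.
+///
+/// Returns `None` if `hir` is not that shape; otherwise `Some(flag)`, where
+/// `flag` is true when the class is a Unicode class (valid UTF-8 required).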
-fn is_dot_star(hir: &Hir, allow_unicode_classes: bool) -> bool {
+fn dot_star_requires_valid_utf8(
+    hir: &Hir,
+    allow_unicode_classes: bool,
+) -> Option<bool> {
     let HirKind::Repetition(rep) = hir.kind() else {
-        return false;
+        return None;
     };
     if rep.min != 0 || rep.max.is_some() || !rep.greedy {
-        return false;
+        return None;
     }
     let HirKind::Class(class) = rep.sub.kind() else {
-        return false;
+        return None;
     };
-    single_excluded_ascii_byte(class, allow_unicode_classes) == Some(b'\n')
+    let (excluded, requires_valid_utf8) =
+        single_excluded_ascii_byte(class, allow_unicode_classes)?;
+    if excluded == b'\n' {
+        Some(requires_valid_utf8)
+    } else {
+        None
+    }
 }
 
 /// Returns `Some(b)` iff `class` matches every codepoint or byte except a
@@ -2271,7 +2293,7 @@ fn is_dot_star(hir: &Hir, allow_unicode_classes: bool) -> bool {
 fn single_excluded_ascii_byte(
     class: &Class,
     allow_unicode_classes: bool,
-) -> Option<u8> {
+) -> Option<(u8, bool)> {
     match class {
         Class::Unicode(uc) => {
             if !allow_unicode_classes {
@@ -2290,7 +2312,7 @@ fn single_excluded_ascii_byte(
             if gap_start != gap_end || gap_start > 0x7F {
                 return None;
             }
-            Some(gap_start as u8)
+            Some((gap_start as u8, true))
         }
         Class::Bytes(bc) => {
             let ranges = bc.ranges();
@@ -2306,7 +2328,7 @@ fn single_excluded_ascii_byte(
             if gap_start != gap_end || gap_start > 0x7F {
                 return None;
             }
-            Some(gap_start as u8)
+            Some((gap_start as u8, false))
         }
     }
 }
diff --git a/tests/misc.rs b/tests/misc.rs
index f6f46e5948..46cd71f7b1 100644
--- a/tests/misc.rs
+++ b/tests/misc.rs
@@ -158,6 +158,12 @@ fn literal_prefix_capture_respects_invalid_pattern_id() {
     assert_eq!(None, re.search_with(&mut cache, &input));
 }
 
+#[test]
+fn literal_prefix_capture_meta_unicode_rejects_invalid_utf8() {
+    let re = regex_automata::meta::Regex::new(r"^a([^/]+)/.*$").unwrap();
+    assert!(!re.is_match(regex_automata::Input::new(b"a\xFF/x")));
+}
+
 #[test]
 fn literal_prefix_capture_bytes_unicode_rejects_invalid_utf8() {
     let re = regex::bytes::Regex::new(r"^a([^/]+)/.*$").unwrap();

From 48018870207a456aa8518a5d0684659f396b68d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Thu, 28 May 2026 13:19:26 +0200
Subject: [PATCH 6/6] Recover UTF-8 string fast path

---
 regex-automata/src/meta/strategy.rs |  5 +++--
 regex-automata/src/util/search.rs   | 24 +++++++++++++++++++++
 src/regex/string.rs                 | 33 +++++++++++++++++++----------
 src/regexset/string.rs              |  7 +++---
 4 files changed, 53 insertions(+), 16 deletions(-)

diff --git a/regex-automata/src/meta/strategy.rs b/regex-automata/src/meta/strategy.rs
index b8843f4aa5..17a525b13a 100644
--- a/regex-automata/src/meta/strategy.rs
+++ b/regex-automata/src/meta/strategy.rs
@@ -1997,6 +1997,8 @@ impl LiteralPrefixCapture {
             return None;
         }
         let bytes = input.haystack();
+        let must_validate_utf8 =
+            self.requires_valid_utf8 && !input.haystack_is_known_valid_utf8();
         'prefix: for prefix in self.prefixes.iter() {
             if !bytes.starts_with(prefix) {
                 continue;
@@ -2029,8 +2031,7 @@ impl LiteralPrefixCapture {
             {
                 continue;
             }
-            if self.requires_valid_utf8 && core::str::from_utf8(bytes).is_err()
-            {
+            if must_validate_utf8 && core::str::from_utf8(bytes).is_err() {
                 return None;
             }
             return Some((cap_start, cap_end));
diff --git a/regex-automata/src/util/search.rs b/regex-automata/src/util/search.rs
index 3ece11d155..7cc41a2983 100644
--- a/regex-automata/src/util/search.rs
+++ b/regex-automata/src/util/search.rs
@@ -104,6 +104,7 @@ pub struct Input<'h> {
     span: Span,
     anchored: Anchored,
     earliest: bool,
+    haystack_known_valid_utf8: bool,
 }
 
 impl<'h> Input<'h> {
@@ -120,6 +121,24 @@ impl<'h> Input<'h> {
             span: Span { start: 0, end: haystack.len() },
             anchored: Anchored::No,
             earliest: false,
+            haystack_known_valid_utf8: false,
+        }
+    }
+
+    /// Create a new search configuration for the given UTF-8 haystack.
+    ///
+    /// This is like [`Input::new`], but records the fact that the haystack is
+    /// already known to be valid UTF-8. This lets regex engines avoid
+    /// redundant UTF-8 validation when Unicode matching semantics require it.
+    #[inline]
+    pub fn new_utf8(haystack: &'h str) -> Input<'h> {
+        let haystack = haystack.as_bytes();
+        Input {
+            haystack,
+            span: Span { start: 0, end: haystack.len() },
+            anchored: Anchored::No,
+            earliest: false,
+            haystack_known_valid_utf8: true,
         }
     }
 
@@ -767,6 +786,11 @@ impl<'h> Input<'h> {
     pub fn is_char_boundary(&self, offset: usize) -> bool {
         utf8::is_boundary(self.haystack(), offset)
     }
+
+    #[inline]
+    pub(crate) fn haystack_is_known_valid_utf8(&self) -> bool {
+        self.haystack_known_valid_utf8
+    }
 }
 
 impl<'h> core::fmt::Debug for Input<'h> {
diff --git a/src/regex/string.rs b/src/regex/string.rs
index 6b3aa0527e..46f0d30eaf 100644
--- a/src/regex/string.rs
+++ b/src/regex/string.rs
@@ -262,7 +262,10 @@ impl Regex {
     /// ```
     #[inline]
     pub fn find_iter<'r, 'h>(&'r self, haystack: &'h str) -> Matches<'r, 'h> {
-        Matches { haystack, it: self.meta.find_iter(haystack) }
+        Matches {
+            haystack,
+            it: self.meta.find_iter(Input::new_utf8(haystack)),
+        }
     }
 
     /// This routine searches for the first match of this regex in the haystack
@@ -421,7 +424,10 @@ impl Regex {
         &'r self,
         haystack: &'h str,
     ) -> CaptureMatches<'r, 'h> {
-        CaptureMatches { haystack, it: self.meta.captures_iter(haystack) }
+        CaptureMatches {
+            haystack,
+            it: self.meta.captures_iter(Input::new_utf8(haystack)),
+        }
     }
 
     /// Returns an iterator of substrings of the haystack given, delimited by a
@@ -551,7 +557,7 @@ impl Regex {
     /// ```
     #[inline]
     pub fn split<'r, 'h>(&'r self, haystack: &'h str) -> Split<'r, 'h> {
-        Split { haystack, it: self.meta.split(haystack) }
+        Split { haystack, it: self.meta.split(Input::new_utf8(haystack)) }
     }
 
     /// Returns an iterator of at most `limit` substrings of the haystack
@@ -630,7 +636,10 @@ impl Regex {
         haystack: &'h str,
         limit: usize,
     ) -> SplitN<'r, 'h> {
-        SplitN { haystack, it: self.meta.splitn(haystack, limit) }
+        SplitN {
+            haystack,
+            it: self.meta.splitn(Input::new_utf8(haystack), limit),
+        }
     }
 
     /// Replaces the leftmost-first match in the given haystack with the
@@ -1088,8 +1097,9 @@ impl Regex {
         haystack: &str,
         start: usize,
     ) -> Option<usize> {
-        let input =
-            Input::new(haystack).earliest(true).span(start..haystack.len());
+        let input = Input::new_utf8(haystack)
+            .earliest(true)
+            .span(start..haystack.len());
         self.meta.search_half(&input).map(|hm| hm.offset())
     }
 
@@ -1122,8 +1132,9 @@ impl Regex {
     /// ```
     #[inline]
     pub fn is_match_at(&self, haystack: &str, start: usize) -> bool {
-        let input =
-            Input::new(haystack).earliest(true).span(start..haystack.len());
+        let input = Input::new_utf8(haystack)
+            .earliest(true)
+            .span(start..haystack.len());
         self.meta.search_half(&input).is_some()
     }
 
@@ -1160,7 +1171,7 @@ impl Regex {
         haystack: &'h str,
         start: usize,
     ) -> Option<Match<'h>> {
-        let input = Input::new(haystack).span(start..haystack.len());
+        let input = Input::new_utf8(haystack).span(start..haystack.len());
         self.meta
             .search(&input)
             .map(|m| Match::new(haystack, m.start(), m.end()))
@@ -1199,7 +1210,7 @@ impl Regex {
         haystack: &'h str,
         start: usize,
     ) -> Option<Captures<'h>> {
-        let input = Input::new(haystack).span(start..haystack.len());
+        let input = Input::new_utf8(haystack).span(start..haystack.len());
         let mut caps = self.meta.create_captures();
         self.meta.search_captures(&input, &mut caps);
         if caps.is_match() {
@@ -1290,7 +1301,7 @@ impl Regex {
         haystack: &'h str,
         start: usize,
     ) -> Option<Match<'h>> {
-        let input = Input::new(haystack).span(start..haystack.len());
+        let input = Input::new_utf8(haystack).span(start..haystack.len());
         self.meta.search_captures(&input, &mut locs.0);
         locs.0.get_match().map(|m| Match::new(haystack, m.start(), m.end()))
     }
diff --git a/src/regexset/string.rs b/src/regexset/string.rs
index 5126a4661e..c842063839 100644
--- a/src/regexset/string.rs
+++ b/src/regexset/string.rs
@@ -242,7 +242,8 @@ impl RegexSet {
     /// ```
     #[inline]
     pub fn is_match_at(&self, haystack: &str, start: usize) -> bool {
-        self.meta.is_match(Input::new(haystack).span(start..haystack.len()))
+        self.meta
+            .is_match(Input::new_utf8(haystack).span(start..haystack.len()))
     }
 
     /// Returns the set of regexes that match in the given haystack.
@@ -323,7 +324,7 @@ impl RegexSet {
     /// ```
     #[inline]
     pub fn matches_at(&self, haystack: &str, start: usize) -> SetMatches {
-        let input = Input::new(haystack).span(start..haystack.len());
+        let input = Input::new_utf8(haystack).span(start..haystack.len());
         let mut patset = PatternSet::new(self.meta.pattern_len());
         self.meta.which_overlapping_matches(&input, &mut patset);
         SetMatches(patset)
@@ -357,7 +358,7 @@ impl RegexSet {
         // is in regex-automata, not regex. So maybe we should just accept a
         // 'SetMatches', which is basically just a newtype around PatternSet.
         let mut patset = PatternSet::new(self.meta.pattern_len());
-        let mut input = Input::new(haystack);
+        let mut input = Input::new_utf8(haystack);
         input.set_start(start);
         self.meta.which_overlapping_matches(&input, &mut patset);
         for pid in patset.iter() {