From e78619743f7a187d79b4fef31566b94ebf2396a8 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 20 Jun 2026 12:47:45 +0000 Subject: [PATCH] Fix pre-existing CI: bound wire decoder recursion; unblock Smoke + parse gates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two unrelated, pre-existing CI failures on main: parse-gate (vcltotal-parse): - The "total, never-panics" wire decoder recursed without bound, so a deeply-nested adversarial stream (thousands of nested Compare/Aggregate tags, or TList in the schema codec) overflowed the native stack and aborted the process — a crash that violates the decoder's totality contract. Bound decode depth via a DepthGuard with MAX_DEPTH=128 (the cap serde_json uses for the same reason), returning the new WireError::TooDeep. Regression test covers both dec_expr and dec_vqltype: a 200k-deep input now returns Err, never overflows. - The roundtrip property tests intermittently overflowed the default 2 MiB test-thread stack: proptest's recursive expr()/statement() strategies recurse far enough during value generation for some RNG seeds (the generated values are shallow, depth ~6 — harness overhead, not a deep AST). Run the parse-gate test step with RUST_MIN_STACK set to 32 MiB so the property tests are robust. backend-matrix (Smoke matrix — all per-prover jobs): - echidna-client is a standalone workspace with no committed Cargo.lock (its echidna-core path-dep only exists once the sibling is cloned), so `cargo test --locked` failed before building ("cannot create the lock file ... because --locked was passed"). Drop --locked so dependencies resolve fresh against the just-cloned sibling. Validated: full vcltotal-parse suite (lib + gate + parse + wire + conformance) green at RUST_MIN_STACK=32 MiB; clippy --all-targets -D warnings clean; the totality regression test passes on the default 2 MiB stack with no overflow. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_017nyxs8RgqZa72PzrTu3L75 --- .github/workflows/backend-matrix.yml | 8 +++- .github/workflows/parse-gate.yml | 9 ++++ src/interface/parse/src/wire.rs | 69 ++++++++++++++++++++++++++-- src/interface/parse/tests/wire.rs | 59 +++++++++++++++++++++++- 4 files changed, 140 insertions(+), 5 deletions(-) diff --git a/.github/workflows/backend-matrix.yml b/.github/workflows/backend-matrix.yml index 60603ed..a35ec34 100644 --- a/.github/workflows/backend-matrix.yml +++ b/.github/workflows/backend-matrix.yml @@ -107,4 +107,10 @@ jobs: # path-dep breaks standalone CI), so `-p` cannot select it. It is its # own standalone workspace root; run it by manifest path now that the # echidna sibling has been cloned above. - run: cargo test --manifest-path src/interface/echidna-client/Cargo.toml --tests --locked + # NOTE: echidna-client is a standalone workspace with NO committed + # Cargo.lock — its transitive `echidna-core` path-dep only exists once + # the sibling is cloned (step above), so a lockfile cannot be committed + # and `--locked` can never be satisfied: it fails before the tests even + # build ("cannot create the lock file ... because --locked was passed"). + # Resolve fresh against the just-cloned sibling instead (omit --locked). + run: cargo test --manifest-path src/interface/echidna-client/Cargo.toml --tests diff --git a/.github/workflows/parse-gate.yml b/.github/workflows/parse-gate.yml index 63904ba..65537c3 100644 --- a/.github/workflows/parse-gate.yml +++ b/.github/workflows/parse-gate.yml @@ -55,6 +55,15 @@ jobs: --all-targets --locked -- -D warnings - name: Tests + panic-free proptest invariant + env: + # proptest's recursive `expr()` / `statement()` strategies can, for + # some RNG seeds, recurse far enough during value GENERATION to + # overflow the default 2 MiB test-thread stack, intermittently + # aborting the whole run (SIGABRT). The generated values are shallow + # (depth ~6) and the codec itself is bounded — this is proptest + # harness overhead, not a deep AST — so a larger thread stack makes + # the property tests robust without weakening the invariant. + RUST_MIN_STACK: "33554432" run: | set -euo pipefail cargo test --manifest-path src/interface/parse/Cargo.toml --locked diff --git a/src/interface/parse/src/wire.rs b/src/interface/parse/src/wire.rs index dce22eb..fa85082 100644 --- a/src/interface/parse/src/wire.rs +++ b/src/interface/parse/src/wire.rs @@ -25,6 +25,17 @@ const SCHEMA_MAGIC: [u8; 4] = *b"VCLS"; const OP_MAGIC: [u8; 4] = *b"VCLT"; const VERSION: u16 = 1; +/// Maximum decode recursion depth. The same anti-DoS stance the module +/// already takes on untrusted *counts* (never pre-allocated) must also apply +/// to untrusted *nesting*: a deeply-nested adversarial stream (e.g. thousands +/// of nested `Compare`/`Aggregate` tags, or `TList` in the schema codec) must +/// not exhaust the native stack and abort the process — a stack overflow is a +/// crash, which would violate the decoder's total / never-panic contract. +/// Beyond this depth the decoder returns [`WireError::TooDeep`]. Real queries +/// and the Idris-mirrored AST nest only a handful deep; 128 matches the +/// recursion cap serde_json uses for the same reason. +const MAX_DEPTH: usize = 128; + /// Total decode failure (never a panic). #[derive(Debug, Clone, PartialEq, Eq)] pub enum WireError { @@ -42,6 +53,11 @@ pub enum WireError { /// pre-allocated against). LengthOverflow, TrailingBytes, + /// Decode recursion exceeded [`MAX_DEPTH`]: the stream nests deeper than + /// the decoder will descend. Rejected as a typed error rather than + /// recursing until the native stack overflows (anti-DoS; preserves the + /// total / never-panic contract). + TooDeep, } impl core::fmt::Display for WireError { @@ -57,6 +73,7 @@ impl core::fmt::Display for WireError { WireError::BadUtf8 => write!(f, "invalid UTF-8 in string"), WireError::LengthOverflow => write!(f, "length/count overflow"), WireError::TrailingBytes => write!(f, "trailing bytes after statement"), + WireError::TooDeep => write!(f, "decode recursion depth exceeded"), } } } @@ -500,6 +517,36 @@ fn enc_stmt(s: &Statement, o: &mut Vec) { struct D<'a> { b: &'a [u8], pos: usize, + /// Current decode recursion depth, bounded by [`MAX_DEPTH`]. Balanced by + /// [`DepthGuard`]: incremented on entry to each recursive decoder, restored + /// on scope exit (including the `?` error path). + depth: usize, +} + +/// RAII guard that bounds decoder recursion. Constructed at the top of each +/// recursive decoder; increments `D::depth` and rejects with +/// [`WireError::TooDeep`] past [`MAX_DEPTH`], then restores the previous depth +/// on drop so sibling (non-nested) reads are unaffected. +struct DepthGuard<'g, 'a> { + d: &'g mut D<'a>, +} + +impl<'g, 'a> DepthGuard<'g, 'a> { + fn enter(d: &'g mut D<'a>) -> Result { + d.depth = d.depth.saturating_add(1); + if d.depth > MAX_DEPTH { + // Restore before bailing so a caught error leaves depth balanced. + d.depth = d.depth.saturating_sub(1); + return Err(WireError::TooDeep); + } + Ok(DepthGuard { d }) + } +} + +impl Drop for DepthGuard<'_, '_> { + fn drop(&mut self) { + self.d.depth = self.d.depth.saturating_sub(1); + } } impl<'a> D<'a> { @@ -588,7 +635,11 @@ impl<'a> D<'a> { /// Decode a v1 wire stream into a `Statement`. Total. pub fn from_wire(input: &[u8]) -> Result { - let mut d = D { b: input, pos: 0 }; + let mut d = D { + b: input, + pos: 0, + depth: 0, + }; if d.take(4)? != MAGIC { return Err(WireError::BadMagic); } @@ -711,6 +762,8 @@ fn dec_epi_op(d: &mut D) -> Result { } fn dec_expr(d: &mut D) -> Result { + let g = DepthGuard::enter(d)?; + let d = &mut *g.d; match d.u8()? { 0 => Ok(Expr::Field(dec_fieldref(d)?)), 1 => Ok(Expr::Literal(dec_literal(d)?)), @@ -985,7 +1038,11 @@ fn dec_op(d: &mut D) -> Result { /// Decode a v1 `VCLT` wire stream into a `VclOp`. Total: every input /// yields `Ok`/`Err`, never a panic (same contract as [`from_wire`]). pub fn from_wire_op(input: &[u8]) -> Result { - let mut d = D { b: input, pos: 0 }; + let mut d = D { + b: input, + pos: 0, + depth: 0, + }; if d.take(4)? != OP_MAGIC { return Err(WireError::BadMagic); } @@ -1082,6 +1139,8 @@ pub fn to_wire_schema(s: &OctadSchema) -> Vec { } fn dec_vqltype(d: &mut D) -> Result { + let g = DepthGuard::enter(d)?; + let d = &mut *g.d; match d.u8()? { 0 => Ok(VqlType::TString), 1 => Ok(VqlType::TInt), @@ -1142,7 +1201,11 @@ fn dec_modschema(d: &mut D) -> Result { /// input yields `Ok`/`Err`, never a panic (same contract as /// [`from_wire`]). pub fn from_wire_schema(input: &[u8]) -> Result { - let mut d = D { b: input, pos: 0 }; + let mut d = D { + b: input, + pos: 0, + depth: 0, + }; if d.take(4)? != SCHEMA_MAGIC { return Err(WireError::BadMagic); } diff --git a/src/interface/parse/tests/wire.rs b/src/interface/parse/tests/wire.rs index f3e3850..897dbac 100644 --- a/src/interface/parse/tests/wire.rs +++ b/src/interface/parse/tests/wire.rs @@ -15,7 +15,7 @@ use proptest::prelude::*; use vcltotal_parse::ast::*; -use vcltotal_parse::{from_wire, from_wire_op, to_wire, to_wire_op}; +use vcltotal_parse::{from_wire, from_wire_op, from_wire_schema, to_wire, to_wire_op, WireError}; fn modality() -> impl Strategy { prop_oneof![ @@ -362,6 +362,63 @@ proptest! { } } +#[test] +fn decoder_rejects_deep_nesting_without_overflow() { + // Trusted-boundary totality (#25): a deeply-nested *valid-prefix* stream + // must be rejected with a typed error, never recurse until the native + // stack overflows (a stack overflow aborts the process — a crash, not a + // total `Ok`/`Err`). Streams are built by hand, so no encoder recursion is + // involved. `MAX_DEPTH` is 128; these go far past it on a normal stack. + + // ── Expr path (from_wire): where = N-deep Aggregate(Count, ...) ── + let stmt_with_deep_where = |n: usize| -> Vec { + let mut v = Vec::new(); + v.extend_from_slice(b"VCLW"); + v.extend_from_slice(&1u16.to_le_bytes()); + v.extend_from_slice(&0u32.to_le_bytes()); // select_items: 0 + v.push(2); // Source::Store + v.extend_from_slice(&0u32.to_le_bytes()); // store name "" + v.push(1); // where = Some(..) + for _ in 0..n { + v.push(4); // Expr::Aggregate + v.push(0); // AggFunc::Count + } + v.push(6); // Expr::Star (leaf) + v.extend_from_slice(&0u32.to_le_bytes()); // group_by: 0 + v.push(0); // having: None + v.extend_from_slice(&0u32.to_le_bytes()); // order_by: 0 + v.push(0); // limit: None + v.push(0); // offset: None + v.push(0); // proof_clause: None + v.push(0); // effect_decl: None + v.push(0); // version_const: None + v.push(0); // linear_annot: None + v.push(0); // epistemic_clause: None + v.push(0); // requested_level: ParseSafe + v + }; + // Far past the cap: typed rejection, NOT a stack overflow. + assert_eq!( + from_wire(&stmt_with_deep_where(200_000)), + Err(WireError::TooDeep) + ); + // Comfortably within the cap: still a faithful round-trip. + let ok = from_wire(&stmt_with_deep_where(16)).expect("nesting within cap decodes"); + assert!(matches!(ok.where_clause, Some(Expr::Aggregate(..)))); + + // ── Schema path (from_wire_schema): N-deep TList in the first field ── + let mut schema = Vec::new(); + schema.extend_from_slice(b"VCLS"); + schema.extend_from_slice(&1u16.to_le_bytes()); + schema.push(0); // graph modality = Graph + schema.extend_from_slice(&1u32.to_le_bytes()); // one field + schema.extend_from_slice(&0u32.to_le_bytes()); // field name "" + // 200k nested TList tags (VqlType::TList == 8). + schema.extend(std::iter::repeat_n(8u8, 200_000)); + // Hits the depth cap long before consuming the rest of the stream. + assert_eq!(from_wire_schema(&schema), Err(WireError::TooDeep)); +} + #[test] fn golden_minimal() { let s = Statement {