From 5250a11eb53241bc13ae5f5f84f2968bf79cc6cc Mon Sep 17 00:00:00 2001 From: rsasaki0109 Date: Wed, 17 Jun 2026 03:38:21 +0900 Subject: [PATCH] feat(gsdc2023): add TripWhitelistDDGate (default-deny DD-carrier gate) Phase 80 showed the broad DDAnchorGate (anchor coverage >= 0.6) is net-negative on Kaggle: ~84% of DD rows fell in p95_delta>=3m trips, with a 105.72m spike on ebf-y/pixel5. Broad DD gating cannot be rescued by a single floor. TripWhitelistDDGate keeps fgo_dd_carrier only for trips that are explicitly whitelisted, or that clear conservative internal-consistency thresholds (anchor>=0.8, dd_epochs>=1, dd_pairs_mean>=4.0, no_tdcp absent) -- stricter than the 0.6 floor. A deny-list always wins. It reads only internal signals (never truth), so an offline A/B audit can emit the whitelist and production replays it deterministically. It is a duck-typed drop-in for CombinedGate(dd=...). Adds 8 unit tests (incl. the marginal trip the broad gate kept but this denies, allow/deny precedence, DD-density floor, and CombinedGate substitution). Co-Authored-By: Claude Opus 4.8 (1M context) --- experiments/gsdc2023_ab_gates.py | 49 +++++++++++++++++++++++++++ tests/test_gsdc2023_ab_gates.py | 57 ++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+) diff --git a/experiments/gsdc2023_ab_gates.py b/experiments/gsdc2023_ab_gates.py index 370d6810..db10fcfa 100644 --- a/experiments/gsdc2023_ab_gates.py +++ b/experiments/gsdc2023_ab_gates.py @@ -13,6 +13,9 @@ ``passthrough``: gates never block them, but they also do not constitute a reason to keep the submission's row delta. +``TripWhitelistDDGate`` is a stricter, default-deny replacement for +``DDAnchorGate`` (Phase 80 follow-up) that drops into ``CombinedGate(dd=...)``. + Inputs are the dataclasses from :mod:`gsdc2023_ab_source_mix` and :mod:`gsdc2023_ab_dd_signals`; gates do not touch JSON. 
""" @@ -87,6 +90,51 @@ def decide(self, signals: DDSignals, counts: SourceCounts) -> Disposition: return Disposition.REVERTED +@dataclass(frozen=True) +class TripWhitelistDDGate: + """Default-deny DD-carrier gate (Phase 80 follow-up). + + The broad :class:`DDAnchorGate` (anchor coverage >= 0.6) was net-negative on + Kaggle: ~84% of DD rows landed in ``p95_delta >= 3 m`` trips, with a 105.72 m + spike on ``ebf-y/pixel5``. Rather than relax/tighten a single floor, this gate + keeps ``fgo_dd_carrier`` only for trips that are **explicitly whitelisted**, + or that clear **conservative internal-consistency thresholds** (stricter than + 0.6). Deny always wins; trips with no DD rows are never kept. It reads only + internal signals (anchor coverage, DD density) -- never truth -- so an offline + audit can emit the whitelist and production can replay it deterministically. + + Drop-in for the DD leg of :class:`CombinedGate` (``CombinedGate(dd=...)``): + same ``decide(signals, counts) -> bool`` shape as :class:`DDAnchorGate`. + ``allow`` / ``deny`` only need ``in``; pass frozensets to stay hashable. Trip + ids must already be normalized (no ``train/`` / ``test/`` prefix). + """ + + allow: frozenset = frozenset()  # trip ids that bypass the threshold checks + deny: frozenset = frozenset()  # trip ids that are always rejected + min_anchor_coverage: float = 0.8 + min_dd_epochs: int = 1 + min_dd_pairs_mean: float = 4.0 + require_no_tdcp_absent: bool = True + + def decide(self, signals: DDSignals, counts: SourceCounts) -> bool: + if counts.fgo_dd_carrier <= 0: + return False + if signals.trip_id in self.deny: + return False + if signals.trip_id in self.allow: + return True + # Default-deny: only trips with strong internal evidence survive.
+ if self.require_no_tdcp_absent and counts.fgo_no_tdcp > 0: + return False + # Compare as ``not (x >= floor)`` rather than ``x < floor`` so that a NaN + # signal is denied instead of slipping through the default-deny checks. + if not (signals.anchor_coverage >= self.min_anchor_coverage): + return False + if not (signals.dd_dd_epochs >= self.min_dd_epochs): + return False + return bool(signals.dd_pairs_mean >= self.min_dd_pairs_mean) + + +@dataclass(frozen=True) class TripDisposition: """Result of applying ``CombinedGate`` to a single trip.""" @@ -144,6 +192,7 @@ def disposition_counts(dispositions: Iterable[TripDisposition]) -> dict[str, int "Disposition", "NoTdcpCoexistGate", "TripDisposition", + "TripWhitelistDDGate", "apply_gate", "disposition_counts", ] diff --git a/tests/test_gsdc2023_ab_gates.py b/tests/test_gsdc2023_ab_gates.py index 656bb383..5842c853 100644 --- a/tests/test_gsdc2023_ab_gates.py +++ b/tests/test_gsdc2023_ab_gates.py @@ -6,6 +6,7 @@ DDAnchorGate, Disposition, NoTdcpCoexistGate, + TripWhitelistDDGate, apply_gate, disposition_counts, ) @@ -85,6 +86,62 @@ def test_ntdc_gate_blocks_when_anchor_below_floor(): assert gate.decide(_signals("a", anchor_cov=0.5), _counts("a", no_tdcp=100)) is False +# --- TripWhitelistDDGate --------------------------------------------------- + + +def test_whitelist_gate_requires_dd_rows_present(): + gate = TripWhitelistDDGate() + assert gate.decide(_signals("a", anchor_cov=0.95), _counts("a", dd=0)) is False + + +def test_whitelist_gate_default_denies_marginal_trip_that_broad_gate_kept(): + # anchor 0.65 clears the old 0.6 floor but not the conservative 0.8 default.
+ broad = DDAnchorGate(min_anchor_coverage=0.6) + strict = TripWhitelistDDGate() + sig = _signals("a", anchor_cov=0.65) + cnt = _counts("a", dd=200) + assert broad.decide(sig, cnt) is True + assert strict.decide(sig, cnt) is False + + +def test_whitelist_gate_keeps_high_confidence_trip_by_internal_signals(): + gate = TripWhitelistDDGate()  # NOTE(review): presumably _signals() defaults clear the DD epoch/pair floors -- confirm + assert gate.decide(_signals("a", anchor_cov=0.9), _counts("a", dd=200)) is True + + +def test_whitelist_gate_allow_overrides_failing_internal_signals(): + gate = TripWhitelistDDGate(allow={"a"}) + # Would fail the conservative floor, but the explicit allow keeps it. + assert gate.decide(_signals("a", anchor_cov=0.3), _counts("a", dd=200)) is True + + +def test_whitelist_gate_deny_overrides_everything(): + gate = TripWhitelistDDGate(allow={"a"}, deny={"a"}) + # Deny wins even over allow and strong internal evidence. + assert gate.decide(_signals("a", anchor_cov=0.99), _counts("a", dd=200)) is False + + +def test_whitelist_gate_blocks_low_dd_density(): + gate = TripWhitelistDDGate(min_dd_pairs_mean=4.0) + sig = DDSignals( + trip_id="a", n_epochs=1000, dd_anchor_epochs=900, + dd_dd_epochs=900, dd_base_snapped_epochs=900, dd_pairs_mean=2.0,  # 2.0 is below the 4.0 floor set on the gate + ) + assert gate.decide(sig, _counts("a", dd=200)) is False + + +def test_whitelist_gate_blocks_no_tdcp_coexistence_by_default(): + gate = TripWhitelistDDGate() + assert gate.decide(_signals("a", anchor_cov=0.9), _counts("a", dd=200, no_tdcp=100)) is False + + +def test_whitelist_gate_drops_into_combined_gate(): + # Duck-typed substitution for the DD leg of CombinedGate. + gate = CombinedGate(dd=TripWhitelistDDGate(allow={"a"})) + assert gate.decide(_signals("a", anchor_cov=0.2), _counts("a", dd=200)) is Disposition.KEPT + assert gate.decide(_signals("b", anchor_cov=0.65), _counts("b", dd=200)) is Disposition.REVERTED + + # --- CombinedGate ----------------------------------------------------------