From eda5106393ab618b4ae737fee17f7c1330b6cfd9 Mon Sep 17 00:00:00 2001
From: gnss-gpu contributors <rsasaki0109@gmail.com>
Date: Sun, 14 Jun 2026 01:23:32 +0900
Subject: [PATCH 1/2] feat(gsdc2023): taroz-style 2-stage residual re-solve (VD
 path)

After the FGO converges, recompute the fixed-linearization pseudorange
residual at the converged state, mask rows whose |residual| exceeds an
L1/L5 threshold (20 m / 15 m, MIN_KEEP safety, worst-first), and re-solve
warm-started. Opt-in via --fgo-two-stage-residual-resolve, default off.

This is a selective dense-urban outlier killer. 15-trip pixel5 train A/B
(fgo standalone): lax-o (dense urban) -54.6cm / p95 -79cm, mtv-h -2.1cm,
four trips at most +0.5cm regression, the rest wash. Aggregate -3.8cm,
96% of it from lax-o. The win comes from dropping systematically biased
NLOS measurements so the position snaps toward truth.

A Huber trust-region guard is implemented behind --fgo-two-stage-residual-guard
but is OFF by default: it gates pass-2 on the full-set Huber residual cost,
which the dense-urban win *raises* (dropped outliers are re-added at their
original weight, Huber-capped), so the guard rejects exactly the re-solves
we want (validated: lax-o guard-on 2.268 ~= baseline 2.272, vs guard-off
1.727). Retained for experimentation only.

13 unit tests cover the residual/mask/guard-cost helpers and the
orchestration with a mock solver (accept/reject/keep-on-guard-off,
no-op without fixed linearization, restore on pass-2 failure).
---
 .../build_gsdc2023_bridge_submission.py       |  30 +++
 experiments/gsdc2023_bridge_config.py         |  27 ++
 experiments/gsdc2023_raw_bridge.py            | 199 ++++++++++++++
 tests/test_two_stage_residual_resolve.py      | 244 ++++++++++++++++++
 4 files changed, 500 insertions(+)
 create mode 100644 tests/test_two_stage_residual_resolve.py

diff --git a/experiments/build_gsdc2023_bridge_submission.py b/experiments/build_gsdc2023_bridge_submission.py
index 724d1dfe..0292a6ea 100644
--- a/experiments/build_gsdc2023_bridge_submission.py
+++ b/experiments/build_gsdc2023_bridge_submission.py
@@ -343,6 +343,21 @@ def build_config(args: argparse.Namespace) -> BridgeConfig:
         fgo_residual_mask_propagation=bool(
             getattr(args, "fgo_residual_mask_propagation", False)
         ),
+        fgo_two_stage_residual_resolve=bool(
+            getattr(args, "fgo_two_stage_residual_resolve", False)
+        ),
+        fgo_two_stage_residual_threshold_l1_m=float(
+            getattr(args, "fgo_two_stage_residual_threshold_l1_m", 20.0)
+        ),
+        fgo_two_stage_residual_threshold_l5_m=float(
+            getattr(args, "fgo_two_stage_residual_threshold_l5_m", 15.0)
+        ),
+        fgo_two_stage_residual_min_keep=int(
+            getattr(args, "fgo_two_stage_residual_min_keep", 5)
+        ),
+        fgo_two_stage_residual_guard=bool(
+            getattr(args, "fgo_two_stage_residual_guard", False)
+        ),
     )
     if bool(getattr(args, "taroz_marupaku", False)):
         cfg = apply_taroz_marupaku_preset(cfg)
@@ -589,6 +604,21 @@ def main(argv: list[str] | None = None) -> int:
         default=False,
         help="Mirror the post-fill pseudorange residual mask into the FGO weights (weights_fgo), so residual-rejected PR rows are dropped from the FGO objective too.",
     )
+    parser.add_argument(
+        "--fgo-two-stage-residual-resolve",
+        action=argparse.BooleanOptionalAction,
+        default=False,
+        help="taroz-style 2-stage residual re-solve: after the FGO converges, mask pseudorange rows whose fixed-linearization residual exceeds threshold and re-solve warm-started. Big win on degraded dense-urban chunks (lax-o -54.6cm), at most +0.5cm elsewhere. Default off.",
+    )
+    parser.add_argument("--fgo-two-stage-residual-threshold-l1-m", type=float, default=20.0)
+    parser.add_argument("--fgo-two-stage-residual-threshold-l5-m", type=float, default=15.0)
+    parser.add_argument("--fgo-two-stage-residual-min-keep", type=int, default=5)
+    parser.add_argument(
+        "--fgo-two-stage-residual-guard",
+        action=argparse.BooleanOptionalAction,
+        default=False,
+        help="Gate the 2-stage re-solve behind a Huber trust-region cost (keep pass-2 only if the full-set Huber residual cost improves). OFF by default: the dense-urban win comes from dropping biased NLOS rows, which lowers position error but *raises* the Huber cost, so the guard rejects exactly the re-solves we want. Retained for experimentation.",
+    )
     parser.add_argument("--stop-attitude-sigma-rad", type=float, default=0.0)
     parser.add_argument(
         "--taroz-fgo",
diff --git a/experiments/gsdc2023_bridge_config.py b/experiments/gsdc2023_bridge_config.py
index 561b24d1..05510532 100644
--- a/experiments/gsdc2023_bridge_config.py
+++ b/experiments/gsdc2023_bridge_config.py
@@ -281,6 +281,20 @@ class BridgeConfig:
     # taroz's, while P-residual-only improved all 3 probe trips
     # (mtv-h -8 cm, sjc-q -2 cm, lax-o -1.10 m FGO score).
     fgo_residual_mask_propagation: bool = False
+    # taroz-style 2-stage residual re-solve for the FGO (VD path only). After the
+    # first converged solve, recompute the per-measurement residual in the fixed-
+    # linearization domain (z - (los . (x - ref) + clk)), mask rows whose residual
+    # exceeds threshold (L1 / L5), and RE-SOLVE warm-started. A trust-region guard
+    # only keeps the re-solve when it lowers a Huber cost (kernel = the mask
+    # threshold) over the full measurement set, so trips that do not benefit are
+    # left bit-identical. Big win on degraded dense-urban chunks (lax-o FGO score
+    # -54.6 cm), neutral elsewhere. Requires fixed linearization (the default
+    # gnss_only preset path); a no-op when pr-linearization inputs are absent.
+    fgo_two_stage_residual_resolve: bool = False
+    fgo_two_stage_residual_threshold_l1_m: float = 20.0
+    fgo_two_stage_residual_threshold_l5_m: float = 15.0
+    fgo_two_stage_residual_min_keep: int = 5
+    fgo_two_stage_residual_guard: bool = False
     tdcp_weight_scale: float = DEFAULT_TDCP_WEIGHT_SCALE
     tdcp_l5_weight_scale: float = 1.0
     tdcp_geometry_correction: bool = DEFAULT_TDCP_GEOMETRY_CORRECTION
@@ -385,6 +399,19 @@ def __post_init__(self) -> None:
             raise ValueError("fgo_lm_damping must be >= 0")
         if not isinstance(self.fgo_extra_constellations, bool):
             raise ValueError("fgo_extra_constellations must be a bool")
+        if not isinstance(self.fgo_two_stage_residual_resolve, bool):
+            raise ValueError("fgo_two_stage_residual_resolve must be a bool")
+        if not isinstance(self.fgo_two_stage_residual_guard, bool):
+            raise ValueError("fgo_two_stage_residual_guard must be a bool")
+        for name in (
+            "fgo_two_stage_residual_threshold_l1_m",
+            "fgo_two_stage_residual_threshold_l5_m",
+        ):
+            value = float(getattr(self, name))
+            if not np.isfinite(value) or value <= 0.0:
+                raise ValueError(f"{name} must be finite and > 0")
+        if int(self.fgo_two_stage_residual_min_keep) < 4:
+            raise ValueError("fgo_two_stage_residual_min_keep must be >= 4")
         for name in (
             "stop_velocity_huber_k",
             "stop_position_huber_k",
diff --git a/experiments/gsdc2023_raw_bridge.py b/experiments/gsdc2023_raw_bridge.py
index 8b563d7d..4d28aa17 100644
--- a/experiments/gsdc2023_raw_bridge.py
+++ b/experiments/gsdc2023_raw_bridge.py
@@ -2212,6 +2212,168 @@ def _fixed_pr_linearization_inputs(
     return measurement, solver_weights, ref, los
 
 
+def _pr_linearized_residual(
+    pseudorange: np.ndarray,
+    los: np.ndarray,
+    ref: np.ndarray,
+    state: np.ndarray,
+    sys_kind: np.ndarray | None,
+    n_clock: int,
+) -> np.ndarray:
+    """Per-measurement pseudorange residual in the fixed-linearization domain.
+
+    The VD solver is fed ``z = PR_raw - range(ref)`` together with the line-of-
+    sight ``los`` and linearization point ``ref``; its model is
+    ``z - (los . (x - ref) + clk)`` where ``clk = c0 + (c_sk if sk>0)`` (per-
+    system / per-signal clock from the VD state). This recomputes that residual
+    at the converged ``state`` so outliers only visible after convergence can be
+    masked. Returns ``(T, n_sat)`` in metres.
+    """
+    z = np.asarray(pseudorange, dtype=np.float64)
+    los = np.asarray(los, dtype=np.float64)
+    ref = np.asarray(ref, dtype=np.float64)
+    xyz = np.asarray(state[:, :3], dtype=np.float64)
+    dx = xyz - ref
+    pred_geom = np.einsum("tsj,tj->ts", los, dx)
+    nc = int(n_clock)
+    clocks = np.asarray(state[:, 6 : 6 + nc], dtype=np.float64)
+    clk_common = clocks[:, 0:1]
+    if sys_kind is not None and nc > 1:
+        sk = np.asarray(sys_kind, dtype=np.int64)
+        sk_clamped = np.clip(sk, 0, nc - 1)
+        isb = np.take_along_axis(clocks, sk_clamped, axis=1)
+        clk = clk_common + np.where(sk > 0, isb, 0.0)
+    else:
+        clk = np.broadcast_to(clk_common, z.shape)
+    return z - (pred_geom + clk)
+
+
+def _pr_resolve_thresholds(
+    sys_kind: np.ndarray | None,
+    shape: tuple[int, ...],
+    threshold_l1_m: float,
+    threshold_l5_m: float,
+) -> np.ndarray:
+    """Per-row mask threshold: L5 (clock kind ``sk >= 4``) vs L1 (everything
+    else), matching the signal-aware ``n_clock=7`` layout."""
+    if sys_kind is None:
+        return np.full(shape, float(threshold_l1_m), dtype=np.float64)
+    sk = np.asarray(sys_kind, dtype=np.int64)
+    return np.where(sk >= 4, float(threshold_l5_m), float(threshold_l1_m)).astype(np.float64)
+
+
+def _pr_huber_guard_cost(
+    resid: np.ndarray, weights: np.ndarray, thresholds: np.ndarray
+) -> float:
+    """Sum of ``w * huber_rho(resid; c=threshold)`` over finite, positively
+    weighted rows. The Huber kernel caps each outlier's contribution at the mask
+    threshold, so removing-and-refitting outliers cannot inflate the cost the way
+    a plain L2 cost would (the production path runs ``huber_k=0`` = L2). This is
+    the trust-region acceptance metric for the re-solve.
+    """
+    w = np.asarray(weights, dtype=np.float64)
+    r = np.abs(np.asarray(resid, dtype=np.float64))
+    c = np.asarray(thresholds, dtype=np.float64)
+    valid = np.isfinite(r) & np.isfinite(w) & (w > 0.0)
+    rho = np.where(r <= c, 0.5 * r * r, c * (r - 0.5 * c))
+    return float(np.sum(w[valid] * rho[valid]))
+
+
+def _pr_two_stage_mask(
+    resid: np.ndarray, weights: np.ndarray, thresholds: np.ndarray, min_keep: int
+) -> tuple[np.ndarray, int]:
+    """Return a copy of ``weights`` with outlier rows (``|resid| > threshold``)
+    zeroed, never dropping an epoch below ``min_keep`` surviving rows (worst-
+    first when the cap binds). Returns ``(new_weights, n_masked)``."""
+    new_w = np.asarray(weights, dtype=np.float64).copy()
+    r = np.abs(np.asarray(resid, dtype=np.float64))
+    c = np.asarray(thresholds, dtype=np.float64)
+    n_masked = 0
+    for i in range(new_w.shape[0]):
+        idx = np.flatnonzero(new_w[i] > 0.0)
+        if idx.size <= min_keep:
+            continue
+        ri = r[i, idx]
+        ci = c[i, idx]
+        finite = np.isfinite(ri)
+        out = finite & (ri > ci)
+        if not out.any():
+            continue
+        if int(idx.size - int(out.sum())) < min_keep:
+            order = np.argsort(-np.where(finite, ri, -np.inf))
+            droppable = order[: max(0, idx.size - min_keep)]
+            keep = np.zeros(idx.size, dtype=bool)
+            sel = droppable[finite[droppable] & (ri[droppable] > ci[droppable])]
+            keep[sel] = True
+            out = keep
+            if not out.any():
+                continue
+        new_w[i, idx[out]] = 0.0
+        n_masked += int(out.sum())
+    return new_w, n_masked
+
+
+def two_stage_residual_resolve_vd(
+    solver_fn,
+    sat_ecef: np.ndarray,
+    pseudorange: np.ndarray,
+    weights: np.ndarray,
+    state: np.ndarray,
+    *,
+    threshold_l1_m: float,
+    threshold_l5_m: float,
+    min_keep: int,
+    guard: bool = True,
+    vd_kwargs: dict,
+) -> tuple[int, float]:
+    """taroz-style 2-stage residual re-solve for the VD path. Runs ``solver_fn``
+    (the VD solver), recomputes the fixed-linearization residual at the converged
+    ``state``, masks outliers above threshold, and re-solves warm-started.
+
+    When ``guard`` is True the re-solve is kept only if it lowers the full-set
+    Huber guard cost (a trust region), otherwise ``state`` is restored to the
+    pass-1 result. **The guard is OFF by default in production**: the dense-urban
+    win comes from dropping systematically biased NLOS measurements, which snaps
+    the position toward truth but *raises* the robust residual cost (the dropped
+    outliers are re-added at their original weight, Huber-capped), so the guard
+    rejects exactly the re-solves we want. See PR / memory notes. The guard is
+    retained for experimentation only.
+
+    Returns ``(iters, mse)`` and mutates ``state`` in place (matching the native
+    solver contract). A no-op returning the pass-1 result when fixed
+    linearization inputs are absent, nothing is masked, the re-solve fails, or the
+    guard (when enabled) rejects it.
+    """
+    iters1, mse1 = solver_fn(sat_ecef, pseudorange, weights, state, **vd_kwargs)
+    ref = vd_kwargs.get("pr_linearization_ref_ecef")
+    los = vd_kwargs.get("pr_linearization_los_ecef")
+    if int(iters1) < 0 or ref is None or los is None:
+        return iters1, mse1
+    sys_kind = vd_kwargs.get("sys_kind")
+    n_clock = int(vd_kwargs.get("n_clock", 1))
+    pseudorange = np.asarray(pseudorange, dtype=np.float64)
+    thresholds = _pr_resolve_thresholds(
+        sys_kind, pseudorange.shape, threshold_l1_m, threshold_l5_m
+    )
+    resid1 = _pr_linearized_residual(pseudorange, los, ref, state, sys_kind, n_clock)
+    new_w, n_masked = _pr_two_stage_mask(resid1, weights, thresholds, int(min_keep))
+    if n_masked == 0:
+        return iters1, mse1
+    cost1 = _pr_huber_guard_cost(resid1, weights, thresholds) if guard else 0.0
+    backup = np.array(state, dtype=np.float64, copy=True)
+    iters2, mse2 = solver_fn(sat_ecef, pseudorange, new_w, state, **vd_kwargs)
+    if int(iters2) < 0:
+        state[:, :] = backup
+        return iters1, mse1
+    if guard:
+        resid2 = _pr_linearized_residual(pseudorange, los, ref, state, sys_kind, n_clock)
+        cost2 = _pr_huber_guard_cost(resid2, weights, thresholds)
+        if not (cost2 < cost1):
+            state[:, :] = backup  # guard rejects: re-solve did not improve the fit
+            return iters1, mse1
+    return int(iters1) + int(iters2), mse2
+
+
 def _fixed_doppler_linearization_inputs(
     sat_ecef: np.ndarray,
     sat_vel: np.ndarray | None,
@@ -2520,6 +2682,11 @@ def run_fgo_chunked(
     fgo_huber_k_doppler: float = 0.0,
     fgo_huber_k_tdcp: float = 0.0,
     fgo_fixed_linearization: bool = False,
+    fgo_two_stage_residual_resolve: bool = False,
+    fgo_two_stage_residual_threshold_l1_m: float = 20.0,
+    fgo_two_stage_residual_threshold_l5_m: float = 15.0,
+    fgo_two_stage_residual_min_keep: int = 5,
+    fgo_two_stage_residual_guard: bool = False,
     fgo_seed_state: np.ndarray | None = None,
 ) -> "ChunkedFgoRun":
     n_epoch = batch.sat_ecef.shape[0]
@@ -3069,6 +3236,23 @@ def run_fgo_chunked(
                             huber_k_warmstart=float(fgo_huber_k_pr),
                             **vd_kwargs,
                         )
+                    elif (
+                        fgo_two_stage_residual_resolve
+                        and pr_linearization_ref_ecef is not None
+                        and pr_linearization_los_ecef is not None
+                    ):
+                        iters, _ = two_stage_residual_resolve_vd(
+                            fgo_gnss_lm_vd,
+                            sat_ecef_for_solver,
+                            pseudorange_for_solver,
+                            fgo_weights_for_solver,
+                            seg_state_for_solver,
+                            threshold_l1_m=fgo_two_stage_residual_threshold_l1_m,
+                            threshold_l5_m=fgo_two_stage_residual_threshold_l5_m,
+                            min_keep=fgo_two_stage_residual_min_keep,
+                            guard=fgo_two_stage_residual_guard,
+                            vd_kwargs=vd_kwargs,
+                        )
                     else:
                         iters, _ = fgo_gnss_lm_vd(
                             sat_ecef_for_solver,
@@ -3509,6 +3693,21 @@ def solve_trip(
     fgo_run_options = _fgo_run_options_from_config(config)
     fgo_run_kwargs = fgo_run_options.run_kwargs()
     fgo_run_kwargs["vd_seed_factor_guard"] = _vd_seed_factor_guard_enabled_for_phone(phone_name)
+    fgo_run_kwargs["fgo_two_stage_residual_resolve"] = bool(
+        getattr(config, "fgo_two_stage_residual_resolve", False)
+    )
+    fgo_run_kwargs["fgo_two_stage_residual_threshold_l1_m"] = float(
+        getattr(config, "fgo_two_stage_residual_threshold_l1_m", 20.0)
+    )
+    fgo_run_kwargs["fgo_two_stage_residual_threshold_l5_m"] = float(
+        getattr(config, "fgo_two_stage_residual_threshold_l5_m", 15.0)
+    )
+    fgo_run_kwargs["fgo_two_stage_residual_min_keep"] = int(
+        getattr(config, "fgo_two_stage_residual_min_keep", 5)
+    )
+    fgo_run_kwargs["fgo_two_stage_residual_guard"] = bool(
+        getattr(config, "fgo_two_stage_residual_guard", False)
+    )
     taroz_candidate_sources = _taroz_fgo_candidate_sources_enabled(config)
     taroz_candidate_base_run_kwargs = dict(fgo_run_kwargs)
     effective_trip_type: str | None = None
diff --git a/tests/test_two_stage_residual_resolve.py b/tests/test_two_stage_residual_resolve.py
new file mode 100644
index 00000000..aef8641c
--- /dev/null
+++ b/tests/test_two_stage_residual_resolve.py
@@ -0,0 +1,244 @@
+"""Unit tests for the FGO 2-stage residual re-solve (VD path) and its guard.
+
+These exercise the pure residual / mask / guard-cost helpers and the
+``two_stage_residual_resolve_vd`` orchestration with a mock solver, so no native
+CUDA solve is needed.
+"""
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+REPO = Path(__file__).resolve().parents[1]
+for p in (REPO, REPO / "python"):
+    if str(p) not in sys.path:
+        sys.path.insert(0, str(p))
+
+from experiments.gsdc2023_raw_bridge import (  # noqa: E402
+    _pr_huber_guard_cost,
+    _pr_linearized_residual,
+    _pr_resolve_thresholds,
+    _pr_two_stage_mask,
+    two_stage_residual_resolve_vd,
+)
+
+
+def _state(pos, c0, n_extra_clock=0):
+    """Build a single-epoch VD state row: [x,y,z, vx,vy,vz, c0, (extra clocks)]."""
+    row = [*pos, 0.0, 0.0, 0.0, c0, *([0.0] * n_extra_clock)]
+    return np.asarray([row], dtype=np.float64)
+
+
+def test_linearized_residual_matches_hand_computation():
+    # los . (x - ref) + clk, residual = z - that.
+    los = np.asarray([[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]], dtype=np.float64)
+    ref = np.asarray([[0.0, 0.0, 0.0]], dtype=np.float64)
+    state = _state([2.0, 5.0, 0.0], c0=1.0)
+    z = np.asarray([[10.0, 10.0]], dtype=np.float64)
+    resid = _pr_linearized_residual(z, los, ref, state, sys_kind=None, n_clock=1)
+    # pred = [2,5] + clk(1) = [3, 6]; resid = [10-3, 10-6] = [7, 4]
+    np.testing.assert_allclose(resid, [[7.0, 4.0]])
+
+
+def test_linearized_residual_uses_per_system_isb():
+    los = np.zeros((1, 2, 3), dtype=np.float64)  # geometry contributes nothing
+    ref = np.zeros((1, 3), dtype=np.float64)
+    # n_clock=7 layout: clocks at state cols 6..12
+    row = [0, 0, 0, 0, 0, 0, 3.0, 0, 0, 0, 0, 9.0, 0]  # c0=3, c5=9
+    state = np.asarray([row], dtype=np.float64)
+    z = np.asarray([[3.0, 12.0]], dtype=np.float64)
+    sk = np.asarray([[0, 5]], dtype=np.int64)  # sat0 -> c0 only; sat1 -> c0 + c5
+    resid = _pr_linearized_residual(z, los, ref, state, sys_kind=sk, n_clock=7)
+    # sat0 clk=3 -> resid 0; sat1 clk=3+9=12 -> resid 0
+    np.testing.assert_allclose(resid, [[0.0, 0.0]], atol=1e-9)
+
+
+def test_resolve_thresholds_l1_l5_split():
+    sk = np.asarray([[0, 4, 5, 6, 2]], dtype=np.int64)
+    thr = _pr_resolve_thresholds(sk, sk.shape, threshold_l1_m=20.0, threshold_l5_m=15.0)
+    np.testing.assert_allclose(thr, [[20.0, 15.0, 15.0, 15.0, 20.0]])
+    # no sys_kind -> all L1
+    thr2 = _pr_resolve_thresholds(None, (1, 3), 20.0, 15.0)
+    np.testing.assert_allclose(thr2, [[20.0, 20.0, 20.0]])
+
+
+def test_two_stage_mask_zeroes_outliers():
+    resid = np.asarray([[1.0, 2.0, 50.0, -3.0, 0.5, 100.0]], dtype=np.float64)
+    w = np.ones((1, 6), dtype=np.float64)
+    thr = np.full((1, 6), 20.0)
+    new_w, n_masked = _pr_two_stage_mask(resid, w, thr, min_keep=4)
+    assert n_masked == 2
+    assert new_w[0, 2] == 0.0 and new_w[0, 5] == 0.0
+    assert list(new_w[0, [0, 1, 3, 4]]) == [1.0, 1.0, 1.0, 1.0]
+
+
+def test_two_stage_mask_respects_min_keep_worst_first():
+    # 6 rows all over threshold, min_keep=5 -> only the single worst dropped
+    resid = np.asarray([[30.0, 40.0, 25.0, 90.0, 35.0, 50.0]], dtype=np.float64)
+    w = np.ones((1, 6), dtype=np.float64)
+    thr = np.full((1, 6), 20.0)
+    new_w, n_masked = _pr_two_stage_mask(resid, w, thr, min_keep=5)
+    assert n_masked == 1
+    assert new_w[0, 3] == 0.0  # the 90 m row
+    assert int((new_w[0] > 0).sum()) == 5
+
+
+def test_two_stage_mask_skips_thin_epochs():
+    resid = np.asarray([[50.0, 60.0, 70.0]], dtype=np.float64)  # only 3 rows
+    w = np.ones((1, 3), dtype=np.float64)
+    thr = np.full((1, 3), 20.0)
+    new_w, n_masked = _pr_two_stage_mask(resid, w, thr, min_keep=5)
+    assert n_masked == 0
+    np.testing.assert_allclose(new_w, w)
+
+
+def test_huber_guard_cost_caps_outliers():
+    # one inlier (3 m) quadratic, one outlier (100 m) capped linear at c=20
+    resid = np.asarray([[3.0, 100.0]], dtype=np.float64)
+    w = np.ones((1, 2), dtype=np.float64)
+    thr = np.full((1, 2), 20.0)
+    cost = _pr_huber_guard_cost(resid, w, thr)
+    expected = 0.5 * 9.0 + 20.0 * (100.0 - 10.0)  # 4.5 + 1800
+    assert cost == pytest.approx(expected)
+    # zero-weight rows ignored
+    w2 = np.asarray([[0.0, 1.0]], dtype=np.float64)
+    cost2 = _pr_huber_guard_cost(resid, w2, thr)
+    assert cost2 == pytest.approx(20.0 * 90.0)
+
+
+class _MockSolver:
+    """Mutates state in place per call; returns (iters, mse). Call 1 = pass-1,
+    call 2 = pass-2 (sets the clock so the guard sees the prescribed fit)."""
+
+    def __init__(self, pass1_c0, pass2_c0, iters1=5, iters2=7):
+        self.calls = 0
+        self.pass1_c0 = pass1_c0
+        self.pass2_c0 = pass2_c0
+        self.iters1 = iters1
+        self.iters2 = iters2
+        self.weights_seen = []
+
+    def __call__(self, sat_ecef, pseudorange, weights, state, **kwargs):
+        self.calls += 1
+        self.weights_seen.append(np.asarray(weights).copy())
+        c0 = self.pass1_c0 if self.calls == 1 else self.pass2_c0
+        state[:, 6] = c0
+        return (self.iters1 if self.calls == 1 else self.iters2), 0.0
+
+
+def _vd_kwargs():
+    los = np.zeros((1, 6, 3), dtype=np.float64)  # geometry contributes nothing
+    ref = np.zeros((1, 3), dtype=np.float64)
+    return dict(pr_linearization_ref_ecef=ref, pr_linearization_los_ecef=los,
+                sys_kind=None, n_clock=1)
+
+
+def test_resolve_accepts_when_guard_cost_improves():
+    # pass-1 c0=0 -> resid = z; one big outlier. pass-2 c0=1 tightens inliers.
+    z = np.asarray([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 100.0]], dtype=np.float64)
+    w = np.ones((1, 7), dtype=np.float64)
+    state = _state([0.0, 0.0, 0.0], c0=0.0)
+    los = np.zeros((1, 7, 3)); ref = np.zeros((1, 3))
+    kw = dict(pr_linearization_ref_ecef=ref, pr_linearization_los_ecef=los,
+              sys_kind=None, n_clock=1)
+    solver = _MockSolver(pass1_c0=0.0, pass2_c0=1.0)
+    iters, _mse = two_stage_residual_resolve_vd(
+        solver, None, z, w, state,
+        threshold_l1_m=20.0, threshold_l5_m=15.0, min_keep=5, vd_kwargs=kw)
+    assert solver.calls == 2
+    assert iters == 12  # iters1 + iters2
+    assert state[0, 6] == 1.0  # pass-2 state kept
+    # pass-2 ran on masked weights (outlier dropped)
+    assert solver.weights_seen[1][0, 6] == 0.0
+
+
+def test_resolve_rejects_when_guard_cost_worsens():
+    z = np.asarray([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 100.0]], dtype=np.float64)
+    w = np.ones((1, 7), dtype=np.float64)
+    state = _state([0.0, 0.0, 0.0], c0=0.0)
+    los = np.zeros((1, 7, 3)); ref = np.zeros((1, 3))
+    kw = dict(pr_linearization_ref_ecef=ref, pr_linearization_los_ecef=los,
+              sys_kind=None, n_clock=1)
+    solver = _MockSolver(pass1_c0=0.0, pass2_c0=-50.0)  # pass-2 makes everything worse
+    iters, _mse = two_stage_residual_resolve_vd(
+        solver, None, z, w, state,
+        threshold_l1_m=20.0, threshold_l5_m=15.0, min_keep=5, vd_kwargs=kw)
+    assert solver.calls == 2
+    assert iters == 5  # only pass-1 iters
+    assert state[0, 6] == 0.0  # restored to pass-1 state
+
+
+def test_resolve_keeps_pass2_when_guard_disabled():
+    # Same setup as the reject test, but guard=False -> pass-2 is always kept,
+    # even though its full-set Huber cost is worse. This is the production default:
+    # the dense-urban win raises the robust cost yet lowers position error.
+    z = np.asarray([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 100.0]], dtype=np.float64)
+    w = np.ones((1, 7), dtype=np.float64)
+    state = _state([0.0, 0.0, 0.0], c0=0.0)
+    los = np.zeros((1, 7, 3)); ref = np.zeros((1, 3))
+    kw = dict(pr_linearization_ref_ecef=ref, pr_linearization_los_ecef=los,
+              sys_kind=None, n_clock=1)
+    solver = _MockSolver(pass1_c0=0.0, pass2_c0=-50.0)  # would be rejected by the guard
+    iters, _mse = two_stage_residual_resolve_vd(
+        solver, None, z, w, state,
+        threshold_l1_m=20.0, threshold_l5_m=15.0, min_keep=5, guard=False, vd_kwargs=kw)
+    assert solver.calls == 2
+    assert iters == 12  # iters1 + iters2 (pass-2 kept)
+    assert state[0, 6] == -50.0  # pass-2 state kept despite worse Huber cost
+
+
+def test_resolve_noop_without_fixed_linearization():
+    z = np.asarray([[100.0, 100.0, 100.0, 100.0, 100.0, 100.0]], dtype=np.float64)
+    w = np.ones((1, 6), dtype=np.float64)
+    state = _state([0.0, 0.0, 0.0], c0=0.0)
+    solver = _MockSolver(pass1_c0=0.0, pass2_c0=1.0)
+    kw = dict(sys_kind=None, n_clock=1)  # no ref/los
+    iters, _mse = two_stage_residual_resolve_vd(
+        solver, None, z, w, state,
+        threshold_l1_m=20.0, threshold_l5_m=15.0, min_keep=5, vd_kwargs=kw)
+    assert solver.calls == 1  # never re-solved
+    assert iters == 5
+
+
+def test_resolve_noop_when_nothing_masked():
+    z = np.asarray([[1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 4.0]], dtype=np.float64)  # all < 20
+    w = np.ones((1, 7), dtype=np.float64)
+    state = _state([0.0, 0.0, 0.0], c0=0.0)
+    los = np.zeros((1, 7, 3)); ref = np.zeros((1, 3))
+    kw = dict(pr_linearization_ref_ecef=ref, pr_linearization_los_ecef=los,
+              sys_kind=None, n_clock=1)
+    solver = _MockSolver(pass1_c0=0.0, pass2_c0=1.0)
+    iters, _mse = two_stage_residual_resolve_vd(
+        solver, None, z, w, state,
+        threshold_l1_m=20.0, threshold_l5_m=15.0, min_keep=5, vd_kwargs=kw)
+    assert solver.calls == 1  # nothing masked -> no re-solve
+    assert iters == 5
+
+
+def test_resolve_restores_state_on_pass2_failure():
+    z = np.asarray([[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 100.0]], dtype=np.float64)
+    w = np.ones((1, 7), dtype=np.float64)
+    state = _state([0.0, 0.0, 0.0], c0=0.0)
+    los = np.zeros((1, 7, 3)); ref = np.zeros((1, 3))
+    kw = dict(pr_linearization_ref_ecef=ref, pr_linearization_los_ecef=los,
+              sys_kind=None, n_clock=1)
+
+    class _FailingPass2(_MockSolver):
+        def __call__(self, sat_ecef, pseudorange, weights, state, **kwargs):
+            self.calls += 1
+            if self.calls == 1:
+                state[:, 6] = 0.0
+                return 5, 0.0
+            state[:, 6] = 999.0  # corrupt, then signal failure
+            return -1, 0.0
+
+    solver = _FailingPass2(0.0, 0.0)
+    iters, _mse = two_stage_residual_resolve_vd(
+        solver, None, z, w, state,
+        threshold_l1_m=20.0, threshold_l5_m=15.0, min_keep=5, vd_kwargs=kw)
+    assert solver.calls == 2
+    assert iters == 5
+    assert state[0, 6] == 0.0  # restored despite pass-2 corrupting then failing

From 61e00a0f11e3db55de4f5857cba08b646607475f Mon Sep 17 00:00:00 2001
From: gnss-gpu contributors <rsasaki0109@gmail.com>
Date: Mon, 15 Jun 2026 03:53:02 +0900
Subject: [PATCH 2/2] feat(gsdc2023): divergence-gated acceptance for the
 2-stage residual re-solve
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds fgo_two_stage_divergence_p95_m (BridgeConfig, default 0 = disabled) and
CLI --fgo-two-stage-divergence-p95-m. When > 0, the 2-stage re-solve only fires
on chunks whose pass-1 fixed-linearization residual p95 (over active rows)
exceeds the threshold — i.e. only chunks where pass-1 diverged. Healthy chunks
are left at the pass-1 result, so the re-solve introduces no perturbation there.

Motivation: a per-chunk diagnostic probe across win / wash / regression trips
showed the unconditional re-solve's only net win (lax-o) is split into two parts:
a divergence rescue on a couple of chunks where pass-1 blew up to multi-km
residuals (pass-1 p95 ~39 km), and pervasive marginal NLOS masking on healthy
chunks (p95 ~30 m). Every regression trip (lax-t/mtv-b/mtv-pe1 +10..11 cm) does
only the marginal masking on a healthy fit (p95 <= ~50 m). The two are
indistinguishable by the marginal residuals themselves, but the divergence
chunks stand alone by orders of magnitude (pass-1 p95 38840 m vs <= 53 m
everywhere else, position move 16908 m vs <= 3 m).

Gating on pass-1 divergence (p95 > 500 m) cleanly captures the rescue and skips
the marginal masking. An 8-trip A/B (div500 vs baseline) confirms: every +10 cm
regression goes bit-identical to baseline, and lax-o keeps -9.3 cm (the
divergence-rescue portion of its -54.6 cm unconditional win). The marginal-mask
bulk of lax-o's win is inseparable from the regressions, so the gated lever is a
clean zero-regression rescue (1 win / 0 reg / 7 wash) rather than the larger but
net-wash unconditional lever.

The Huber guard cannot do this (the rescue raises the robust cost: lax-o
cost_ratio 1.32), so divergence gating is the correct discriminator. Default off
keeps legacy behaviour; independent of the guard flag. 2 new unit tests.
---
 .../build_gsdc2023_bridge_submission.py       |  9 +++++
 experiments/gsdc2023_bridge_config.py         | 12 ++++++
 experiments/gsdc2023_raw_bridge.py            | 31 +++++++++++++--
 tests/test_two_stage_residual_resolve.py      | 39 +++++++++++++++++++
 4 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/experiments/build_gsdc2023_bridge_submission.py b/experiments/build_gsdc2023_bridge_submission.py
index 0292a6ea..f17d7451 100644
--- a/experiments/build_gsdc2023_bridge_submission.py
+++ b/experiments/build_gsdc2023_bridge_submission.py
@@ -358,6 +358,9 @@ def build_config(args: argparse.Namespace) -> BridgeConfig:
         fgo_two_stage_residual_guard=bool(
             getattr(args, "fgo_two_stage_residual_guard", False)
         ),
+        fgo_two_stage_divergence_p95_m=float(
+            getattr(args, "fgo_two_stage_divergence_p95_m", 0.0)
+        ),
     )
     if bool(getattr(args, "taroz_marupaku", False)):
         cfg = apply_taroz_marupaku_preset(cfg)
@@ -619,6 +622,12 @@ def main(argv: list[str] | None = None) -> int:
         default=False,
         help="Gate the 2-stage re-solve behind a Huber trust-region cost (keep pass-2 only if the full-set Huber residual cost improves). OFF by default: the dense-urban win comes from dropping biased NLOS rows, which lowers position error but *raises* the Huber cost, so the guard rejects exactly the re-solves we want. Retained for experimentation.",
     )
+    parser.add_argument(
+        "--fgo-two-stage-divergence-p95-m",
+        type=float,
+        default=0.0,
+        help="Recommended acceptance gate for the 2-stage re-solve (0 = disabled). When > 0, the re-solve only fires on chunks whose pass-1 fixed-linearization residual p95 (over active rows) exceeds this many metres — i.e. only chunks where pass-1 diverged. Keeps the lax-o divergence-rescue win while skipping the marginal masking that causes +10 cm regressions on healthy chunks.",
+    )
     parser.add_argument("--stop-attitude-sigma-rad", type=float, default=0.0)
     parser.add_argument(
         "--taroz-fgo",
diff --git a/experiments/gsdc2023_bridge_config.py b/experiments/gsdc2023_bridge_config.py
index 05510532..8a589d4b 100644
--- a/experiments/gsdc2023_bridge_config.py
+++ b/experiments/gsdc2023_bridge_config.py
@@ -295,6 +295,15 @@ class BridgeConfig:
     fgo_two_stage_residual_threshold_l5_m: float = 15.0
     fgo_two_stage_residual_min_keep: int = 5
     fgo_two_stage_residual_guard: bool = False
+    # Recommended acceptance gate for the 2-stage re-solve (0 = disabled).  When
+    # > 0, the re-solve is attempted only on chunks whose pass-1 fixed-
+    # linearization residual p95 (over active rows) exceeds this many metres —
+    # i.e. only chunks where pass-1 diverged.  A diagnostic probe showed the only
+    # net win (lax-o) comes from such diverged chunks (p95 ~39 km) while every
+    # regression trip masks marginal outliers on a healthy pass-1 fit (p95
+    # <= ~50 m), so gating on divergence keeps the rescue and drops the +10 cm
+    # regressions.
+    fgo_two_stage_divergence_p95_m: float = 0.0
     tdcp_weight_scale: float = DEFAULT_TDCP_WEIGHT_SCALE
     tdcp_l5_weight_scale: float = 1.0
     tdcp_geometry_correction: bool = DEFAULT_TDCP_GEOMETRY_CORRECTION
@@ -412,6 +421,9 @@ def __post_init__(self) -> None:
                 raise ValueError(f"{name} must be finite and > 0")
         if int(self.fgo_two_stage_residual_min_keep) < 4:
             raise ValueError("fgo_two_stage_residual_min_keep must be >= 4")
+        divergence_p95 = float(self.fgo_two_stage_divergence_p95_m)
+        if not np.isfinite(divergence_p95) or divergence_p95 < 0.0:
+            raise ValueError("fgo_two_stage_divergence_p95_m must be finite and >= 0")
         for name in (
             "stop_velocity_huber_k",
             "stop_position_huber_k",
diff --git a/experiments/gsdc2023_raw_bridge.py b/experiments/gsdc2023_raw_bridge.py
index 4d28aa17..36ae34ce 100644
--- a/experiments/gsdc2023_raw_bridge.py
+++ b/experiments/gsdc2023_raw_bridge.py
@@ -2324,12 +2324,24 @@ def two_stage_residual_resolve_vd(
     threshold_l5_m: float,
     min_keep: int,
     guard: bool = True,
+    divergence_p95_m: float = 0.0,
     vd_kwargs: dict,
 ) -> tuple[int, float]:
     """taroz-style 2-stage residual re-solve for the VD path. Runs ``solver_fn``
     (the VD solver), recomputes the fixed-linearization residual at the converged
     ``state``, masks outliers above threshold, and re-solves warm-started.
 
+    ``divergence_p95_m`` is the recommended acceptance criterion (default 0 =
+    disabled). When > 0, the re-solve is attempted ONLY on chunks whose pass-1
+    fit has diverged — i.e. the 95th percentile of the pass-1 fixed-linearization
+    residual over active rows exceeds the threshold. A diagnostic probe across
+    win / wash / regression trips showed the only net win (lax-o) comes from a
+    handful of chunks where pass-1 diverged to multi-km residuals (p95 ~39 km),
+    while every regression trip masks only marginal outliers on a healthy pass-1
+    fit (p95 <= ~50 m); gating on pass-1 divergence keeps the rescue and skips
+    the perturbations that cause the +10 cm regressions. The chunk is left
+    untouched (pass-1 kept) when its pass-1 fit is healthy.
+
     When ``guard`` is True the re-solve is kept only if it lowers the full-set
     Huber guard cost (a trust region), otherwise ``state`` is restored to the
     pass-1 result. **The guard is OFF by default in production**: the dense-urban
@@ -2337,12 +2349,12 @@ def two_stage_residual_resolve_vd(
     the position toward truth but *raises* the robust residual cost (the dropped
     outliers are re-added at their original weight, Huber-capped), so the guard
     rejects exactly the re-solves we want. See PR / memory notes. The guard is
-    retained for experimentation only.
+    retained for experimentation only and is independent of the divergence gate.
 
     Returns ``(iters, mse)`` and mutates ``state`` in place (matching the native
     solver contract). A no-op returning the pass-1 result when fixed
-    linearization inputs are absent, nothing is masked, the re-solve fails, or the
-    guard (when enabled) rejects it.
+    linearization inputs are absent, the pass-1 fit is healthy (divergence gate),
+    nothing is masked, the re-solve fails, or the guard (when enabled) rejects it.
     """
     iters1, mse1 = solver_fn(sat_ecef, pseudorange, weights, state, **vd_kwargs)
     ref = vd_kwargs.get("pr_linearization_ref_ecef")
@@ -2356,6 +2368,14 @@ def two_stage_residual_resolve_vd(
         sys_kind, pseudorange.shape, threshold_l1_m, threshold_l5_m
     )
     resid1 = _pr_linearized_residual(pseudorange, los, ref, state, sys_kind, n_clock)
+    if float(divergence_p95_m) > 0.0:
+        w = np.asarray(weights, dtype=np.float64)
+        active = np.isfinite(w) & (w > 0.0)
+        ar = np.abs(resid1)[active]
+        ar = ar[np.isfinite(ar)]
+        pass1_p95 = float(np.percentile(ar, 95)) if ar.size else 0.0
+        if pass1_p95 <= float(divergence_p95_m):
+            return iters1, mse1  # pass-1 fit is healthy: skip the re-solve
     new_w, n_masked = _pr_two_stage_mask(resid1, weights, thresholds, int(min_keep))
     if n_masked == 0:
         return iters1, mse1
@@ -2687,6 +2707,7 @@ def run_fgo_chunked(
     fgo_two_stage_residual_threshold_l5_m: float = 15.0,
     fgo_two_stage_residual_min_keep: int = 5,
     fgo_two_stage_residual_guard: bool = False,
+    fgo_two_stage_divergence_p95_m: float = 0.0,
     fgo_seed_state: np.ndarray | None = None,
 ) -> "ChunkedFgoRun":
     n_epoch = batch.sat_ecef.shape[0]
@@ -3251,6 +3272,7 @@ def run_fgo_chunked(
                             threshold_l5_m=fgo_two_stage_residual_threshold_l5_m,
                             min_keep=fgo_two_stage_residual_min_keep,
                             guard=fgo_two_stage_residual_guard,
+                            divergence_p95_m=fgo_two_stage_divergence_p95_m,
                             vd_kwargs=vd_kwargs,
                         )
                     else:
@@ -3708,6 +3730,9 @@ def solve_trip(
     fgo_run_kwargs["fgo_two_stage_residual_guard"] = bool(
         getattr(config, "fgo_two_stage_residual_guard", False)
     )
+    fgo_run_kwargs["fgo_two_stage_divergence_p95_m"] = float(
+        getattr(config, "fgo_two_stage_divergence_p95_m", 0.0)
+    )
     taroz_candidate_sources = _taroz_fgo_candidate_sources_enabled(config)
     taroz_candidate_base_run_kwargs = dict(fgo_run_kwargs)
     effective_trip_type: str | None = None
diff --git a/tests/test_two_stage_residual_resolve.py b/tests/test_two_stage_residual_resolve.py
index aef8641c..1f2389e8 100644
--- a/tests/test_two_stage_residual_resolve.py
+++ b/tests/test_two_stage_residual_resolve.py
@@ -190,6 +190,45 @@ def test_resolve_keeps_pass2_when_guard_disabled():
     assert state[0, 6] == -50.0  # pass-2 state kept despite worse Huber cost
 
 
+def test_divergence_gate_skips_healthy_pass1():
+    # One row just over the 20 m mask threshold (would normally mask + re-solve),
+    # but the pass-1 residual p95 is low (~18 m) -> the divergence gate skips the
+    # re-solve entirely, so pass-1 is kept untouched.
+    z = np.asarray([[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 21.0]], dtype=np.float64)
+    w = np.ones((1, 7), dtype=np.float64)
+    state = _state([0.0, 0.0, 0.0], c0=0.0)
+    los = np.zeros((1, 7, 3)); ref = np.zeros((1, 3))
+    kw = dict(pr_linearization_ref_ecef=ref, pr_linearization_los_ecef=los,
+              sys_kind=None, n_clock=1)
+    solver = _MockSolver(pass1_c0=0.0, pass2_c0=1.0)
+    iters, _mse = two_stage_residual_resolve_vd(
+        solver, None, z, w, state,
+        threshold_l1_m=20.0, threshold_l5_m=15.0, min_keep=5,
+        divergence_p95_m=500.0, vd_kwargs=kw)
+    assert solver.calls == 1  # healthy pass-1 -> gate skips the re-solve
+    assert iters == 5
+    assert state[0, 6] == 0.0
+
+
+def test_divergence_gate_fires_on_diverged_pass1():
+    # Pass-1 diverged: every residual ~600 m (p95 > 500 m gate) -> the re-solve
+    # fires even with the divergence gate enabled.
+    z = np.full((1, 7), 600.0, dtype=np.float64)
+    w = np.ones((1, 7), dtype=np.float64)
+    state = _state([0.0, 0.0, 0.0], c0=0.0)
+    los = np.zeros((1, 7, 3)); ref = np.zeros((1, 3))
+    kw = dict(pr_linearization_ref_ecef=ref, pr_linearization_los_ecef=los,
+              sys_kind=None, n_clock=1)
+    solver = _MockSolver(pass1_c0=0.0, pass2_c0=1.0)
+    iters, _mse = two_stage_residual_resolve_vd(
+        solver, None, z, w, state,
+        threshold_l1_m=20.0, threshold_l5_m=15.0, min_keep=5,
+        divergence_p95_m=500.0, vd_kwargs=kw)
+    assert solver.calls == 2  # diverged pass-1 -> re-solve fires
+    assert iters == 12
+    assert state[0, 6] == 1.0
+
+
 def test_resolve_noop_without_fixed_linearization():
     z = np.asarray([[100.0, 100.0, 100.0, 100.0, 100.0, 100.0]], dtype=np.float64)
     w = np.ones((1, 6), dtype=np.float64)