rsasaki0109 · rsasaki0109 · Jun 13, 2026 · Jun 14, 2026
diff --git a/experiments/build_gsdc2023_bridge_submission.py b/experiments/build_gsdc2023_bridge_submission.py
@@ -343,6 +343,24 @@ def build_config(args: argparse.Namespace) -> BridgeConfig:
         fgo_residual_mask_propagation=bool(
             getattr(args, "fgo_residual_mask_propagation", False)
         ),
+        fgo_two_stage_residual_resolve=bool(
+            getattr(args, "fgo_two_stage_residual_resolve", False)
+        ),
+        fgo_two_stage_residual_threshold_l1_m=float(
+            getattr(args, "fgo_two_stage_residual_threshold_l1_m", 20.0)
+        ),
+        fgo_two_stage_residual_threshold_l5_m=float(
+            getattr(args, "fgo_two_stage_residual_threshold_l5_m", 15.0)
+        ),
+        fgo_two_stage_residual_min_keep=int(
+            getattr(args, "fgo_two_stage_residual_min_keep", 5)
+        ),
+        fgo_two_stage_residual_guard=bool(
+            getattr(args, "fgo_two_stage_residual_guard", False)
+        ),
+        fgo_two_stage_divergence_p95_m=float(
+            getattr(args, "fgo_two_stage_divergence_p95_m", 0.0)
+        ),
     )
     if bool(getattr(args, "taroz_marupaku", False)):
         cfg = apply_taroz_marupaku_preset(cfg)
@@ -589,6 +607,27 @@ def main(argv: list[str] | None = None) -> int:
         default=False,
         help="Mirror the post-fill pseudorange residual mask into the FGO weights (weights_fgo), so residual-rejected PR rows are dropped from the FGO objective too.",
     )
+    parser.add_argument(
+        "--fgo-two-stage-residual-resolve",
+        action=argparse.BooleanOptionalAction,
+        default=False,
+        help="taroz-style 2-stage residual re-solve: after the FGO converges, mask pseudorange rows whose fixed-linearization residual exceeds threshold and re-solve warm-started. Big win on degraded dense-urban chunks (lax-o -54.6cm), at most +0.5cm elsewhere. Default off.",
+    )
+    parser.add_argument("--fgo-two-stage-residual-threshold-l1-m", type=float, default=20.0)
+    parser.add_argument("--fgo-two-stage-residual-threshold-l5-m", type=float, default=15.0)
+    parser.add_argument("--fgo-two-stage-residual-min-keep", type=int, default=5)
+    parser.add_argument(
+        "--fgo-two-stage-residual-guard",
+        action=argparse.BooleanOptionalAction,
+        default=False,
+        help="Gate the 2-stage re-solve behind a Huber trust-region cost (keep pass-2 only if the full-set Huber residual cost improves). OFF by default: the dense-urban win comes from dropping biased NLOS rows, which lowers position error but *raises* the Huber cost, so the guard rejects exactly the re-solves we want. Retained for experimentation.",
+    )
+    parser.add_argument(
+        "--fgo-two-stage-divergence-p95-m",
+        type=float,
+        default=0.0,
+        help="Recommended acceptance gate for the 2-stage re-solve (0 = disabled). When > 0, the re-solve only fires on chunks whose pass-1 fixed-linearization residual p95 (over active rows) exceeds this many metres — i.e. only chunks where pass-1 diverged. Keeps the lax-o divergence-rescue win while skipping the marginal masking that causes +10 cm regressions on healthy chunks.",
+    )
     parser.add_argument("--stop-attitude-sigma-rad", type=float, default=0.0)
     parser.add_argument(
         "--taroz-fgo",

diff --git a/experiments/gsdc2023_bridge_config.py b/experiments/gsdc2023_bridge_config.py
@@ -281,6 +281,29 @@ class BridgeConfig:
     # taroz's, while P-residual-only improved all 3 probe trips
     # (mtv-h -8 cm, sjc-q -2 cm, lax-o -1.10 m FGO score).
     fgo_residual_mask_propagation: bool = False
+    # taroz-style 2-stage residual re-solve for the FGO (VD path only). After the
+    # first converged solve, recompute the per-measurement residual in the fixed-
+    # linearization domain (z - (los . (x - ref) + clk)), mask rows whose residual
+    # exceeds threshold (L1 / L5), and RE-SOLVE warm-started. A trust-region guard
+    # only keeps the re-solve when it lowers a Huber cost (kernel = the mask
+    # threshold) over the full measurement set, so trips that do not benefit are
+    # left bit-identical. Big win on degraded dense-urban chunks (lax-o FGO score
+    # -54.6 cm), neutral elsewhere. Requires fixed linearization (the default
+    # gnss_only preset path); a no-op when pr-linearization inputs are absent.
+    fgo_two_stage_residual_resolve: bool = False
+    fgo_two_stage_residual_threshold_l1_m: float = 20.0
+    fgo_two_stage_residual_threshold_l5_m: float = 15.0
+    fgo_two_stage_residual_min_keep: int = 5
+    fgo_two_stage_residual_guard: bool = False
+    # Recommended acceptance gate for the 2-stage re-solve (0 = disabled).  When
+    # > 0, the re-solve is attempted only on chunks whose pass-1 fixed-
+    # linearization residual p95 (over active rows) exceeds this many metres —
+    # i.e. only chunks where pass-1 diverged.  A diagnostic probe showed the only
+    # net win (lax-o) comes from such diverged chunks (p95 ~39 km) while every
+    # regression trip masks marginal outliers on a healthy pass-1 fit (p95
+    # <= ~50 m), so gating on divergence keeps the rescue and drops the +10 cm
+    # regressions.
+    fgo_two_stage_divergence_p95_m: float = 0.0
     tdcp_weight_scale: float = DEFAULT_TDCP_WEIGHT_SCALE
     tdcp_l5_weight_scale: float = 1.0
     tdcp_geometry_correction: bool = DEFAULT_TDCP_GEOMETRY_CORRECTION
@@ -385,6 +408,22 @@ def __post_init__(self) -> None:
             raise ValueError("fgo_lm_damping must be >= 0")
         if not isinstance(self.fgo_extra_constellations, bool):
             raise ValueError("fgo_extra_constellations must be a bool")
+        if not isinstance(self.fgo_two_stage_residual_resolve, bool):
+            raise ValueError("fgo_two_stage_residual_resolve must be a bool")
+        if not isinstance(self.fgo_two_stage_residual_guard, bool):
+            raise ValueError("fgo_two_stage_residual_guard must be a bool")
+        for name in (
+            "fgo_two_stage_residual_threshold_l1_m",
+            "fgo_two_stage_residual_threshold_l5_m",
+        ):
+            value = float(getattr(self, name))
+            if not np.isfinite(value) or value <= 0.0:
+                raise ValueError(f"{name} must be finite and > 0")
+        if int(self.fgo_two_stage_residual_min_keep) < 4:
+            raise ValueError("fgo_two_stage_residual_min_keep must be >= 4")
+        divergence_p95 = float(self.fgo_two_stage_divergence_p95_m)
+        if not np.isfinite(divergence_p95) or divergence_p95 < 0.0:
+            raise ValueError("fgo_two_stage_divergence_p95_m must be finite and >= 0")
         for name in (
             "stop_velocity_huber_k",
             "stop_position_huber_k",

diff --git a/experiments/gsdc2023_raw_bridge.py b/experiments/gsdc2023_raw_bridge.py
@@ -2212,6 +2212,188 @@ def _fixed_pr_linearization_inputs(
     return measurement, solver_weights, ref, los
 
 
+def _pr_linearized_residual(
+    pseudorange: np.ndarray,
+    los: np.ndarray,
+    ref: np.ndarray,
+    state: np.ndarray,
+    sys_kind: np.ndarray | None,
+    n_clock: int,
+) -> np.ndarray:
+    """Per-measurement pseudorange residual in the fixed-linearization domain.
+
+    The VD solver is fed ``z = PR_raw - range(ref)`` together with the line-of-
+    sight ``los`` and linearization point ``ref``; its model is
+    ``z - (los . (x - ref) + clk)`` where ``clk = c0 + (c_sk if sk>0)`` (per-
+    system / per-signal clock from the VD state). This recomputes that residual
+    at the converged ``state`` so outliers only visible after convergence can be
+    masked. Returns ``(T, n_sat)`` in metres.
+    """
+    z = np.asarray(pseudorange, dtype=np.float64)
+    los = np.asarray(los, dtype=np.float64)
+    ref = np.asarray(ref, dtype=np.float64)
+    xyz = np.asarray(state[:, :3], dtype=np.float64)
+    dx = xyz - ref
+    pred_geom = np.einsum("tsj,tj->ts", los, dx)
+    nc = int(n_clock)
+    clocks = np.asarray(state[:, 6 : 6 + nc], dtype=np.float64)
+    clk_common = clocks[:, 0:1]
+    if sys_kind is not None and nc > 1:
+        sk = np.asarray(sys_kind, dtype=np.int64)
+        sk_clamped = np.clip(sk, 0, nc - 1)
+        isb = np.take_along_axis(clocks, sk_clamped, axis=1)
+        clk = clk_common + np.where(sk > 0, isb, 0.0)
+    else:
+        clk = np.broadcast_to(clk_common, z.shape)
+    return z - (pred_geom + clk)
+
+
+def _pr_resolve_thresholds(
+    sys_kind: np.ndarray | None,
+    shape: tuple[int, ...],
+    threshold_l1_m: float,
+    threshold_l5_m: float,
+) -> np.ndarray:
+    """Per-row mask threshold: L5 (clock kind ``sk >= 4``) vs L1 (everything
+    else), matching the signal-aware ``n_clock=7`` layout."""
+    if sys_kind is None:
+        return np.full(shape, float(threshold_l1_m), dtype=np.float64)
+    sk = np.asarray(sys_kind, dtype=np.int64)
+    return np.where(sk >= 4, float(threshold_l5_m), float(threshold_l1_m)).astype(np.float64)
+
+
+def _pr_huber_guard_cost(
+    resid: np.ndarray, weights: np.ndarray, thresholds: np.ndarray
+) -> float:
+    """Sum of ``w * huber_rho(resid; c=threshold)`` over finite, positively
+    weighted rows. The Huber kernel caps each outlier's contribution at the mask
+    threshold, so removing-and-refitting outliers cannot inflate the cost the way
+    a plain L2 cost would (the production path runs ``huber_k=0`` = L2). This is
+    the trust-region acceptance metric for the re-solve.
+    """
+    w = np.asarray(weights, dtype=np.float64)
+    r = np.abs(np.asarray(resid, dtype=np.float64))
+    c = np.asarray(thresholds, dtype=np.float64)
+    valid = np.isfinite(r) & np.isfinite(w) & (w > 0.0)
+    rho = np.where(r <= c, 0.5 * r * r, c * (r - 0.5 * c))
+    return float(np.sum(w[valid] * rho[valid]))
+
+
+def _pr_two_stage_mask(
+    resid: np.ndarray, weights: np.ndarray, thresholds: np.ndarray, min_keep: int
+) -> tuple[np.ndarray, int]:
+    """Return a copy of ``weights`` with outlier rows (``|resid| > threshold``)
+    zeroed, never dropping an epoch below ``min_keep`` surviving rows (worst-
+    first when the cap binds). Returns ``(new_weights, n_masked)``."""
+    new_w = np.asarray(weights, dtype=np.float64).copy()
+    r = np.abs(np.asarray(resid, dtype=np.float64))
+    c = np.asarray(thresholds, dtype=np.float64)
+    n_masked = 0
+    for i in range(new_w.shape[0]):
+        idx = np.flatnonzero(new_w[i] > 0.0)
+        if idx.size <= min_keep:
+            continue
+        ri = r[i, idx]
+        ci = c[i, idx]
+        finite = np.isfinite(ri)
+        out = finite & (ri > ci)
+        if not out.any():
+            continue
+        if int(idx.size - int(out.sum())) < min_keep:
+            order = np.argsort(-np.where(finite, ri, -np.inf))
+            droppable = order[: max(0, idx.size - min_keep)]
+            keep = np.zeros(idx.size, dtype=bool)
+            sel = droppable[finite[droppable] & (ri[droppable] > ci[droppable])]
+            keep[sel] = True
+            out = keep
+            if not out.any():
+                continue
+        new_w[i, idx[out]] = 0.0
+        n_masked += int(out.sum())
+    return new_w, n_masked
+
+
+def two_stage_residual_resolve_vd(
+    solver_fn,
+    sat_ecef: np.ndarray,
+    pseudorange: np.ndarray,
+    weights: np.ndarray,
+    state: np.ndarray,
+    *,
+    threshold_l1_m: float,
+    threshold_l5_m: float,
+    min_keep: int,
+    guard: bool = True,
+    divergence_p95_m: float = 0.0,
+    vd_kwargs: dict,
+) -> tuple[int, float]:
+    """taroz-style 2-stage residual re-solve for the VD path. Runs ``solver_fn``
+    (the VD solver), recomputes the fixed-linearization residual at the converged
+    ``state``, masks outliers above threshold, and re-solves warm-started.
+
+    ``divergence_p95_m`` is the recommended acceptance criterion (default 0 =
+    disabled). When > 0, the re-solve is attempted ONLY on chunks whose pass-1
+    fit has diverged — i.e. the 95th percentile of the pass-1 fixed-linearization
+    residual over active rows exceeds the threshold. A diagnostic probe across
+    win / wash / regression trips showed the only net win (lax-o) comes from a
+    handful of chunks where pass-1 diverged to multi-km residuals (p95 ~39 km),
+    while every regression trip masks only marginal outliers on a healthy pass-1
+    fit (p95 <= ~50 m); gating on pass-1 divergence keeps the rescue and skips
+    the perturbations that cause the +10 cm regressions. The chunk is left
+    untouched (pass-1 kept) when its pass-1 fit is healthy.
+
+    When ``guard`` is True the re-solve is kept only if it lowers the full-set
+    Huber guard cost (a trust region), otherwise ``state`` is restored to the
+    pass-1 result. **The guard is OFF by default in production**: the dense-urban
+    win comes from dropping systematically biased NLOS measurements, which snaps
+    the position toward truth but *raises* the robust residual cost (the dropped
+    outliers are re-added at their original weight, Huber-capped), so the guard
+    rejects exactly the re-solves we want. See PR / memory notes. The guard is
+    retained for experimentation only and is independent of the divergence gate.
+
+    Returns ``(iters, mse)`` and mutates ``state`` in place (matching the native
+    solver contract). A no-op returning the pass-1 result when fixed
+    linearization inputs are absent, the pass-1 fit is healthy (divergence gate),
+    nothing is masked, the re-solve fails, or the guard (when enabled) rejects it.
+    """
+    iters1, mse1 = solver_fn(sat_ecef, pseudorange, weights, state, **vd_kwargs)
+    ref = vd_kwargs.get("pr_linearization_ref_ecef")
+    los = vd_kwargs.get("pr_linearization_los_ecef")
+    if int(iters1) < 0 or ref is None or los is None:
+        return iters1, mse1
+    sys_kind = vd_kwargs.get("sys_kind")
+    n_clock = int(vd_kwargs.get("n_clock", 1))
+    pseudorange = np.asarray(pseudorange, dtype=np.float64)
+    thresholds = _pr_resolve_thresholds(
+        sys_kind, pseudorange.shape, threshold_l1_m, threshold_l5_m
+    )
+    resid1 = _pr_linearized_residual(pseudorange, los, ref, state, sys_kind, n_clock)
+    if float(divergence_p95_m) > 0.0:
+        w = np.asarray(weights, dtype=np.float64)
+        active = np.isfinite(w) & (w > 0.0)
+        ar = np.abs(resid1)[active]
+        ar = ar[np.isfinite(ar)]
+        pass1_p95 = float(np.percentile(ar, 95)) if ar.size else 0.0
+        if pass1_p95 <= float(divergence_p95_m):
+            return iters1, mse1  # pass-1 fit is healthy: skip the re-solve
+    new_w, n_masked = _pr_two_stage_mask(resid1, weights, thresholds, int(min_keep))
+    if n_masked == 0:
+        return iters1, mse1
+    cost1 = _pr_huber_guard_cost(resid1, weights, thresholds) if guard else 0.0
+    backup = np.array(state, dtype=np.float64, copy=True)
+    iters2, mse2 = solver_fn(sat_ecef, pseudorange, new_w, state, **vd_kwargs)
+    if int(iters2) < 0:
+        state[:, :] = backup
+        return iters1, mse1
+    if guard:
+        resid2 = _pr_linearized_residual(pseudorange, los, ref, state, sys_kind, n_clock)
+        cost2 = _pr_huber_guard_cost(resid2, weights, thresholds)
+        if not (cost2 < cost1):
+            state[:, :] = backup  # guard rejects: re-solve did not improve the fit
+            return iters1, mse1
+    return int(iters1) + int(iters2), mse2
+
+
 def _fixed_doppler_linearization_inputs(
     sat_ecef: np.ndarray,
     sat_vel: np.ndarray | None,
@@ -2520,6 +2702,12 @@ def run_fgo_chunked(
     fgo_huber_k_doppler: float = 0.0,
     fgo_huber_k_tdcp: float = 0.0,
     fgo_fixed_linearization: bool = False,
+    fgo_two_stage_residual_resolve: bool = False,
+    fgo_two_stage_residual_threshold_l1_m: float = 20.0,
+    fgo_two_stage_residual_threshold_l5_m: float = 15.0,
+    fgo_two_stage_residual_min_keep: int = 5,
+    fgo_two_stage_residual_guard: bool = False,
+    fgo_two_stage_divergence_p95_m: float = 0.0,
     fgo_seed_state: np.ndarray | None = None,
 ) -> "ChunkedFgoRun":
     n_epoch = batch.sat_ecef.shape[0]
@@ -3069,6 +3257,24 @@ def run_fgo_chunked(
                             huber_k_warmstart=float(fgo_huber_k_pr),
                             **vd_kwargs,
                         )
+                    elif (
+                        fgo_two_stage_residual_resolve
+                        and pr_linearization_ref_ecef is not None
+                        and pr_linearization_los_ecef is not None
+                    ):
+                        iters, _ = two_stage_residual_resolve_vd(
+                            fgo_gnss_lm_vd,
+                            sat_ecef_for_solver,
+                            pseudorange_for_solver,
+                            fgo_weights_for_solver,
+                            seg_state_for_solver,
+                            threshold_l1_m=fgo_two_stage_residual_threshold_l1_m,
+                            threshold_l5_m=fgo_two_stage_residual_threshold_l5_m,
+                            min_keep=fgo_two_stage_residual_min_keep,
+                            guard=fgo_two_stage_residual_guard,
+                            divergence_p95_m=fgo_two_stage_divergence_p95_m,
+                            vd_kwargs=vd_kwargs,
+                        )
                     else:
                         iters, _ = fgo_gnss_lm_vd(
                             sat_ecef_for_solver,
@@ -3509,6 +3715,24 @@ def solve_trip(
     fgo_run_options = _fgo_run_options_from_config(config)
     fgo_run_kwargs = fgo_run_options.run_kwargs()
     fgo_run_kwargs["vd_seed_factor_guard"] = _vd_seed_factor_guard_enabled_for_phone(phone_name)
+    fgo_run_kwargs["fgo_two_stage_residual_resolve"] = bool(
+        getattr(config, "fgo_two_stage_residual_resolve", False)
+    )
+    fgo_run_kwargs["fgo_two_stage_residual_threshold_l1_m"] = float(
+        getattr(config, "fgo_two_stage_residual_threshold_l1_m", 20.0)
+    )
+    fgo_run_kwargs["fgo_two_stage_residual_threshold_l5_m"] = float(
+        getattr(config, "fgo_two_stage_residual_threshold_l5_m", 15.0)
+    )
+    fgo_run_kwargs["fgo_two_stage_residual_min_keep"] = int(
+        getattr(config, "fgo_two_stage_residual_min_keep", 5)
+    )
+    fgo_run_kwargs["fgo_two_stage_residual_guard"] = bool(
+        getattr(config, "fgo_two_stage_residual_guard", False)
+    )
+    fgo_run_kwargs["fgo_two_stage_divergence_p95_m"] = float(
+        getattr(config, "fgo_two_stage_divergence_p95_m", 0.0)
+    )
     taroz_candidate_sources = _taroz_fgo_candidate_sources_enabled(config)
     taroz_candidate_base_run_kwargs = dict(fgo_run_kwargs)
     effective_trip_type: str | None = None