bacemtayeb · bacemtayeb · Jun 17, 2026 · Jun 17, 2026
diff --git a/scripts/evaluate.py b/scripts/evaluate.py
@@ -15,7 +15,7 @@
 from sumlens.eval.ablation import ablation_table
 from sumlens.types import AnalysisConfig
 
-_COLUMNS = ["condition", "precision", "recall", "f1", "ece"]
+# Column order for the ablation output (presumably the CSV header; confirm
+# against the writer). The threshold-free roc_auc/pr_auc are listed ahead of the
+# fixed-threshold precision/recall/f1.
+_COLUMNS = ["condition", "roc_auc", "pr_auc", "precision", "recall", "f1", "ece"]
 
 
 def _read(path: Path) -> list[dict[str, str]]:

diff --git a/scripts/extract_features.py b/scripts/extract_features.py
@@ -1,11 +1,12 @@
-"""Run signals A/B(/C) over a RAGTruth split and write a fusion features CSV.
+"""Run signals A/B/C over a RAGTruth split and write a fusion features CSV.
 
-For each summary sentence: classifier (A), NLI (B), and optionally attribution (C)
-scores + the grounded gold label. Output feeds scripts/train_fusion.py.
+For each summary sentence: classifier (A), NLI (B), and support attribution (C:
+the two scalars attr_conc and attr_loo) scores, plus the grounded gold label.
+Output feeds the ablation (scripts/evaluate.py).
 
-Attribution is off by default: RAGTruth summaries were not generated by our local
-model, so Inseq attribution is not well-defined for them (see research-plan.md §8).
-Enable with --with-attribution only when summaries come from our own summariser.
+Signal C here is the generator-agnostic support attribution (signals/support.py),
+derived from an NLI matrix, so it is well-defined for RAGTruth even though those
+summaries were not generated by our local model (unlike Inseq attribution; see
+research-plan.md §8). It therefore always runs.
 
 This runs the REAL models — launch on HPC (si-gpu / sbatch), not in CI.
 """
@@ -16,9 +17,9 @@
 
 from sumlens.eval.features import FIELDNAMES, feature_rows
 from sumlens.eval.ragtruth import load_split
-from sumlens.signals.attribution import attribute
 from sumlens.signals.classifier import classify
 from sumlens.signals.nli import entail, extract_claims
+from sumlens.signals.support import support_attribution
 from sumlens.types import AnalysisConfig
 
 
@@ -27,7 +28,6 @@ def main() -> None:
     parser.add_argument("--data-dir", type=Path, default=Path("data/ragtruth"))
     parser.add_argument("--split", default="train")
     parser.add_argument("--out", type=Path, default=Path("features.csv"))
-    parser.add_argument("--with-attribution", action="store_true")
     parser.add_argument("--limit", type=int, default=0, help="cap summaries (0 = all)")
     args = parser.parse_args()
 
@@ -40,9 +40,9 @@ def main() -> None:
     for document, summary, hallucinated in examples:
         classifier_out = classify(document, summary, cfg)
         nli_out = entail(extract_claims(summary), document, cfg)
-        attribution_out = attribute(document, summary, cfg) if args.with_attribution else {}
+        support_out = support_attribution(document, summary, cfg)
         rows.extend(
-            feature_rows(summary, hallucinated, classifier_out, nli_out, attribution_out)
+            feature_rows(summary, hallucinated, classifier_out, nli_out, support_out)
         )
 
     with args.out.open("w", encoding="utf-8", newline="") as fh:

diff --git a/scripts/jobs/run_eval.sbatch b/scripts/jobs/run_eval.sbatch
@@ -45,7 +45,9 @@ python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
 # after a crash/timeout resumes instead of redoing finished work) ---
 echo ">>> extract features (train)"; [ -f features_train.csv ] || python scripts/extract_features.py --split train --data-dir data/ragtruth --out features_train.csv
 echo ">>> extract features (test)";  [ -f features_test.csv ]  || python scripts/extract_features.py --split test  --data-dir data/ragtruth --out features_test.csv
-echo ">>> train fusion";             [ -f models/fusion.pkl ]  || python scripts/train_fusion.py --features features_train.csv --out-dir models
+# train_fusion (live model) is intentionally skipped: this experiment compares
+# signals via the ablation, which fits its own per-subset models. Promote a live
+# fusion model only after the ablation shows the new attribution signals help.
 echo ">>> ablation table";           [ -f ablation.csv ]       || python scripts/evaluate.py --train features_train.csv --test features_test.csv --out ablation.csv
 
 echo "=== DONE $(date) ==="

diff --git a/scripts/train_fusion.py b/scripts/train_fusion.py
@@ -14,13 +14,19 @@
 from sumlens.fuse import fit_fusion, fit_platt
 
 
+def _num(value: str | None, impute: float = 0.5) -> float:
+    """Parse one CSV signal cell, substituting ``impute`` when it is missing.
+
+    A cell counts as missing when it is empty or whitespace-only (a signal
+    that produced no score for the row is written as an empty field) or
+    ``None`` (``csv.DictReader`` fills the absent fields of a short row with
+    ``None``). Any other text must parse as a float: a malformed number still
+    raises ``ValueError`` rather than being silently imputed.
+    """
+    if value is None or not value.strip():
+        return impute
+    return float(value)
+
+
 def _read(path: Path) -> tuple[list[list[float]], list[int]]:
+    """Load (feature vectors, grounded labels) from a features CSV.
+
+    Two schemas are accepted. The current one carries the attribution signal
+    as two columns, ``attr_conc`` and ``attr_loo``; the legacy one carries a
+    single ``attribution`` column. The schema is chosen from the header, so
+    the feature vectors have four elements for current files and three for
+    legacy files. Missing cells are imputed by ``_num``.
+
+    Raises:
+        KeyError: if a required column is absent from the file.
+        ValueError: if a cell holds text that is not a number.
+    """
     features: list[list[float]] = []
     grounded: list[int] = []
     with path.open(encoding="utf-8") as fh:
-        for row in csv.DictReader(fh):
-            features.append(
-                [float(row["classifier"]), float(row["nli"]), float(row["attribution"])]
-            )
+        reader = csv.DictReader(fh)
+        header = reader.fieldnames or []
+        # Decide the attribution columns once from the header, not per row.
+        if "attr_conc" in header:
+            attribution = ["attr_conc", "attr_loo"]
+        else:
+            attribution = ["attribution"]
+        columns = ["classifier", "nli", *attribution]
+        for row in reader:
+            features.append([_num(row[name]) for name in columns])
             grounded.append(int(row["grounded"]))
     return features, grounded

diff --git a/sumlens/eval/ablation.py b/sumlens/eval/ablation.py
@@ -1,22 +1,24 @@
 """Ablation over signal subsets — the report's centrepiece table.
 
-For each non-empty subset of {classifier (A), NLI (B), attribution (C)} we fit a
-fusion LogisticRegression on the train rows (using only that subset's columns),
-predict on the test rows, and report detection precision/recall/F1 (positive class
-= hallucinated) plus the calibration error of the grounding probability.
-
-Rows are mappings with keys: classifier, nli, attribution (float or None/""),
-and grounded (1 grounded / 0 hallucinated). Missing signal values are imputed.
+For each non-empty subset of {classifier (A), NLI (B), attr_conc (C), attr_loo (D)}
+we fit a fusion LogisticRegression on the train rows (using only that subset's
+columns), predict on the test rows, and report threshold-free ROC-AUC and PR-AUC
+(the headline numbers), detection precision/recall/F1 at the fixed 0.5 operating
+point (positive class = hallucinated), plus the calibration error of the
+grounding probability. C and D are the two scalars of the generator-agnostic
+support attribution (signals/support.py).
+
+Rows are mappings with keys: classifier, nli, attr_conc, attr_loo (float or
+None/""), and grounded (1 grounded / 0 hallucinated). Missing values are imputed.
 """
 
 from collections.abc import Mapping, Sequence
 from itertools import combinations
 
-from sumlens.eval.metrics import expected_calibration_error
+from sumlens.eval.metrics import expected_calibration_error, pr_auc, roc_auc
 from sumlens.fuse import fit_fusion
 
-_SIGNALS = ("classifier", "nli", "attribution")
-_LETTER = {"classifier": "A", "nli": "B", "attribution": "C"}
+# Signal columns the ablation draws its subsets from, and the single-letter code
+# each one contributes to a row's "condition" label (letters are joined with "+").
+_SIGNALS = ("classifier", "nli", "attr_conc", "attr_loo")
+_LETTER = {"classifier": "A", "nli": "B", "attr_conc": "C", "attr_loo": "D"}
 
 Row = Mapping[str, object]
 
@@ -46,8 +48,15 @@ def _evaluate_combo(
     true_hallucinated = [1 - g for g in y_test]
     precision, recall, f1 = _prf(true_hallucinated, pred_hallucinated)
 
+    # Threshold-free detection metrics (positive class = hallucinated). f1 above
+    # is a single fixed-0.5 operating point and is misleading under the heavy
+    # hallucination class imbalance; roc_auc/pr_auc are the headline numbers.
+    proba_hallucinated = [1.0 - p for p in grounded_proba]
+
     return {
         "condition": "+".join(_LETTER[s] for s in combo),
+        "roc_auc": roc_auc(proba_hallucinated, true_hallucinated),
+        "pr_auc": pr_auc(proba_hallucinated, true_hallucinated),
         "precision": precision,
         "recall": recall,
         "f1": f1,

diff --git a/sumlens/eval/features.py b/sumlens/eval/features.py
@@ -1,24 +1,34 @@
 """Assemble fusion training rows from per-sentence signal outputs + gold labels.
 
-One row per summary sentence: the three signal scores (None if a signal did not
-run for that sentence) and the grounded label (1 if grounded, 0 if the RAGTruth
-gold marks the sentence hallucinated). This pure function is the testable core of
-`scripts/extract_features.py`, which supplies the real signal outputs.
+One row per summary sentence: the signal scores (None if a signal did not run for
+that sentence) and the grounded label (1 if grounded, 0 if the RAGTruth gold marks
+the sentence hallucinated). Signal C is the generator-agnostic support attribution
+(`signals/support.py`), which yields two scalars per sentence: attr_conc (support
+concentration) and attr_loo (best-supporter necessity margin). This pure function
+is the testable core of `scripts/extract_features.py`.
 """
 
 from collections.abc import Mapping
 
 from sumlens.types import Claim, Summary
 
-FIELDNAMES = ["summary_id", "sentence_id", "classifier", "nli", "attribution", "grounded"]
+# Column order of one feature row; the names match the keys of the dicts that
+# feature_rows builds.
+FIELDNAMES = [
+    "summary_id",
+    "sentence_id",
+    "classifier",
+    "nli",
+    "attr_conc",
+    "attr_loo",
+    "grounded",
+]
 
 
 def feature_rows(
     summary: Summary,
     hallucinated_ids: list[str],
     classifier_out: dict[str, tuple[float, list[tuple[int, int]]]],
     nli_out: dict[str, tuple[float, list[Claim]]],
-    attribution_out: dict[str, tuple[float, list[str]]],
+    support_out: dict[str, tuple[float, float, list[str]]],
 ) -> list[dict[str, object]]:
     hallucinated = set(hallucinated_ids)
     rows: list[dict[str, object]] = []
@@ -27,15 +37,18 @@ def feature_rows(
             {
                 "summary_id": summary.id,
                 "sentence_id": sentence.id,
-                "classifier": _score(classifier_out, sentence.id),
-                "nli": _score(nli_out, sentence.id),
-                "attribution": _score(attribution_out, sentence.id),
+                "classifier": _at(classifier_out, sentence.id, 0),
+                "nli": _at(nli_out, sentence.id, 0),
+                "attr_conc": _at(support_out, sentence.id, 0),
+                "attr_loo": _at(support_out, sentence.id, 1),
                 "grounded": 0 if sentence.id in hallucinated else 1,
             }
         )
     return rows
 
 
-def _score(signal_out: Mapping[str, tuple[float, object]], sentence_id: str) -> float | None:
+def _at(
+    signal_out: Mapping[str, tuple[object, ...]], sentence_id: str, index: int
+) -> float | None:
+    """Return element ``index`` of the sentence's signal tuple as a float.
+
+    Returns None when ``signal_out`` has no entry for ``sentence_id``, i.e. the
+    signal did not run for that sentence.
+    """
     entry = signal_out.get(sentence_id)
-    return entry[0] if entry is not None else None
+    return float(entry[index]) if entry is not None else None  # type: ignore[arg-type]
diff --git a/sumlens/eval/metrics.py b/sumlens/eval/metrics.py
@@ -22,6 +22,51 @@ def sentence_f1(preds: dict[str, set[str]], golds: dict[str, set[str]]) -> dict[
     return {"precision": precision, "recall": recall, "f1": f1}
 
 
+def roc_auc(scores: list[float], labels: list[int]) -> float:
+    """Return the threshold-free ROC-AUC (rank-based, tied scores share a rank).
+
+    ``scores`` rank the positive class (label 1); ``labels`` are 0/1. Returns
+    0.0 if either class is absent.
+
+    Raises:
+        ValueError: if any score is NaN, or if ``scores`` and ``labels``
+            differ in length. Both checks run only once both classes are
+            known to be present. NaN has no rank, and because it never
+            compares equal to itself it would otherwise stall the
+            tie-grouping scan.
+    """
+    n_pos = sum(labels)
+    n_neg = len(labels) - n_pos
+    if not n_pos or not n_neg:
+        return 0.0
+    if any(score != score for score in scores):  # NaN is the only value != itself
+        raise ValueError("roc_auc: scores must not contain NaN")
+    order = sorted(zip(scores, labels, strict=True), key=lambda pair: pair[0])
+    rank_sum_pos = 0.0
+    start = 0
+    while start < len(order):
+        # Extend [start, stop) over the run of equal scores. Starting at
+        # start + 1 guarantees the scan always advances.
+        stop = start + 1
+        while stop < len(order) and order[stop][0] == order[start][0]:
+            stop += 1
+        rank = (start + stop - 1) / 2 + 1  # 1-based average rank for the tie group
+        rank_sum_pos += rank * sum(1 for _, label in order[start:stop] if label == 1)
+        start = stop
+    # Rank sum of the positives minus its minimum, over the number of pos/neg pairs.
+    return (rank_sum_pos - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg)
+
+
+def pr_auc(scores: list[float], labels: list[int]) -> float:
+    """Return average precision (area under the precision-recall curve).
+
+    ``scores`` rank the positive class (label 1). Returns 0.0 if there are no
+    positives. Better than ROC-AUC under heavy class imbalance; floor is the
+    positive base rate.
+
+    Tied scores form a single operating point, so the result does not depend
+    on the order in which equal-scored items appear in the input. A constant
+    scorer therefore gets exactly the positive base rate.
+
+    Raises:
+        ValueError: if ``scores`` and ``labels`` differ in length (checked
+            only when at least one positive is present).
+    """
+    n_pos = sum(labels)
+    if not n_pos:
+        return 0.0
+    order = sorted(zip(scores, labels, strict=True), key=lambda pair: pair[0], reverse=True)
+    tp = fp = 0
+    ap = 0.0
+    prev_recall = 0.0
+    start = 0
+    while start < len(order):
+        # Extend [start, stop) over the run of equal scores: no threshold can
+        # separate them, so they enter the curve together.
+        stop = start + 1
+        while stop < len(order) and order[stop][0] == order[start][0]:
+            stop += 1
+        hits = sum(1 for _, label in order[start:stop] if label == 1)
+        tp += hits
+        fp += (stop - start) - hits
+        recall = tp / n_pos
+        ap += (recall - prev_recall) * (tp / (tp + fp))
+        prev_recall = recall
+        start = stop
+    return ap
+
+
 def expected_calibration_error(
     scores: list[float], labels: list[int], n_bins: int = 10
 ) -> float:

diff --git a/sumlens/signals/support.py b/sumlens/signals/support.py
@@ -0,0 +1,51 @@
+"""Signal C (redesign) — generator-agnostic source attribution from an NLI matrix.
+
+Inseq attribution (`attribution.py`) is gradient-based and needs the *generating*
+model, so it is undefined for RAGTruth (external-model summaries). This signal
+derives attribution from entailment alone, so it is defined for any (source,
+summary) pair. For each summary sentence ``s`` we score entailment against every
+source sentence ``j``, ``M[s][j] = P(src_j entails s)``, then collapse the row:
+
+- ``attr_conc(s) = max_j M - mean_j M`` — support concentration. A grounded
+  sentence has one sharp supporter; a fabricated one has diffuse, flat-low support.
+- ``attr_loo(s)  = top1 - top2`` — necessity margin of the single best supporter.
+- top-k source sentence ids — the UI heatmap (generator-free, no token offsets).
+
+Reuses signal B's NLI model and batched call. Pure given the NLI boundary, which
+tests mock via `_get_nli`. Consumed by `scripts/extract_features.py`.
+"""
+
+from sumlens.signals.nli import _entail_prob, _get_nli
+from sumlens.types import AnalysisConfig, Document, Summary
+
+# batch_size handed to the NLI call in support_attribution.
+_BATCH_SIZE = 64
+# Maximum number of best-scoring source sentence ids kept per summary sentence.
+_TOP_K = 5
+
+
+def support_attribution(
+    document: Document, summary: Summary, cfg: AnalysisConfig
+) -> dict[str, tuple[float, float, list[str]]]:
+    """Score each summary sentence's support among the source sentences.
+
+    Every (source sentence, summary sentence) pair goes through the NLI model
+    in one batched call. Each summary sentence's row of entailment
+    probabilities is then reduced to a tuple of:
+
+    - attr_conc: the best probability minus the row mean,
+    - attr_loo: the best probability minus the runner-up (the runner-up counts
+      as 0.0 when the document has a single sentence),
+    - the ids of up to ``_TOP_K`` highest-scoring source sentences, best first.
+
+    Returns a dict keyed by summary sentence id. If either side has no
+    sentences, every summary sentence maps to ``(0.0, 0.0, [])`` and the NLI
+    model is not invoked.
+    """
+    sources = document.sentences
+    claims = summary.sentences
+    if not (claims and sources):
+        return {claim.id: (0.0, 0.0, []) for claim in claims}
+
+    model = _get_nli(cfg.nli_model)
+    # Row-major layout: all sources for the first summary sentence, then the next.
+    inputs = [
+        {"text": source.text, "text_pair": claim.text}
+        for claim in claims
+        for source in sources
+    ]
+    outputs = model(inputs, top_k=None, batch_size=_BATCH_SIZE)
+    width = len(sources)
+
+    attributions: dict[str, tuple[float, float, list[str]]] = {}
+    for position, claim in enumerate(claims):
+        begin = position * width
+        probs = [_entail_prob(scores) for scores in outputs[begin : begin + width]]
+        # Source indices ordered from strongest to weakest supporter.
+        ranking = sorted(range(width), key=probs.__getitem__, reverse=True)
+        best = probs[ranking[0]]
+        runner_up = probs[ranking[1]] if width > 1 else 0.0
+        attributions[claim.id] = (
+            best - sum(probs) / width,
+            best - runner_up,
+            [sources[j].id for j in ranking[:_TOP_K]],
+        )
+    return attributions
diff --git a/tests/test_ablation.py b/tests/test_ablation.py
@@ -2,28 +2,35 @@
 
 from sumlens.eval.ablation import ablation_table
 
-_GROUNDED = {"classifier": 0.9, "nli": 0.8, "attribution": 0.7, "grounded": 1}
-_HALLUCINATED = {"classifier": 0.1, "nli": 0.2, "attribution": 0.3, "grounded": 0}
+_GROUNDED = {"classifier": 0.9, "nli": 0.8, "attr_conc": 0.7, "attr_loo": 0.6, "grounded": 1}
+_HALLUCINATED = {"classifier": 0.1, "nli": 0.2, "attr_conc": 0.3, "attr_loo": 0.2, "grounded": 0}
 _ROWS = [_GROUNDED, _HALLUCINATED] * 10
 
 
 def test_ablation_table_conditions_and_scores() -> None:
+    """Every non-empty signal subset is a condition, every metric is a float,
+    and the fully fused model scores perfectly on the separable fixture."""
     table = ablation_table(_ROWS, _ROWS)
 
     conditions = {row["condition"] for row in table}
-    assert conditions == {"A", "B", "C", "A+B", "A+C", "B+C", "A+B+C"}
+    assert conditions == {
+        "A", "B", "C", "D",
+        "A+B", "A+C", "A+D", "B+C", "B+D", "C+D",
+        "A+B+C", "A+B+D", "A+C+D", "B+C+D",
+        "A+B+C+D",
+    }
 
     for row in table:
-        for key in ("precision", "recall", "f1", "ece"):
+        for key in ("roc_auc", "pr_auc", "precision", "recall", "f1", "ece"):
             assert isinstance(row[key], float)
 
-    fused = next(row for row in table if row["condition"] == "A+B+C")
+    fused = next(row for row in table if row["condition"] == "A+B+C+D")
     assert fused["f1"] == 1.0  # perfectly separable -> perfect detection
+    assert fused["roc_auc"] == 1.0
+    assert fused["pr_auc"] == 1.0
 
 
 def test_ablation_imputes_missing_attribution() -> None:
-    # attribution missing ("") on every row -> still runs via imputation
-    rows = [{**r, "attribution": ""} for r in _ROWS]
+    # attr_conc missing ("") on every row -> still runs via imputation
+    rows = [{**r, "attr_conc": ""} for r in _ROWS]
     table = ablation_table(rows, rows)
     c_only = next(row for row in table if row["condition"] == "C")
     assert isinstance(c_only["f1"], float)
diff --git a/tests/test_features.py b/tests/test_features.py
@@ -21,25 +21,28 @@ def test_feature_rows_labels_and_missing_signals() -> None:
     classifier_out = {"sum-0000": (0.1, []), "sum-0001": (0.9, [(0, 4)])}
     failed = Claim(id="c", sentence_id="sum-0001", text="x")
     nli_out = {"sum-0000": (0.8, []), "sum-0001": (0.2, [failed])}
-    attribution_out = {"sum-0001": (0.3, ["src-0000"])}  # only the gated sentence has C
+    # support attribution: (attr_conc, attr_loo, top_source_ids); sum-0000 absent
+    support_out = {"sum-0001": (0.3, 0.15, ["src-0000"])}
 
-    rows = feature_rows(_summary(), ["sum-0001"], classifier_out, nli_out, attribution_out)
+    rows = feature_rows(_summary(), ["sum-0001"], classifier_out, nli_out, support_out)
 
     assert rows == [
         {
             "summary_id": "r1",
             "sentence_id": "sum-0000",
             "classifier": 0.1,
             "nli": 0.8,
-            "attribution": None,  # C did not run for this sentence
+            "attr_conc": None,  # C did not run for this sentence
+            "attr_loo": None,
             "grounded": 1,
         },
         {
             "summary_id": "r1",
             "sentence_id": "sum-0001",
             "classifier": 0.9,
             "nli": 0.2,
-            "attribution": 0.3,
+            "attr_conc": 0.3,
+            "attr_loo": 0.15,
             "grounded": 0,  # marked hallucinated in gold
         },
     ]