diff --git a/scripts/evaluate.py b/scripts/evaluate.py
index 5489990..293c47c 100644
--- a/scripts/evaluate.py
+++ b/scripts/evaluate.py
@@ -15,7 +15,7 @@
 from sumlens.eval.ablation import ablation_table
 from sumlens.types import AnalysisConfig
 
-_COLUMNS = ["condition", "precision", "recall", "f1", "ece"]
+_COLUMNS = ["condition", "roc_auc", "pr_auc", "precision", "recall", "f1", "ece"]
 
 
 def _read(path: Path) -> list[dict[str, str]]:
diff --git a/scripts/extract_features.py b/scripts/extract_features.py
index e9a9c02..0e1503b 100644
--- a/scripts/extract_features.py
+++ b/scripts/extract_features.py
@@ -1,11 +1,12 @@
-"""Run signals A/B(/C) over a RAGTruth split and write a fusion features CSV.
+"""Run signals A/B/C over a RAGTruth split and write a fusion features CSV.
 
-For each summary sentence: classifier (A), NLI (B), and optionally attribution (C)
-scores + the grounded gold label. Output feeds scripts/train_fusion.py.
+For each summary sentence: classifier (A), NLI (B), and support attribution (C:
+attr_conc and attr_loo) scores + the grounded gold label. Output feeds the ablation.
 
-Attribution is off by default: RAGTruth summaries were not generated by our local
-model, so Inseq attribution is not well-defined for them (see research-plan.md §8).
-Enable with --with-attribution only when summaries come from our own summariser.
+Signal C here is the generator-agnostic support attribution (signals/support.py),
+derived from an NLI matrix, so it is well-defined for RAGTruth even though those
+summaries were not generated by our local model (unlike Inseq attribution; see
+research-plan.md §8). It therefore always runs.
 
 This runs the REAL models — launch on HPC (si-gpu / sbatch), not in CI.
 """
@@ -16,9 +17,9 @@
 
 from sumlens.eval.features import FIELDNAMES, feature_rows
 from sumlens.eval.ragtruth import load_split
-from sumlens.signals.attribution import attribute
 from sumlens.signals.classifier import classify
 from sumlens.signals.nli import entail, extract_claims
+from sumlens.signals.support import support_attribution
 from sumlens.types import AnalysisConfig
 
 
@@ -27,7 +28,6 @@ def main() -> None:
     parser.add_argument("--data-dir", type=Path, default=Path("data/ragtruth"))
     parser.add_argument("--split", default="train")
     parser.add_argument("--out", type=Path, default=Path("features.csv"))
-    parser.add_argument("--with-attribution", action="store_true")
     parser.add_argument("--limit", type=int, default=0, help="cap summaries (0 = all)")
     args = parser.parse_args()
 
@@ -40,9 +40,9 @@ def main() -> None:
     for document, summary, hallucinated in examples:
         classifier_out = classify(document, summary, cfg)
         nli_out = entail(extract_claims(summary), document, cfg)
-        attribution_out = attribute(document, summary, cfg) if args.with_attribution else {}
+        support_out = support_attribution(document, summary, cfg)
         rows.extend(
-            feature_rows(summary, hallucinated, classifier_out, nli_out, attribution_out)
+            feature_rows(summary, hallucinated, classifier_out, nli_out, support_out)
         )
 
     with args.out.open("w", encoding="utf-8", newline="") as fh:
diff --git a/scripts/jobs/run_eval.sbatch b/scripts/jobs/run_eval.sbatch
index efdbfc8..f68a47c 100755
--- a/scripts/jobs/run_eval.sbatch
+++ b/scripts/jobs/run_eval.sbatch
@@ -45,7 +45,9 @@ python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
 # after a crash/timeout resumes instead of redoing finished work) ---
 echo ">>> extract features (train)"; [ -f features_train.csv ] || python scripts/extract_features.py --split train --data-dir data/ragtruth --out features_train.csv
 echo ">>> extract features (test)";  [ -f features_test.csv ]  || python scripts/extract_features.py --split test  --data-dir data/ragtruth --out features_test.csv
-echo ">>> train fusion";             [ -f models/fusion.pkl ]  || python scripts/train_fusion.py --features features_train.csv --out-dir models
+# train_fusion (live model) is intentionally skipped: this experiment compares
+# signals via the ablation, which fits its own per-subset models. Promote a live
+# fusion model only after the ablation shows the new attribution signals help.
 echo ">>> ablation table";           [ -f ablation.csv ]       || python scripts/evaluate.py --train features_train.csv --test features_test.csv --out ablation.csv
 
 echo "=== DONE $(date) ==="
diff --git a/scripts/train_fusion.py b/scripts/train_fusion.py
index 90eb67d..e3d693b 100644
--- a/scripts/train_fusion.py
+++ b/scripts/train_fusion.py
@@ -14,13 +14,19 @@
 from sumlens.fuse import fit_fusion, fit_platt
 
 
+def _num(value: str | None, impute: float = 0.5) -> float:
+    # "" = the signal was off for the run; None = the column is absent or the row
+    # is short (DictReader fills those with None). Impute neutral, as the ablation does.
+    return impute if value is None or value == "" else float(value)
+
+
 def _read(path: Path) -> tuple[list[list[float]], list[int]]:
     features: list[list[float]] = []
     grounded: list[int] = []
     with path.open(encoding="utf-8") as fh:
         for row in csv.DictReader(fh):
             features.append(
-                [float(row["classifier"]), float(row["nli"]), float(row["attribution"])]
+                [_num(row["classifier"]), _num(row["nli"]), _num(row.get("attribution"))]
             )
             grounded.append(int(row["grounded"]))
     return features, grounded
diff --git a/sumlens/eval/ablation.py b/sumlens/eval/ablation.py
index 25773a9..7daeb2e 100644
--- a/sumlens/eval/ablation.py
+++ b/sumlens/eval/ablation.py
@@ -1,22 +1,24 @@
 """Ablation over signal subsets — the report's centrepiece table.
 
-For each non-empty subset of {classifier (A), NLI (B), attribution (C)} we fit a
-fusion LogisticRegression on the train rows (using only that subset's columns),
-predict on the test rows, and report detection precision/recall/F1 (positive class
-= hallucinated) plus the calibration error of the grounding probability.
-
-Rows are mappings with keys: classifier, nli, attribution (float or None/""),
-and grounded (1 grounded / 0 hallucinated). Missing signal values are imputed.
+For each non-empty subset of {classifier (A), NLI (B), attr_conc (C), attr_loo (D)}
+we fit a fusion LogisticRegression on the train rows (using only that subset's
+columns), predict on the test rows, and report detection precision/recall/F1
+(positive class = hallucinated) plus the calibration error of the grounding
+probability. C and D are the two scalars of the generator-agnostic support
+attribution (signals/support.py).
+
+Rows are mappings with keys: classifier, nli, attr_conc, attr_loo (float or
+None/""), and grounded (1 grounded / 0 hallucinated). Missing values are imputed.
 """
 
 from collections.abc import Mapping, Sequence
 from itertools import combinations
 
-from sumlens.eval.metrics import expected_calibration_error
+from sumlens.eval.metrics import expected_calibration_error, pr_auc, roc_auc
 from sumlens.fuse import fit_fusion
 
-_SIGNALS = ("classifier", "nli", "attribution")
-_LETTER = {"classifier": "A", "nli": "B", "attribution": "C"}
+_SIGNALS = ("classifier", "nli", "attr_conc", "attr_loo")
+_LETTER = {"classifier": "A", "nli": "B", "attr_conc": "C", "attr_loo": "D"}
 
 Row = Mapping[str, object]
 
@@ -46,8 +48,15 @@ def _evaluate_combo(
     true_hallucinated = [1 - g for g in y_test]
     precision, recall, f1 = _prf(true_hallucinated, pred_hallucinated)
 
+    # Threshold-free detection metrics (positive class = hallucinated). f1 above
+    # is a single fixed-0.5 operating point and is misleading under the heavy
+    # hallucination class imbalance; roc_auc/pr_auc are the headline numbers.
+    proba_hallucinated = [1.0 - p for p in grounded_proba]
+
     return {
         "condition": "+".join(_LETTER[s] for s in combo),
+        "roc_auc": roc_auc(proba_hallucinated, true_hallucinated),
+        "pr_auc": pr_auc(proba_hallucinated, true_hallucinated),
         "precision": precision,
         "recall": recall,
         "f1": f1,
diff --git a/sumlens/eval/features.py b/sumlens/eval/features.py
index a9ca58d..3d56b75 100644
--- a/sumlens/eval/features.py
+++ b/sumlens/eval/features.py
@@ -1,16 +1,26 @@
 """Assemble fusion training rows from per-sentence signal outputs + gold labels.
 
-One row per summary sentence: the three signal scores (None if a signal did not
-run for that sentence) and the grounded label (1 if grounded, 0 if the RAGTruth
-gold marks the sentence hallucinated). This pure function is the testable core of
-`scripts/extract_features.py`, which supplies the real signal outputs.
+One row per summary sentence: the signal scores (None if a signal did not run for
+that sentence) and the grounded label (1 if grounded, 0 if the RAGTruth gold marks
+the sentence hallucinated). Signal C is the generator-agnostic support attribution
+(`signals/support.py`), which yields two scalars per sentence: attr_conc (support
+concentration) and attr_loo (best-supporter necessity margin). This pure function
+is the testable core of `scripts/extract_features.py`.
 """
 
 from collections.abc import Mapping
 
 from sumlens.types import Claim, Summary
 
-FIELDNAMES = ["summary_id", "sentence_id", "classifier", "nli", "attribution", "grounded"]
+FIELDNAMES = [
+    "summary_id",
+    "sentence_id",
+    "classifier",
+    "nli",
+    "attr_conc",
+    "attr_loo",
+    "grounded",
+]
 
 
 def feature_rows(
@@ -18,7 +28,7 @@ def feature_rows(
     hallucinated_ids: list[str],
     classifier_out: dict[str, tuple[float, list[tuple[int, int]]]],
     nli_out: dict[str, tuple[float, list[Claim]]],
-    attribution_out: dict[str, tuple[float, list[str]]],
+    support_out: dict[str, tuple[float, float, list[str]]],
 ) -> list[dict[str, object]]:
     hallucinated = set(hallucinated_ids)
     rows: list[dict[str, object]] = []
@@ -27,15 +37,18 @@ def feature_rows(
             {
                 "summary_id": summary.id,
                 "sentence_id": sentence.id,
-                "classifier": _score(classifier_out, sentence.id),
-                "nli": _score(nli_out, sentence.id),
-                "attribution": _score(attribution_out, sentence.id),
+                "classifier": _at(classifier_out, sentence.id, 0),
+                "nli": _at(nli_out, sentence.id, 0),
+                "attr_conc": _at(support_out, sentence.id, 0),
+                "attr_loo": _at(support_out, sentence.id, 1),
                 "grounded": 0 if sentence.id in hallucinated else 1,
             }
         )
     return rows
 
 
-def _score(signal_out: Mapping[str, tuple[float, object]], sentence_id: str) -> float | None:
+def _at(
+    signal_out: Mapping[str, tuple[object, ...]], sentence_id: str, index: int
+) -> float | None:
     entry = signal_out.get(sentence_id)
-    return entry[0] if entry is not None else None
+    return float(entry[index]) if entry is not None else None  # type: ignore[arg-type]
diff --git a/sumlens/eval/metrics.py b/sumlens/eval/metrics.py
index 6a35054..a121fcb 100644
--- a/sumlens/eval/metrics.py
+++ b/sumlens/eval/metrics.py
@@ -22,6 +22,51 @@ def sentence_f1(preds: dict[str, set[str]], golds: dict[str, set[str]]) -> dict[
     return {"precision": precision, "recall": recall, "f1": f1}
 
 
+def roc_auc(scores: list[float], labels: list[int]) -> float:
+    """Threshold-free ROC-AUC (rank-based, ties averaged). `scores` rank the
+    positive class (label 1). Returns 0.0 if either class is absent. The tie
+    scan always advances, so NaN scores cannot hang it (their rank is arbitrary)."""
+    n_pos = sum(labels)
+    n_neg = len(labels) - n_pos
+    if not n_pos or not n_neg:
+        return 0.0
+    order = sorted(zip(scores, labels, strict=True), key=lambda p: p[0])
+    rank_sum_pos = 0.0
+    i = 0
+    while i < len(order):
+        j = i + 1  # start past i: NaN != NaN, so starting at i would never advance
+        while j < len(order) and order[j][0] == order[i][0]:
+            j += 1
+        rank = (i + j - 1) / 2 + 1  # 1-based average rank for the tie group
+        rank_sum_pos += rank * sum(1 for _, label in order[i:j] if label == 1)
+        i = j
+    # Mann-Whitney U of the positives, normalised by the number of pos/neg pairs.
+    return (rank_sum_pos - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg)
+
+
+def pr_auc(scores: list[float], labels: list[int]) -> float:
+    """Average precision (area under the precision-recall curve). `scores` rank
+    the positive class (label 1); tied scores form one threshold, so the result
+    does not depend on input order. Returns 0.0 if there are no positives."""
+    n_pos = sum(labels)
+    if not n_pos:
+        return 0.0
+    order = sorted(zip(scores, labels, strict=True), key=lambda p: p[0], reverse=True)
+    tp = 0
+    ap = 0.0
+    i = 0
+    while i < len(order):
+        j = i + 1  # start past i so a NaN score (never equal to itself) cannot stall the scan
+        while j < len(order) and order[j][0] == order[i][0]:
+            j += 1
+        # Admit the whole tie group at once: j items are predicted positive here.
+        group_pos = sum(1 for _, label in order[i:j] if label == 1)
+        tp += group_pos
+        ap += (group_pos / n_pos) * (tp / j)
+        i = j
+    return ap
+
+
 def expected_calibration_error(
     scores: list[float], labels: list[int], n_bins: int = 10
 ) -> float:
diff --git a/sumlens/signals/support.py b/sumlens/signals/support.py
new file mode 100644
index 0000000..1401dc4
--- /dev/null
+++ b/sumlens/signals/support.py
@@ -0,0 +1,51 @@
+"""Signal C (redesign) — generator-agnostic source attribution from an NLI matrix.
+
+Inseq attribution (`attribution.py`) is gradient-based and needs the *generating*
+model, so it is undefined for RAGTruth (external-model summaries). This signal
+derives attribution from entailment alone, so it is defined for any (source,
+summary) pair. For each summary sentence ``s`` we score entailment against every
+source sentence ``j``, ``M[s][j] = P(src_j entails s)``, then collapse the row:
+
+- ``attr_conc(s) = max_j M - mean_j M`` — support concentration. A grounded
+  sentence has one sharp supporter; a fabricated one has diffuse, flat-low support.
+- ``attr_loo(s)  = top1 - top2`` — necessity margin of the single best supporter.
+- top-k source sentence ids — the UI heatmap (generator-free, no token offsets).
+
+Reuses signal B's NLI model and batched call. Pure given the NLI boundary, which
+tests mock via `_get_nli`. Consumed by `scripts/extract_features.py`.
+"""
+
+from sumlens.signals.nli import _entail_prob, _get_nli
+from sumlens.types import AnalysisConfig, Document, Summary
+
+_BATCH_SIZE = 64
+_TOP_K = 5
+
+
+def support_attribution(
+    document: Document, summary: Summary, cfg: AnalysisConfig
+) -> dict[str, tuple[float, float, list[str]]]:
+    """Score each summary sentence: (attr_conc, attr_loo, top-k source sentence ids)."""
+    sources, targets = document.sentences, summary.sentences
+    if not (targets and sources):
+        return {target.id: (0.0, 0.0, []) for target in targets}
+
+    # One batched NLI call; pairs are laid out sentence-major (all sources for
+    # the first summary sentence, then all sources for the second, ...).
+    classify_pairs = _get_nli(cfg.nli_model)
+    queries = [
+        {"text": src.text, "text_pair": target.text} for target in targets for src in sources
+    ]
+    outputs = classify_pairs(queries, top_k=None, batch_size=_BATCH_SIZE)
+    width = len(sources)
+
+    scored: dict[str, tuple[float, float, list[str]]] = {}
+    for index, target in enumerate(targets):
+        probs = [_entail_prob(out) for out in outputs[index * width : (index + 1) * width]]
+        ranking = sorted(range(width), key=probs.__getitem__, reverse=True)
+        best = probs[ranking[0]]
+        runner_up = probs[ranking[1]] if width > 1 else 0.0
+        top_ids = [sources[j].id for j in ranking[:_TOP_K]]
+        # conc = peak minus row mean; loo = margin of the best over the runner-up
+        scored[target.id] = (best - sum(probs) / width, best - runner_up, top_ids)
+    return scored
diff --git a/tests/test_ablation.py b/tests/test_ablation.py
index 7e448f9..8fab62c 100644
--- a/tests/test_ablation.py
+++ b/tests/test_ablation.py
@@ -2,8 +2,8 @@
 
 from sumlens.eval.ablation import ablation_table
 
-_GROUNDED = {"classifier": 0.9, "nli": 0.8, "attribution": 0.7, "grounded": 1}
-_HALLUCINATED = {"classifier": 0.1, "nli": 0.2, "attribution": 0.3, "grounded": 0}
+_GROUNDED = {"classifier": 0.9, "nli": 0.8, "attr_conc": 0.7, "attr_loo": 0.6, "grounded": 1}
+_HALLUCINATED = {"classifier": 0.1, "nli": 0.2, "attr_conc": 0.3, "attr_loo": 0.2, "grounded": 0}
 _ROWS = [_GROUNDED, _HALLUCINATED] * 10
 
 
@@ -11,19 +11,26 @@ def test_ablation_table_conditions_and_scores() -> None:
     table = ablation_table(_ROWS, _ROWS)
 
     conditions = {row["condition"] for row in table}
-    assert conditions == {"A", "B", "C", "A+B", "A+C", "B+C", "A+B+C"}
+    assert conditions == {
+        "A", "B", "C", "D",
+        "A+B", "A+C", "A+D", "B+C", "B+D", "C+D",
+        "A+B+C", "A+B+D", "A+C+D", "B+C+D",
+        "A+B+C+D",
+    }
 
     for row in table:
-        for key in ("precision", "recall", "f1", "ece"):
+        for key in ("roc_auc", "pr_auc", "precision", "recall", "f1", "ece"):
             assert isinstance(row[key], float)
 
-    fused = next(row for row in table if row["condition"] == "A+B+C")
+    fused = next(row for row in table if row["condition"] == "A+B+C+D")
     assert fused["f1"] == 1.0  # perfectly separable -> perfect detection
+    assert fused["roc_auc"] == 1.0
+    assert fused["pr_auc"] == 1.0
 
 
 def test_ablation_imputes_missing_attribution() -> None:
-    # attribution missing ("") on every row -> still runs via imputation
-    rows = [{**r, "attribution": ""} for r in _ROWS]
+    # attr_conc missing ("") on every row -> still runs via imputation
+    rows = [{**r, "attr_conc": ""} for r in _ROWS]
     table = ablation_table(rows, rows)
     c_only = next(row for row in table if row["condition"] == "C")
     assert isinstance(c_only["f1"], float)
diff --git a/tests/test_features.py b/tests/test_features.py
index 41c5950..62c6d38 100644
--- a/tests/test_features.py
+++ b/tests/test_features.py
@@ -21,9 +21,10 @@ def test_feature_rows_labels_and_missing_signals() -> None:
     classifier_out = {"sum-0000": (0.1, []), "sum-0001": (0.9, [(0, 4)])}
     failed = Claim(id="c", sentence_id="sum-0001", text="x")
     nli_out = {"sum-0000": (0.8, []), "sum-0001": (0.2, [failed])}
-    attribution_out = {"sum-0001": (0.3, ["src-0000"])}  # only the gated sentence has C
+    # support attribution: (attr_conc, attr_loo, top_source_ids); sum-0000 absent
+    support_out = {"sum-0001": (0.3, 0.15, ["src-0000"])}
 
-    rows = feature_rows(_summary(), ["sum-0001"], classifier_out, nli_out, attribution_out)
+    rows = feature_rows(_summary(), ["sum-0001"], classifier_out, nli_out, support_out)
 
     assert rows == [
         {
@@ -31,7 +32,8 @@ def test_feature_rows_labels_and_missing_signals() -> None:
             "sentence_id": "sum-0000",
             "classifier": 0.1,
             "nli": 0.8,
-            "attribution": None,  # C did not run for this sentence
+            "attr_conc": None,  # C did not run for this sentence
+            "attr_loo": None,
             "grounded": 1,
         },
         {
@@ -39,7 +41,8 @@ def test_feature_rows_labels_and_missing_signals() -> None:
             "sentence_id": "sum-0001",
             "classifier": 0.9,
             "nli": 0.2,
-            "attribution": 0.3,
+            "attr_conc": 0.3,
+            "attr_loo": 0.15,
             "grounded": 0,  # marked hallucinated in gold
         },
     ]
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index c030132..9a9d129 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -6,7 +6,9 @@
 
 from sumlens.eval.metrics import (
     expected_calibration_error,
+    pr_auc,
     reliability_diagram,
+    roc_auc,
     sentence_f1,
 )
 
@@ -41,6 +43,38 @@ def test_ece_empty() -> None:
     assert expected_calibration_error([], []) == 0.0
 
 
+def test_roc_auc_perfect_separation() -> None:
+    assert roc_auc([0.1, 0.2, 0.8, 0.9], [0, 0, 1, 1]) == 1.0
+
+
+def test_roc_auc_inverted_is_zero() -> None:
+    assert roc_auc([0.9, 0.8, 0.2, 0.1], [0, 0, 1, 1]) == 0.0
+
+
+def test_roc_auc_ties_give_half() -> None:
+    # all scores equal -> every pair tied -> AUC 0.5
+    assert roc_auc([0.5, 0.5, 0.5, 0.5], [0, 1, 0, 1]) == 0.5
+
+
+def test_roc_auc_single_class_returns_zero() -> None:
+    assert roc_auc([0.1, 0.9], [1, 1]) == 0.0
+
+
+def test_pr_auc_perfect_separation() -> None:
+    assert pr_auc([0.1, 0.2, 0.8, 0.9], [0, 0, 1, 1]) == 1.0
+
+
+def test_pr_auc_floor_is_base_rate() -> None:
+    # best ranking: the only positive is first -> precision 1 at recall 1
+    assert pr_auc([0.4, 0.3, 0.2, 0.1], [1, 0, 0, 0]) == pytest.approx(1.0)
+    # worst ranking: the only positive is last -> precision 1/4 (the base rate) at recall 1
+    assert pr_auc([0.4, 0.3, 0.2, 0.1], [0, 0, 0, 1]) == pytest.approx(0.25)
+
+
+def test_pr_auc_no_positives_returns_zero() -> None:
+    assert pr_auc([0.1, 0.9], [0, 0]) == 0.0
+
+
 def test_reliability_diagram_writes_file(tmp_path: Path) -> None:
     out = tmp_path / "reliability.png"
     reliability_diagram([0.1, 0.4, 0.9, 0.95], [0, 0, 1, 1], out)
diff --git a/tests/test_support.py b/tests/test_support.py
new file mode 100644
index 0000000..d8ed9c0
--- /dev/null
+++ b/tests/test_support.py
@@ -0,0 +1,69 @@
+"""Support attribution (signal C) tests — NLI mocked at the `_get_nli` boundary."""
+
+import pytest
+
+from sumlens.signals import support as support_mod
+from sumlens.signals.support import support_attribution
+from sumlens.types import AnalysisConfig, Document, Sentence, Summary
+
+# Entailment lookup: (premise source sentence, hypothesis summary sentence) -> prob.
+_TABLE = {
+    ("Src A.", "Claim one."): 0.9,
+    ("Src B.", "Claim one."): 0.2,
+    ("Src C.", "Claim one."): 0.1,
+}
+
+
+class _FakeNLI:
+    def __call__(
+        self, pairs: list[dict[str, str]], top_k: object = None, batch_size: object = None
+    ) -> list[list[dict[str, object]]]:
+        return [
+            [
+                {"label": "entailment", "score": _TABLE[(p["text"], p["text_pair"])]},
+                {"label": "contradiction", "score": 0.0},
+            ]
+            for p in pairs
+        ]
+
+
+def _document() -> Document:
+    return Document(
+        id="doc-1",
+        raw_text="Src A. Src B. Src C.",
+        sentences=[
+            Sentence(id="src-0000", text="Src A.", char_start=0, char_end=6),
+            Sentence(id="src-0001", text="Src B.", char_start=7, char_end=13),
+            Sentence(id="src-0002", text="Src C.", char_start=14, char_end=20),
+        ],
+        source="text",
+    )
+
+
+def _summary() -> Summary:
+    return Summary(
+        id="doc-1-summary",
+        document_id="doc-1",
+        text="Claim one.",
+        sentences=[Sentence(id="sum-0000", text="Claim one.", char_start=0, char_end=10)],
+        model_name="m",
+    )
+
+
+def test_support_concentration_and_loo(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.setattr(support_mod, "_get_nli", lambda model_name: _FakeNLI())
+
+    result = support_attribution(_document(), _summary(), AnalysisConfig())
+
+    conc, loo, top_ids = result["sum-0000"]
+    # row = [0.9, 0.2, 0.1]: top1=0.9, top2=0.2, mean=0.4
+    assert conc == pytest.approx(0.9 - 0.4)  # peak minus mean
+    assert loo == pytest.approx(0.9 - 0.2)  # best-supporter margin
+    assert top_ids[0] == "src-0000"  # strongest supporting source first
+
+
+def test_support_empty_source(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.setattr(support_mod, "_get_nli", lambda model_name: _FakeNLI())
+    empty_doc = Document(id="d", raw_text="", sentences=[], source="text")
+    result = support_attribution(empty_doc, _summary(), AnalysisConfig())
+    assert result == {"sum-0000": (0.0, 0.0, [])}