Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,27 @@ When the bg task notification fires:
- ABANDONED → surface failure to user; remove watcher; skip cleanup
```

### The watcher exit code is necessary-not-sufficient (BRO-1489)

`GREEN` only means `gh pr checks --watch` exited 0 — which it does on a *subset*
of checks (required-only) and *before* async bot reviews (CodeRabbit) settle.
Observed three times on bstack PR #78: exit 0 while the PR was `UNSTABLE` / had a
pending review.

`p9 merge-ready` therefore **verifies the real merge predicate** before marking
`MERGE_READY`: it queries `gh pr view --json mergeable,mergeStateStatus,reviewDecision`
plus a best-effort `gh api graphql` unresolved-thread count, and is ready iff
`mergeStateStatus ∈ {CLEAN, UNSTABLE}` with no `CHANGES_REQUESTED` and zero
unresolved review threads. `BLOCKED`/`DIRTY`/`BEHIND`/`DRAFT`/`UNKNOWN`, an open
thread, or any gh error → refused (fail-safe). Pass `--no-verify` to skip
(test/offline only).

Query it directly without transitioning state:

```text
p9 merge-status <n> [--json] # exit 0 iff merge-ready; prints the verdict + reason
```

## Termination conditions

The agent exits the heal loop when **any** of:
Expand Down
148 changes: 148 additions & 0 deletions scripts/p9.py
Original file line number Diff line number Diff line change
Expand Up @@ -1430,6 +1430,121 @@ def cmd_events_tail(args: argparse.Namespace) -> int:
return EXIT_OK


def merge_ready_verdict(pr: int, repo: str | None) -> dict:
"""Query GitHub for the REAL merge predicate — not the `gh pr checks --watch`
exit code, which is necessary-not-sufficient (it returns 0 on a subset of
checks and before async bot reviews settle; observed three times on bstack
PR #78 — see BRO-1489).

Ready iff GitHub's server-computed ``mergeStateStatus`` says the required
gates are satisfied AND there are zero unresolved review threads:

- ``CLEAN`` → ready (everything green).
- ``UNSTABLE`` → ready ONLY when 0 unresolved review threads. UNSTABLE means
mergeable with all *required* checks green but some
*non-required* check pending/failing (e.g. a CodeRabbit
soft-status that never resolves). GitHub computes
required-vs-non-required server-side, so we trust
mergeStateStatus instead of fragile isRequired parsing.
- ``BLOCKED`` / ``DIRTY`` / ``BEHIND`` / ``DRAFT`` / ``UNKNOWN`` → not ready.
- any unresolved review thread → not ready (stricter than branch protection;
bstack reflex: every bot/human thread closed before merge).
- any error querying gh → not ready (fail-safe; never merge blind).

Returns a dict: {ready, state, mergeable, review_decision, unresolved_threads, reason}.
``unresolved_threads`` is -1 when it could not be determined (best-effort
graphql); an undeterminable count does NOT block (the GitHub-enforced gates
in mergeStateStatus already cover required conversation-resolution).
"""
# mergeable / mergeStateStatus / reviewDecision are the valid `gh pr view
# --json` fields. (reviewThreads is NOT a pr-view field — it needs graphql;
# see _unresolved_review_threads below. This was caught by dogfooding.)
cmd = ["gh", "pr", "view", str(pr), "--json",
"mergeable,mergeStateStatus,reviewDecision"]
if repo:
cmd += ["--repo", repo]
try:
out = subprocess.run(cmd, capture_output=True, text=True, timeout=30, check=False)
except (FileNotFoundError, subprocess.TimeoutExpired) as e:
return {"ready": False, "state": "QUERY_FAILED", "mergeable": "UNKNOWN",
"review_decision": "", "unresolved_threads": -1,
"reason": f"gh unavailable: {e}"}
if out.returncode != 0:
return {"ready": False, "state": "QUERY_FAILED", "mergeable": "UNKNOWN",
"review_decision": "", "unresolved_threads": -1,
"reason": (out.stderr.strip()[:200] or "gh pr view failed")}
try:
data = json.loads(out.stdout)
except json.JSONDecodeError as e:
return {"ready": False, "state": "QUERY_FAILED", "mergeable": "UNKNOWN",
"review_decision": "", "unresolved_threads": -1,
"reason": f"non-JSON from gh: {e}"}

state = (data.get("mergeStateStatus") or "UNKNOWN").upper()
mergeable = (data.get("mergeable") or "UNKNOWN").upper()
decision = (data.get("reviewDecision") or "").upper()
unresolved = _unresolved_review_threads(pr, repo) # int, or -1 if undeterminable

if mergeable == "CONFLICTING" or state in {"DIRTY", "BEHIND", "DRAFT", "BLOCKED", "UNKNOWN"}:
ready, reason = False, f"mergeStateStatus={state} mergeable={mergeable}"
elif decision == "CHANGES_REQUESTED":
ready, reason = False, "reviewDecision=CHANGES_REQUESTED"
elif unresolved > 0:
ready, reason = False, f"{unresolved} unresolved review thread(s)"
elif state == "CLEAN":
ready, reason = True, "CLEAN"
elif state == "UNSTABLE":
ready, reason = True, ("UNSTABLE but required checks green + no blocking review "
"(only non-required checks un-green)")
else:
ready, reason = False, f"unrecognized mergeStateStatus={state}"

return {"ready": ready, "state": state, "mergeable": mergeable,
"review_decision": decision, "unresolved_threads": unresolved,
"reason": reason}


def _unresolved_review_threads(pr: int, repo: str | None) -> int:
"""Count unresolved review threads via `gh api graphql` (best-effort).

Returns the count, or -1 if it cannot be determined (no repo, gh/graphql
error). -1 does not block a merge — mergeStateStatus already reflects
GitHub-enforced conversation resolution when branch protection requires it;
this is the stricter bstack reflex-18 layer on top.
"""
if not repo or "/" not in repo:
return -1
owner, name = repo.split("/", 1)
q = ("query($o:String!,$n:String!,$p:Int!){repository(owner:$o,name:$n)"
"{pullRequest(number:$p){reviewThreads(first:100){nodes{isResolved}}}}}")
cmd = ["gh", "api", "graphql", "-f", f"query={q}",
"-F", f"o={owner}", "-F", f"n={name}", "-F", f"p={pr}"]
try:
out = subprocess.run(cmd, capture_output=True, text=True, timeout=30, check=False)
if out.returncode != 0:
return -1
nodes = (json.loads(out.stdout)["data"]["repository"]["pullRequest"]
["reviewThreads"]["nodes"])
return sum(1 for t in nodes if not t.get("isResolved", False))
except (FileNotFoundError, subprocess.TimeoutExpired, json.JSONDecodeError, KeyError, TypeError):
return -1


def cmd_merge_status(args: argparse.Namespace) -> int:
"""Query + print the real merge predicate for a PR. The agent calls this
instead of trusting the watcher exit code. Exit 0 iff merge-ready."""
pr = int(args.pr)
repo = args.repo or _detect_repo()
v = merge_ready_verdict(pr, repo)
if args.json:
print(json.dumps(v))
else:
verdict = "READY" if v["ready"] else "NOT-READY"
print(f"PR #{pr}: {verdict} — {v['reason']} "
f"(mergeStateStatus={v['state']}, unresolved_threads={v['unresolved_threads']})")
return EXIT_OK if v["ready"] else EXIT_DEGRADED


def cmd_merge_ready(args: argparse.Namespace) -> int:
pr = int(args.pr)
state = current_pr_state(pr)
Expand All @@ -1440,6 +1555,27 @@ def cmd_merge_ready(args: argparse.Namespace) -> int:
)
return EXIT_DEGRADED
repo = args.repo or _detect_repo()

# Verify the REAL merge predicate before authorizing MERGE_READY. The GREEN
# state only means `gh pr checks --watch` exited 0 — necessary, not
# sufficient. Refuse if the PR is not actually mergeable (BRO-1489).
verdict = None
if not getattr(args, "no_verify", False):
verdict = merge_ready_verdict(pr, repo)
if not verdict["ready"]:
print(
f"PR #{pr} NOT merge-ready: {verdict['reason']} "
f"(mergeStateStatus={verdict['state']}, "
f"unresolved_threads={verdict['unresolved_threads']}).",
file=sys.stderr,
)
print(
" The watcher exit code is necessary-not-sufficient; refusing to "
"mark MERGE_READY. Re-run once checks settle, or --no-verify to override.",
file=sys.stderr,
)
return EXIT_DEGRADED

event = PRStateEvent(
ts=_utcnow(),
pr=pr,
Expand All @@ -1448,6 +1584,7 @@ def cmd_merge_ready(args: argparse.Namespace) -> int:
to_state=PRState.MERGE_READY.value,
watcher_id="merge-ready",
attempt=0,
extra={"merge_verdict": verdict} if verdict else {"merge_verdict": "skipped (--no-verify)"},
)
append_state_event(event)
print(f"PR #{pr} marked MERGE_READY (control metalayer authorizes merge)")
Expand Down Expand Up @@ -1562,8 +1699,19 @@ def build_parser() -> argparse.ArgumentParser:
pm = sub.add_parser("merge-ready", help="Mark PR as ready for metalayer-authorized merge")
pm.add_argument("pr")
pm.add_argument("--repo", default=None)
pm.add_argument("--no-verify", action="store_true",
help="Skip the live mergeStateStatus check (test/offline only; "
"by default merge-ready verifies the real merge predicate)")
pm.set_defaults(func=cmd_merge_ready)

pms = sub.add_parser("merge-status",
help="Query the REAL merge predicate (mergeStateStatus + "
"unresolved review threads) — not the watcher exit code")
pms.add_argument("pr")
pms.add_argument("--repo", default=None)
pms.add_argument("--json", action="store_true", help="Emit the verdict as JSON")
pms.set_defaults(func=cmd_merge_status)

pab = sub.add_parser("abandon",
help="Mark a PR as ABANDONED (frees concurrency slot)")
pab.add_argument("pr")
Expand Down
6 changes: 4 additions & 2 deletions tests/test_p9_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,10 @@ def test_full_lifecycle_via_state_events(self, p9):
watcher_id="w100",
))

# 3. merge-ready (CLI command)
rc = p9.main(["merge-ready", "100", "--repo", "broomva/test"])
# 3. merge-ready (CLI command). --no-verify: this is a state-machine
# lifecycle test, not a gh-verdict test (the merge_ready_verdict gate is
# covered in test_p9_merge_verdict.py).
rc = p9.main(["merge-ready", "100", "--repo", "broomva/test", "--no-verify"])
assert rc == 0
assert p9.current_pr_state(100) == p9.PRState.MERGE_READY

Expand Down
180 changes: 180 additions & 0 deletions tests/test_p9_merge_verdict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
"""BRO-1489 — the merge predicate is mergeStateStatus + reviewDecision +
unresolved threads, NOT the `gh pr checks --watch` exit code.

Covers: merge_ready_verdict (the predicate, which makes a `gh pr view` call plus
a best-effort `gh api graphql` thread count), cmd_merge_status (the query), and
the cmd_merge_ready verify gate. Provenance: bstack PR #78, where the watcher
exited 0 three times while the PR was UNSTABLE / had a pending bot review.
"""
from __future__ import annotations

import importlib
import json
import sys
from pathlib import Path

import pytest

_HERE = Path(__file__).resolve().parent
_FIXTURES = _HERE / "fixtures"


@pytest.fixture()
def p9(tmp_path, monkeypatch):
monkeypatch.setenv("BROOMVA_P9_HOME", str(tmp_path))
monkeypatch.setenv("BROOMVA_P9_POLICY", str(_FIXTURES / "policy-good.yaml"))
if "p9" in sys.modules:
del sys.modules["p9"]
return importlib.import_module("p9")


class _Run:
def __init__(self, *, stdout="", stderr="", returncode=0):
self.stdout = stdout
self.stderr = stderr
self.returncode = returncode


def _mock_gh(p9, monkeypatch, *, view=None, threads=None,
view_rc=0, view_stderr="", view_exc=None,
view_nonjson=False, graphql_rc=0):
"""Dispatch subprocess.run by command: `gh pr view` vs `gh api graphql`."""
def fake(cmd, *a, **k):
if cmd[:3] == ["gh", "pr", "view"]:
if view_exc is not None:
raise view_exc
if view_nonjson:
return _Run(stdout="not json", returncode=0)
return _Run(stdout=json.dumps(view) if view is not None else "",
stderr=view_stderr, returncode=view_rc)
if cmd[:3] == ["gh", "api", "graphql"]:
if graphql_rc != 0:
return _Run(stderr="graphql err", returncode=graphql_rc)
payload = {"data": {"repository": {"pullRequest": {
"reviewThreads": {"nodes": threads or []}}}}}
return _Run(stdout=json.dumps(payload), returncode=0)
return _Run(returncode=1)
monkeypatch.setattr(p9.subprocess, "run", fake)


_CLEAN = {"mergeable": "MERGEABLE", "mergeStateStatus": "CLEAN", "reviewDecision": ""}
_UNSTABLE = {"mergeable": "MERGEABLE", "mergeStateStatus": "UNSTABLE", "reviewDecision": ""}


# ── the predicate ────────────────────────────────────────────────────────────

class TestVerdict:
def test_clean_no_threads_is_ready(self, p9, monkeypatch):
_mock_gh(p9, monkeypatch, view=_CLEAN, threads=[])
v = p9.merge_ready_verdict(1, "o/r")
assert v["ready"] is True and v["state"] == "CLEAN"

def test_unstable_no_threads_is_ready(self, p9, monkeypatch):
# PR #78 case: required green, only a non-required bot check un-green.
_mock_gh(p9, monkeypatch, view=_UNSTABLE, threads=[{"isResolved": True}])
v = p9.merge_ready_verdict(1, "o/r")
assert v["ready"] is True and v["state"] == "UNSTABLE"

def test_unstable_with_unresolved_thread_not_ready(self, p9, monkeypatch):
_mock_gh(p9, monkeypatch, view=_UNSTABLE,
threads=[{"isResolved": False}, {"isResolved": True}])
v = p9.merge_ready_verdict(1, "o/r")
assert v["ready"] is False and v["unresolved_threads"] == 1

def test_clean_with_unresolved_thread_not_ready(self, p9, monkeypatch):
_mock_gh(p9, monkeypatch, view=_CLEAN, threads=[{"isResolved": False}])
assert p9.merge_ready_verdict(1, "o/r")["ready"] is False

def test_changes_requested_not_ready(self, p9, monkeypatch):
_mock_gh(p9, monkeypatch, threads=[], view={
"mergeable": "MERGEABLE", "mergeStateStatus": "CLEAN",
"reviewDecision": "CHANGES_REQUESTED"})
assert p9.merge_ready_verdict(1, "o/r")["ready"] is False

@pytest.mark.parametrize("state", ["BLOCKED", "DIRTY", "BEHIND", "DRAFT", "UNKNOWN"])
def test_hard_blocking_states_not_ready(self, p9, monkeypatch, state):
_mock_gh(p9, monkeypatch, threads=[], view={
"mergeable": "MERGEABLE", "mergeStateStatus": state, "reviewDecision": ""})
assert p9.merge_ready_verdict(1, "o/r")["ready"] is False

def test_conflicting_not_ready(self, p9, monkeypatch):
_mock_gh(p9, monkeypatch, threads=[], view={
"mergeable": "CONFLICTING", "mergeStateStatus": "DIRTY", "reviewDecision": ""})
assert p9.merge_ready_verdict(1, "o/r")["ready"] is False

def test_clean_with_undeterminable_threads_still_ready(self, p9, monkeypatch):
# graphql fails → unresolved=-1 → does NOT block (mergeStateStatus governs).
_mock_gh(p9, monkeypatch, view=_CLEAN, graphql_rc=1)
v = p9.merge_ready_verdict(1, "o/r")
assert v["ready"] is True and v["unresolved_threads"] == -1

def test_view_failure_fails_safe(self, p9, monkeypatch):
_mock_gh(p9, monkeypatch, view_rc=1, view_stderr="no such PR")
v = p9.merge_ready_verdict(1, "o/r")
assert v["ready"] is False and v["state"] == "QUERY_FAILED"

def test_gh_missing_fails_safe(self, p9, monkeypatch):
_mock_gh(p9, monkeypatch, view_exc=FileNotFoundError("gh"))
assert p9.merge_ready_verdict(1, "o/r")["ready"] is False

def test_non_json_fails_safe(self, p9, monkeypatch):
_mock_gh(p9, monkeypatch, view_nonjson=True)
assert p9.merge_ready_verdict(1, "o/r")["ready"] is False


# ── the query subcommand ─────────────────────────────────────────────────────

class TestMergeStatus:
def test_exit_zero_when_ready(self, p9, monkeypatch):
_mock_gh(p9, monkeypatch, view=_CLEAN, threads=[])
assert p9.main(["merge-status", "5", "--repo", "o/r"]) == p9.EXIT_OK

def test_exit_degraded_when_not_ready(self, p9, monkeypatch):
_mock_gh(p9, monkeypatch, threads=[], view={
"mergeable": "MERGEABLE", "mergeStateStatus": "BLOCKED", "reviewDecision": ""})
assert p9.main(["merge-status", "5", "--repo", "o/r"]) == p9.EXIT_DEGRADED

def test_json_output(self, p9, monkeypatch, capsys):
_mock_gh(p9, monkeypatch, view=_CLEAN, threads=[])
p9.main(["merge-status", "5", "--repo", "o/r", "--json"])
assert json.loads(capsys.readouterr().out)["ready"] is True


# ── the merge-ready verify gate ──────────────────────────────────────────────

def _seed_green(p9, pr):
for prev, curr in [(p9.PRState.PUSHED, p9.PRState.WATCHING),
(p9.PRState.WATCHING, p9.PRState.GREEN)]:
p9.append_state_event(p9.PRStateEvent(
ts="2026-06-12T00:00:00+00:00", pr=pr, repo="o/r",
from_state=prev.value, to_state=curr.value, watcher_id="seed"))


class TestMergeReadyGate:
def test_green_and_ready_marks_merge_ready(self, p9, monkeypatch):
_seed_green(p9, 10)
_mock_gh(p9, monkeypatch, view=_CLEAN, threads=[])
assert p9.main(["merge-ready", "10", "--repo", "o/r"]) == p9.EXIT_OK
assert p9.current_pr_state(10) == p9.PRState.MERGE_READY

def test_green_but_not_ready_refuses(self, p9, monkeypatch):
"""The core bug: watcher said GREEN, but the PR is UNSTABLE with an open
thread. merge-ready must REFUSE and leave the PR in GREEN."""
_seed_green(p9, 11)
_mock_gh(p9, monkeypatch, view=_UNSTABLE, threads=[{"isResolved": False}])
assert p9.main(["merge-ready", "11", "--repo", "o/r"]) == p9.EXIT_DEGRADED
assert p9.current_pr_state(11) == p9.PRState.GREEN # NOT promoted

def test_no_verify_bypasses(self, p9, monkeypatch):
_seed_green(p9, 12)
_mock_gh(p9, monkeypatch, threads=[], view={
"mergeable": "MERGEABLE", "mergeStateStatus": "BLOCKED", "reviewDecision": ""})
assert p9.main(["merge-ready", "12", "--repo", "o/r", "--no-verify"]) == p9.EXIT_OK
assert p9.current_pr_state(12) == p9.PRState.MERGE_READY

def test_gate_blocks_when_gh_unavailable(self, p9, monkeypatch):
"""Fail-safe: if the verdict query errors, merge-ready refuses."""
_seed_green(p9, 13)
_mock_gh(p9, monkeypatch, view_rc=1, view_stderr="gh down")
assert p9.main(["merge-ready", "13", "--repo", "o/r"]) == p9.EXIT_DEGRADED
assert p9.current_pr_state(13) == p9.PRState.GREEN
Loading