diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f04d037..3c35d1b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -38,7 +38,7 @@ jobs: shellcheck install.sh ascii: - name: ascii-only (no em-dash / unicode) + name: ascii check (unicode) runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 @@ -89,21 +89,13 @@ jobs: python-version: "3.13" - name: Install pytest + cryptography + coverage run: pip install --upgrade pip && pip install pytest cryptography coverage - - name: Enable subprocess coverage - # The hooks and daemon run as subprocesses; this .pth makes each child - # python start coverage when COVERAGE_PROCESS_START is set, so their - # lines are measured instead of reading as 0%. - run: | - SP=$(python -c "import sysconfig; print(sysconfig.get_path('purelib'))") - printf 'import coverage; coverage.process_startup()\n' > "$SP/subcov.pth" - name: Run tests under coverage - env: - COVERAGE_PROCESS_START: ${{ github.workspace }}/pyproject.toml + # In-process measurement (deterministic). The hooks are unit-tested + # in-process; daemon.py is omitted (see pyproject [tool.coverage.run]) + # since it only runs as a subprocess. No COVERAGE_PROCESS_START / combine. run: PYTHONPATH=lib coverage run -m pytest -q - - name: Combine + report (gate 65%, Silver target 80%) - run: | - coverage combine - coverage report --fail-under=65 + - name: Report (gate 67%, Silver target 80% tracked in #39) + run: coverage report --fail-under=67 manifest-integrity: name: regenerate MANIFEST.lock diff --git a/docs/roadmap.md b/docs/roadmap.md index 3c8078d..3ec6161 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -58,13 +58,13 @@ The bar to add an item here is: someone asked, the maintainer thought about it, **Tracking**: #39 -**Status**: CI gates coverage at 65% as of v0.6.2; 80% waits on hook tests. +**Status**: deterministic gate at 67% (69% measured); climbing to 80 incrementally. -**Today**: the `coverage` job measures ~67%. It captures the hooks and daemon (which run as subprocesses) via `COVERAGE_PROCESS_START` + `coverage combine`; a plain in-process `--cov` reads 59% because it misses them. Most of the gap to 80% sits in the action hooks (`lib/hook_stop.py`, `lib/hook_post_tool_bash.py`, `lib/hook_pre_tool_bash.py`, `lib/hook_post_tool_edit.py`): `tests/test_hooks_smoke.py` checks only that they run on trivial input, not their branches. +**Today**: the `coverage` job measures statement coverage in-process, so the number is stable (the earlier subprocess capture swung 60-67% run to run). `daemon.py` is omitted - it only runs as a spawned subprocess, so in-process coverage can't see it; it is behavior-tested by `tests/test_daemon.py`. The action hooks now have real branch tests (`hook_pre_tool_bash` 93%, `hook_post_tool_edit` 89%, `hook_stop` 79%, `hook_post_tool_bash` 73%). The remaining gap to 80% is spread across `_common`, `doctor`, `telemetry`, `redact`, `integrity`, and `hook_session_start`. -**Proposal**: test each hook with real payloads (transcript parsing, revert detection, redaction, the confidence gate), then step the CI `--fail-under` up to 80. +**Proposal**: add branch tests for those next-largest gaps and step the CI `--fail-under` up toward 80. -**Why deferred**: 80% is the Silver `test_statement_coverage80` criterion, and Silver already blocks on the single-maintainer rules (2+ contributors, bus factor >= 2, two-person review). Worth doing regardless, but no rush to hit the exact number while those block the badge. +**Why incremental**: 80% is the Silver `test_statement_coverage80` criterion, and Silver already blocks on the single-maintainer rules (2+ contributors, bus factor >= 2, two-person review). Worth doing regardless, but no rush to the exact number while those block the badge. ## Agent Client Protocol (ACP) for Zed and other ACP-aware tools diff --git a/pyproject.toml b/pyproject.toml index 6a70c7d..acb9fd5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,14 +78,16 @@ addopts = ["-ra", "--strict-markers", "--strict-config"] filterwarnings = ["error"] [tool.coverage.run] -# Statement coverage of the runtime source only. presence_ext is the compiled -# Rust extension (gitignored build artifact), not Python under test. -# parallel + a subprocess .pth (see the coverage CI job) capture the hooks and -# daemon, which the tests exercise as subprocesses rather than in-process. +# Statement coverage of the runtime source, measured in-process so the number is +# deterministic. Omitted: +# presence_ext/* - compiled Rust extension (gitignored build artifact). +# daemon.py - runs only as a spawned subprocess, so in-process coverage +# cannot see it; it is behavior-tested by tests/test_daemon.py +# (socket perms, pid file, cache clearing, idle timeout, +# malformed JSON). Counting it would require flaky subprocess +# capture, which this project deliberately does not use. source = ["lib"] -omit = ["lib/presence_ext/*"] -parallel = true -sigterm = true +omit = ["lib/presence_ext/*", "lib/daemon.py"] [tool.coverage.report] show_missing = true diff --git a/tests/conftest.py b/tests/conftest.py index 29e8547..91eacd2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -35,3 +35,34 @@ def fake_repo(tmp_path, monkeypatch): subprocess.run(["git", "commit", "--allow-empty", "-m", "init", "-q"], cwd=repo, check=True) monkeypatch.chdir(repo) return repo + + +@pytest.fixture +def hook_runner(isolated_state, fake_repo, monkeypatch): + """Drive a hook's main() in-process. Returns (run, state, repo). + + run(hook_name, payload, settings=None) plants settings.json (use dotted + override keys, e.g. {"overrides": {"confidence.stop_action": "block"}}), + reloads _common + the hook so they bind this test's PRESENCE_STATE and a + fresh settings cache, feeds payload as stdin, and calls main(). Read stdout + with the capsys fixture; inspect what the hook wrote under `state`. + + In-process (not subprocess) so coverage is measured deterministically. + """ + import importlib + import io + import json + import sys + state = isolated_state + + def run(hook_name: str, payload: dict, settings: dict | None = None) -> None: + if settings is not None: + (state / "settings.json").write_text(json.dumps(settings), encoding="utf-8") + import _common + importlib.reload(_common) + mod = importlib.import_module(hook_name) + importlib.reload(mod) + monkeypatch.setattr(sys, "stdin", io.StringIO(json.dumps(payload))) + mod.main() + + return run, state, fake_repo diff --git a/tests/test_daemon.py b/tests/test_daemon.py index e90c90b..f130e07 100644 --- a/tests/test_daemon.py +++ b/tests/test_daemon.py @@ -62,6 +62,19 @@ def _spawn_daemon(state_dir: Path) -> subprocess.Popen: ) +def _stop_daemon(proc: subprocess.Popen) -> None: + """Terminate a spawned daemon and reap it. Always wait after kill: coverage + instrumentation slows daemon shutdown past a short terminate timeout, and an + unreaped Popen trips a ResourceWarning in Popen.__del__ that filterwarnings + error escalates into a failure.""" + proc.terminate() + try: + proc.wait(timeout=10) + except subprocess.TimeoutExpired: + proc.kill() + proc.wait() + + def _wait_for_socket(sock_path: Path, timeout: float = 5.0) -> bool: deadline = time.time() + timeout while time.time() < deadline: @@ -106,11 +119,7 @@ def test_daemon_responds_to_user_prompt_submit(short_state_dir): # Daemon should still be alive after one request. assert proc.poll() is None finally: - proc.terminate() - try: - proc.wait(timeout=2) - except subprocess.TimeoutExpired: - proc.kill() + _stop_daemon(proc) def test_daemon_socket_perms_are_0600(short_state_dir): @@ -127,11 +136,7 @@ def test_daemon_socket_perms_are_0600(short_state_dir): mode = stat.S_IMODE(sock.stat().st_mode) assert mode == 0o600, f"socket perms should be 0o600, got 0o{mode:03o}" finally: - proc.terminate() - try: - proc.wait(timeout=2) - except subprocess.TimeoutExpired: - proc.kill() + _stop_daemon(proc) def test_daemon_writes_pid_file(short_state_dir): @@ -148,11 +153,7 @@ def test_daemon_writes_pid_file(short_state_dir): recorded_pid = int(pid_file.read_text().strip()) assert recorded_pid == proc.pid finally: - proc.terminate() - try: - proc.wait(timeout=2) - except subprocess.TimeoutExpired: - proc.kill() + _stop_daemon(proc) def test_daemon_rejects_unknown_hook(short_state_dir): @@ -167,11 +168,7 @@ def test_daemon_rejects_unknown_hook(short_state_dir): # Daemon stays alive after a bad request. assert proc.poll() is None finally: - proc.terminate() - try: - proc.wait(timeout=2) - except subprocess.TimeoutExpired: - proc.kill() + _stop_daemon(proc) def test_daemon_handles_multiple_sequential_requests(short_state_dir): @@ -198,11 +195,7 @@ def test_daemon_handles_multiple_sequential_requests(short_state_dir): assert b"hookSpecificOutput" in response or response == b"" assert proc.poll() is None, f"daemon died after request {i}" finally: - proc.terminate() - try: - proc.wait(timeout=2) - except subprocess.TimeoutExpired: - proc.kill() + _stop_daemon(proc) def test_daemon_clears_caches_between_requests(short_state_dir, monkeypatch): @@ -241,11 +234,7 @@ def test_daemon_clears_caches_between_requests(short_state_dir, monkeypatch): if response: assert b"hookSpecificOutput" in response or response == b"" finally: - proc.terminate() - try: - proc.wait(timeout=2) - except subprocess.TimeoutExpired: - proc.kill() + _stop_daemon(proc) def test_daemon_idle_timeout_auto_exits(short_state_dir, monkeypatch): @@ -298,11 +287,7 @@ def test_daemon_idle_timeout_auto_exits(short_state_dir, monkeypatch): assert not sock.exists(), "daemon left a stale socket on auto-exit" finally: if proc.poll() is None: - proc.terminate() - try: - proc.wait(timeout=2) - except subprocess.TimeoutExpired: - proc.kill() + _stop_daemon(proc) def test_daemon_handles_malformed_json(short_state_dir): @@ -328,8 +313,4 @@ def test_daemon_handles_malformed_json(short_state_dir): assert b"Malformed" in response assert proc.poll() is None finally: - proc.terminate() - try: - proc.wait(timeout=2) - except subprocess.TimeoutExpired: - proc.kill() + _stop_daemon(proc) diff --git a/tests/test_hook_post_tool_edit.py b/tests/test_hook_post_tool_edit.py new file mode 100644 index 0000000..55fb6fc --- /dev/null +++ b/tests/test_hook_post_tool_edit.py @@ -0,0 +1,52 @@ +"""Branch tests for hook_post_tool_edit: the PostToolUse(Edit|Write|MultiEdit) hook.""" +from __future__ import annotations + +import json + + +def _edit_events(state, repo): + """Return the edit events the hook recorded for `repo`.""" + out = [] + for p in (state / "events").rglob("pending.jsonl"): + for line in p.read_text(encoding="utf-8").splitlines(): + if line.strip(): + ev = json.loads(line) + if ev.get("kind") == "edit": + out.append(ev) + return out + + +def test_edit_recorded_with_tool_and_path(hook_runner): + run, state, repo = hook_runner + run("hook_post_tool_edit", { + "cwd": str(repo), + "tool_name": "Edit", + "tool_input": {"file_path": "/x/y.py"}, + }) + events = _edit_events(state, repo) + assert len(events) == 1 + assert events[0]["tool"] == "Edit" + assert events[0]["path"] == "/x/y.py" + + +def test_edit_path_fallback_to_path_key(hook_runner): + """tool_input may carry `path` instead of `file_path`.""" + run, state, repo = hook_runner + run("hook_post_tool_edit", { + "cwd": str(repo), + "tool_name": "Write", + "tool_input": {"path": "/a/b.txt"}, + }) + events = _edit_events(state, repo) + assert len(events) == 1 + assert events[0]["path"] == "/a/b.txt" + + +def test_edit_skipped_when_events_disabled(hook_runner): + run, state, repo = hook_runner + run( + "hook_post_tool_edit", + {"cwd": str(repo), "tool_name": "Edit", "tool_input": {"file_path": "/x/y.py"}}, + settings={"preset": "solo-dev", "overrides": {"events.enabled": False}}, + ) + assert _edit_events(state, repo) == [] diff --git a/tests/test_hook_pre_tool_bash.py b/tests/test_hook_pre_tool_bash.py new file mode 100644 index 0000000..3e58997 --- /dev/null +++ b/tests/test_hook_pre_tool_bash.py @@ -0,0 +1,72 @@ +"""Branch tests for hook_pre_tool_bash: the commit/push confidence gate.""" +from __future__ import annotations + + +def _seed(repo, *, edit_age=None, pass_age=None): + """Seed edit / test_pass events at controlled ages (seconds ago), within the + scan_recent window so the gate sees them.""" + import events + from _common import now_ts + t = now_ts() + if edit_age is not None: + events.append_event({"kind": "edit", "ts": t - edit_age}, cwd=str(repo)) + if pass_age is not None: + events.append_event({"kind": "test_pass", "ts": t - pass_age}, cwd=str(repo)) + + +def _settings(gate): + return {"preset": "solo-dev", "overrides": {"confidence.commit_gate": gate}} + + +def test_gate_off_is_silent(hook_runner, capsys): + run, state, repo = hook_runner + _seed(repo, edit_age=10) # unverified edit present, but gate is off + run("hook_pre_tool_bash", {"cwd": str(repo), "tool_input": {"command": "git commit -m x"}}, + settings=_settings("off")) + assert capsys.readouterr().out.strip() == "" + + +def test_non_commit_command_is_ignored(hook_runner, capsys): + run, state, repo = hook_runner + _seed(repo, edit_age=10) + run("hook_pre_tool_bash", {"cwd": str(repo), "tool_input": {"command": "ls -la"}}, + settings=_settings("block")) + assert capsys.readouterr().out.strip() == "" + + +def test_warn_emits_context_without_blocking(hook_runner, capsys): + run, state, repo = hook_runner + _seed(repo, edit_age=10) # edit, no later pass -> unverified + run("hook_pre_tool_bash", {"cwd": str(repo), "tool_input": {"command": "git commit -m x"}}, + settings=_settings("warn")) + out = capsys.readouterr().out + assert "without verification" in out + assert "additionalContext" in out + assert "permissionDecision" not in out # warn must never block + + +def test_ask_sets_permission_ask(hook_runner, capsys): + run, state, repo = hook_runner + _seed(repo, edit_age=10) + run("hook_pre_tool_bash", {"cwd": str(repo), "tool_input": {"command": "git push"}}, + settings=_settings("ask")) + out = capsys.readouterr().out + assert '"permissionDecision":"ask"' in out + + +def test_block_denies(hook_runner, capsys): + run, state, repo = hook_runner + _seed(repo, edit_age=10) + run("hook_pre_tool_bash", {"cwd": str(repo), "tool_input": {"command": "git commit -m x"}}, + settings=_settings("block")) + out = capsys.readouterr().out + assert '"permissionDecision":"deny"' in out + + +def test_evidence_after_edit_passes_gate(hook_runner, capsys): + """A passing test logged AFTER the most recent edit -> no intervention.""" + run, state, repo = hook_runner + _seed(repo, edit_age=10, pass_age=5) # pass is newer than edit + run("hook_pre_tool_bash", {"cwd": str(repo), "tool_input": {"command": "git commit -m x"}}, + settings=_settings("block")) + assert capsys.readouterr().out.strip() == "" diff --git a/tests/test_hook_stop.py b/tests/test_hook_stop.py new file mode 100644 index 0000000..eb59e03 --- /dev/null +++ b/tests/test_hook_stop.py @@ -0,0 +1,135 @@ +"""Branch tests for hook_stop: the calibrated-confidence gate.""" +from __future__ import annotations + +import json + + +def _write_transcript(path, *messages): + """Write a JSONL transcript of assistant messages (string content).""" + lines = [json.dumps({"type": "assistant", "message": {"content": m}}) for m in messages] + path.write_text("\n".join(lines) + "\n", encoding="utf-8") + + +def _seed(repo, *, edit_age=None, pass_age=None): + import events + from _common import now_ts + t = now_ts() + if edit_age is not None: + events.append_event({"kind": "edit", "ts": t - edit_age}, cwd=str(repo)) + if pass_age is not None: + events.append_event({"kind": "test_pass", "ts": t - pass_age}, cwd=str(repo)) + + +def _warnings(state): + f = state / "logs" / "warnings.log" + return f.read_text(encoding="utf-8") if f.exists() else "" + + +def _confidence(state): + f = state / "telemetry" / "confidence.jsonl" + if not f.exists(): + return [] + return [json.loads(x) for x in f.read_text(encoding="utf-8").splitlines() if x.strip()] + + +# --- early returns ----------------------------------------------------------- + +def test_disabled_confidence_returns(hook_runner, capsys, tmp_path): + run, state, repo = hook_runner + t = tmp_path / "t.jsonl" + _write_transcript(t, "I fixed it, it works.") + _seed(repo, edit_age=10) + run("hook_stop", {"cwd": str(repo), "transcript_path": str(t)}, + settings={"preset": "solo-dev", "overrides": {"confidence.enabled": False}}) + assert capsys.readouterr().out.strip() == "" + assert _confidence(state) == [] + + +def test_missing_transcript_returns(hook_runner, capsys): + run, state, repo = hook_runner + run("hook_stop", {"cwd": str(repo)}) # no transcript_path + assert capsys.readouterr().out.strip() == "" + + +def test_no_assistant_text_warns(hook_runner, tmp_path): + run, state, repo = hook_runner + t = tmp_path / "empty.jsonl" + t.write_text('{"type":"user","message":{"content":"hi"}}\n', encoding="utf-8") + run("hook_stop", {"cwd": str(repo), "transcript_path": str(t)}) + assert "transcript_no_assistant_text" in _warnings(state) + + +def test_hedged_claim_does_nothing(hook_runner, capsys, tmp_path): + run, state, repo = hook_runner + t = tmp_path / "t.jsonl" + _write_transcript(t, "I fixed it but it is untested and needs verification.") + _seed(repo, edit_age=10) + run("hook_stop", {"cwd": str(repo), "transcript_path": str(t)}) + assert _confidence(state) == [] # hedged -> not a claim + + +def test_claim_without_recent_edit_returns(hook_runner, tmp_path): + run, state, repo = hook_runner + t = tmp_path / "t.jsonl" + _write_transcript(t, "All done, it works.") + # no edit seeded -> has_recent_edit False + run("hook_stop", {"cwd": str(repo), "transcript_path": str(t)}) + assert _confidence(state) == [] + + +# --- the meaningful outcomes ------------------------------------------------- + +def test_verified_claim_records_verified(hook_runner, tmp_path): + run, state, repo = hook_runner + t = tmp_path / "t.jsonl" + _write_transcript(t, "Fixed and all tests pass.") + _seed(repo, edit_age=10, pass_age=5) # edit + a passing test in window + run("hook_stop", {"cwd": str(repo), "transcript_path": str(t)}) + rows = _confidence(state) + assert len(rows) == 1 and rows[0]["verified"] is True + assert "unverified_success_claim" not in _warnings(state) + + +def test_unverified_claim_default_warns(hook_runner, capsys, tmp_path): + run, state, repo = hook_runner + t = tmp_path / "t.jsonl" + _write_transcript(t, "Done, it works now.") + _seed(repo, edit_age=10) # edit, no pass -> unverified + run("hook_stop", {"cwd": str(repo), "transcript_path": str(t)}) + rows = _confidence(state) + assert len(rows) == 1 and rows[0]["verified"] is False + assert "unverified_success_claim" in _warnings(state) + assert capsys.readouterr().out.strip() == "" # default stop_action is silent + + +def test_unverified_claim_block_emits_decision(hook_runner, capsys, tmp_path): + run, state, repo = hook_runner + t = tmp_path / "t.jsonl" + _write_transcript(t, "Done, it works now.") + _seed(repo, edit_age=10) + run("hook_stop", {"cwd": str(repo), "transcript_path": str(t)}, + settings={"preset": "solo-dev", "overrides": {"confidence.stop_action": "block"}}) + out = capsys.readouterr().out + assert '"decision":"block"' in out + + +# --- pure helpers ------------------------------------------------------------ + +def test_transcript_max_bytes_override_and_invalid(): + import hook_stop + assert hook_stop._transcript_max_bytes({"transcript": {"max_bytes": 4096}}) == 4096 + assert hook_stop._transcript_max_bytes({"transcript": {"max_bytes": 10}}) == 1024 # floor + assert hook_stop._transcript_max_bytes({"transcript": {"max_bytes": "nope"}}) == \ + hook_stop.TRANSCRIPT_TAIL_BYTES_DEFAULT + + +def test_last_assistant_text_string_and_list(tmp_path): + import hook_stop + p = tmp_path / "mix.jsonl" + p.write_text( + json.dumps({"type": "assistant", "message": {"content": "first"}}) + "\n" + + json.dumps({"type": "assistant", "message": {"content": [ + {"type": "text", "text": "block A"}, {"type": "text", "text": "block B"}]}}) + "\n", + encoding="utf-8", + ) + assert hook_stop._last_assistant_text(p) == "block A\nblock B" diff --git a/tests/test_post_tool_bash.py b/tests/test_post_tool_bash.py index bdb2862..87c570b 100644 --- a/tests/test_post_tool_bash.py +++ b/tests/test_post_tool_bash.py @@ -117,3 +117,74 @@ def test_redact_profiles_threaded_to_bash_event(hook_env): joined = " ".join(bash_lines) assert "GB82WEST12345698765432" not in joined assert "[REDACTED:iban]" in joined + + +# --- in-process branch coverage (hook_runner) -------------------------------- + +def _claims(state): + f = state / "telemetry" / "claims.jsonl" + if not f.exists(): + return [] + return [json.loads(x) for x in f.read_text(encoding="utf-8").splitlines() if x.strip()] + + +def _events_of_kind(state, kind): + out = [] + for p in (state / "events").rglob("pending.jsonl"): + for line in p.read_text(encoding="utf-8").splitlines(): + if line.strip() and json.loads(line).get("kind") == kind: + out.append(json.loads(line)) + return out + + +def test_git_push_records_push_claim(hook_runner): + run, state, repo = hook_runner + run("hook_post_tool_bash", { + "cwd": str(repo), + "tool_input": {"command": "git push origin main"}, + "tool_response": {"exit_code": 0}, + }) + claims = _claims(state) + assert len(claims) == 1 and claims[0]["kind"] == "push" + + +def test_gh_pr_create_records_push_claim(hook_runner): + run, state, repo = hook_runner + run("hook_post_tool_bash", { + "cwd": str(repo), + "tool_input": {"command": "gh pr create --fill"}, + "tool_response": {"exit_code": 0}, + }) + claims = _claims(state) + assert len(claims) == 1 and claims[0]["kind"] == "push" + + +def test_passing_test_command_classified(hook_runner): + run, state, repo = hook_runner + run("hook_post_tool_bash", { + "cwd": str(repo), + "tool_input": {"command": "pytest -q"}, + "tool_response": {"exit_code": 0}, + }) + assert _events_of_kind(state, "test_pass") + assert _claims(state) == [] # a test run is not a commit/push claim + + +def test_nonzero_exit_records_nothing(hook_runner): + run, state, repo = hook_runner + run("hook_post_tool_bash", { + "cwd": str(repo), + "tool_input": {"command": "git commit -m x"}, + "tool_response": {"exit_code": 1}, + }) + assert _claims(state) == [] + + +def test_resolve_commit_cwd(): + import os + + from hook_post_tool_bash import _resolve_commit_cwd + assert _resolve_commit_cwd("/repo", "git -C /abs/dir commit -m x") == "/abs/dir" + assert _resolve_commit_cwd("/repo", "git -C sub commit -m x") == os.path.join("/repo", "sub") + assert _resolve_commit_cwd("/repo", "cd sub && git commit -m x") == os.path.join("/repo", "sub") + assert _resolve_commit_cwd("/repo", "git commit -m x") == "/repo"