diff --git a/.env.example b/.env.example index f8793d7..36a8183 100644 --- a/.env.example +++ b/.env.example @@ -30,3 +30,35 @@ PROCESS_CONCURRENCY=4 # Max attempts before a job moves from `failed` to permanently failed. MAX_ATTEMPTS=3 + +## OIDC Authentication (verifier reviewer identity) + +# When unset, the verifier falls back to either VERIFIER_PASSWORD-gated +# BasicAuth or no auth at all (current local-dev default). Set all five +# vars together to enable OIDC against api.wxyc.org/auth. + +# Issuer URL of the WXYC Better Auth server. Production: +# https://api.wxyc.org/auth. Local: http://localhost:8082/auth. +WXYC_AUTH_ISSUER= + +# OIDC client id (must match Backend-Service's FLOWSHEET_OIDC_CLIENT_ID). +WXYC_OIDC_CLIENT_ID= + +# OIDC client secret (must match Backend-Service's +# FLOWSHEET_OIDC_CLIENT_SECRET exactly). Treat as production secret; +# never commit a real value. +WXYC_OIDC_CLIENT_SECRET= + +# Signing key for the verifier's own session cookie. 32+ random bytes; +# rotating it invalidates every active reviewer session. Generate with +# `python -c "import secrets; print(secrets.token_urlsafe(48))"`. The +# cookie's TTL is hard-coded to 12 hours in core/auth.py; expect a +# re-login at that boundary mid-session (no refresh-token plumbing +# yet — by design, see plans/oidc-auth.md "Deliberately out of scope"). +WXYC_SESSION_SECRET= + +# Public origin where this verifier is reachable. Determines the +# `redirect_uri` sent to the auth server and gates the `secure` flag +# on session cookies (Secure only when the value starts with `https`). +# Production: https://flowsheet.wxyc.org. Local: http://localhost:8765. +FLOWSHEET_PUBLIC_URL= diff --git a/CLAUDE.md b/CLAUDE.md index c732359..2563860 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -26,8 +26,23 @@ core/ schema.py Pydantic models. GeminiPageResult is what the model returns (used as response_schema); PageResult adds caller-set model_version + - extracted_at and is what lands on disk. Entry - and Quadrant are shared between both. + extracted_at + verified_by and is what lands + on disk. Entry and Quadrant are shared + between both. + auth.py OIDC client (against Better Auth at + api.wxyc.org/auth), signed session-cookie + codec (itsdangerous), and the ReviewerSession + dataclass. Consumed by verifier/serve.py + (middleware + /api/save). Module-level cached + discovery doc + JWKS — configuration, not + per-request state; _reset_metadata_cache() + is the test seam. Lazy-loaded (not eager at + boot) so a transient auth-server slowdown + doesn't take the verifier offline; first + caller after a deploy pays ~50-150ms cold + start. Deviation from the DI pattern in + core/gemini.py is intentional; don't "fix" + it back — see the module docstring. prompts.py Extraction prompt (separate module so prompt changes are reviewable independently). gemini.py Async wrapper around google-genai. Dependency diff --git a/core/jobs.py b/core/jobs.py index 15a8208..2786aea 100644 --- a/core/jobs.py +++ b/core/jobs.py @@ -53,6 +53,7 @@ class Job: verified_at: str | None verified_path: str | None corrections_path: str | None + reviewer_id: str | None created_at: str updated_at: str @@ -73,6 +74,7 @@ def from_row(cls, row: aiosqlite.Row) -> Self: verified_at=row["verified_at"] if "verified_at" in keys else None, verified_path=row["verified_path"] if "verified_path" in keys else None, corrections_path=(row["corrections_path"] if "corrections_path" in keys else None), + reviewer_id=row["reviewer_id"] if "reviewer_id" in keys else None, created_at=row["created_at"], updated_at=row["updated_at"], ) @@ -91,6 +93,7 @@ def from_row(cls, row: aiosqlite.Row) -> Self: verified_at TEXT, verified_path TEXT, corrections_path TEXT, + reviewer_id TEXT, created_at TEXT NOT NULL, updated_at TEXT NOT NULL, PRIMARY KEY (pdf_path, page_number) @@ -106,6 +109,11 @@ def from_row(cls, row: aiosqlite.Row) -> Self: ("verified_at", "TEXT"), ("verified_path", "TEXT"), ("corrections_path", "TEXT"), + # Denormalized OIDC user_id of the volunteer who last verified the + # page. Duplicates the user_id in `verified_by.user_id` inside the + # verified.json file, but lets per-reviewer queries run without + # parsing every JSON on disk. + ("reviewer_id", "TEXT"), ) # Indexes that depend on late-added columns and therefore must be created @@ -283,6 +291,7 @@ async def mark_verified( *, verified_path: Path, corrections_path: Path, + reviewer_id: str | None = None, ) -> bool: """Record that a page has been hand-verified via the verifier UI. @@ -292,11 +301,17 @@ async def mark_verified( clear the verification record by default — that's a separate decision a human makes via `retry`). + `reviewer_id` is the OIDC user_id of the volunteer who saved the + page (`ReviewerSession.user_id`). Optional so the BasicAuth and + no-auth deployments — and the existing test fixtures — keep + compiling without a reviewer to credit. + Returns True if a job row matched, False otherwise. Callers (e.g. the verifier server) may want to write files even when no job row exists for the page (test fixtures), so a False return is not an error. """ + now = _now() async with self._connect() as db: cursor = await db.execute( """ @@ -304,14 +319,16 @@ async def mark_verified( SET verified_at = ?, verified_path = ?, corrections_path = ?, + reviewer_id = ?, updated_at = ? WHERE pdf_path = ? AND page_number = ? """, ( - _now(), + now, str(verified_path), str(corrections_path), - _now(), + reviewer_id, + now, pdf_path, page_number, ), diff --git a/core/schema.py b/core/schema.py index 7ae3f6d..60ec915 100644 --- a/core/schema.py +++ b/core/schema.py @@ -170,13 +170,61 @@ def _check_quadrant_order(self) -> Self: return self +class VerifiedBy(BaseModel): + """Identity of the reviewer who saved this verified page. + + Populated only by `verifier/serve.py`'s POST /api/save handler when + an authenticated reviewer session is present. Pipeline-written + PageResults leave `PageResult.verified_by` as None. + + `user_id` is deliberately denormalized to `jobs.reviewer_id` (see + `core/jobs.py`) so per-reviewer queries don't have to parse every + JSON file on disk. The two values are always equal for the same + save call; both are written inside `/api/save` under server + authority — the client never sets either. A client-supplied + `verified_by` block on a save POST is overwritten with the + authenticated reviewer's values before persistence (see the + handler). + """ + + user_id: str = Field(description="Better Auth user.id (the OIDC `sub` claim).") + username: str | None = Field( + default=None, + description="Better Auth username, if set.", + ) + real_name: str | None = Field( + default=None, + description="Reviewer's real name from the WXYC user record.", + ) + dj_name: str | None = Field( + default=None, + description="Reviewer's on-air DJ name, if set.", + ) + verified_at: datetime = Field( + description="When the verifier UI saved this page (UTC).", + ) + + class PageResult(GeminiPageResult): - """On-disk shape: `GeminiPageResult` plus the two fields the caller owns. + """On-disk shape: `GeminiPageResult` plus the fields the caller owns. `model_version` and `extracted_at` are filled by the pipeline (or by each calibration adapter) at write-time. They are NOT part of the Gemini response_schema — see the module docstring. + + `verified_by` is populated only by `verifier/serve.py` when a + reviewer saves a page through the verifier UI. Defaulting to None + keeps every pre-OIDC `verified.json` parseable (the field is + silently absent on old files); new saves write the block and old + files are upgraded on the next save. """ model_version: str = Field(description="Model id that produced this result.") extracted_at: datetime = Field(description="When the extraction completed (UTC).") + verified_by: VerifiedBy | None = Field( + default=None, + description=( + "Reviewer who saved this corrected page via the verifier UI. " + "None for results that have only been through the automatic pipeline." + ), + ) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py new file mode 100644 index 0000000..e0cef23 --- /dev/null +++ b/tests/unit/conftest.py @@ -0,0 +1,42 @@ +"""Shared fixtures + helpers for `tests/unit/`. + +Created in the OIDC PR for `_reset_serve_module_state`, which the +parametrized middleware-precedence and Secure-flag tests need to keep +test cases from leaking module-level state into each other. +""" + +from __future__ import annotations + +from types import ModuleType + + +def _reset_serve_module_state(serve_mod: ModuleType) -> None: + """Clear module-level caches that survive `importlib.reload()`. + + `importlib.reload(serve_mod)` rebinds the *module object*, but two + pieces of process-wide state can still leak from a previous test: + + * `serve_mod._initialized_jobs_dbs` — set[Path] tracking which + jobs.db files have had their schema bootstrap run. A test that + flips DATA_ROOT between parametrizations needs this cleared, + or the new path inherits the old "already bootstrapped" + assumption and the schema migration is silently skipped. + * `core.auth._metadata` and `core.auth._jwks` — cached OIDC + discovery doc and JWKS. A test that flips `WXYC_AUTH_ISSUER` + between parametrizations needs these cleared, or the new + issuer's auth calls run against stale metadata. + + REQUIRED: call this unconditionally after every + `importlib.reload(serve_mod)` and every `importlib.reload(auth_mod)` + — including when the test under construction looks orthogonal to + these caches. Whether a future parametrization touches DATA_ROOT or + WXYC_AUTH_ISSUER is not predictable from inside one test case; + skipping the reset produces flakes that depend on test execution + order, which is the failure mode you do not want. The cost of + calling it when unneeded is two `set.clear()` calls. + """ + import core.auth as auth_mod + + if hasattr(serve_mod, "_initialized_jobs_dbs"): + serve_mod._initialized_jobs_dbs.clear() + auth_mod._reset_metadata_cache() diff --git a/tests/unit/test_jobs.py b/tests/unit/test_jobs.py index 5f8d03f..4cc8ec0 100644 --- a/tests/unit/test_jobs.py +++ b/tests/unit/test_jobs.py @@ -285,3 +285,78 @@ async def test_init_adds_late_columns_to_existing_db(tmp_path: Path) -> None: assert "verified_at" in cols assert "verified_path" in cols assert "corrections_path" in cols + # reviewer_id is the late column added by the OIDC PR. Without this + # branch the migration on an existing prod jobs.db would silently + # leave the column off and every mark_verified(reviewer_id=...) call + # would persist no reviewer at all. + assert "reviewer_id" in cols + + +# -- reviewer_id (OIDC PR) ------------------------------------------------- + + +async def test_mark_verified_round_trips_reviewer_id(store: JobStore, tmp_path: Path) -> None: + """`mark_verified(..., reviewer_id="u-123")` persists to `jobs.reviewer_id`. + + The verifier server denormalizes the OIDC user_id onto the row so + per-reviewer queries don't have to parse every verified.json on disk. + """ + await store.register("scans/a.pdf", 1) + await store.mark_rendered("scans/a.pdf", 1, image_path=tmp_path / "a.png") + await store.mark_completed("scans/a.pdf", 1, result_path=tmp_path / "a.json", model_version="m") + + matched = await store.mark_verified( + "scans/a.pdf", + 1, + verified_path=tmp_path / "a.verified.json", + corrections_path=tmp_path / "a.corrections.json", + reviewer_id="u-123", + ) + assert matched is True + + job = await store.get("scans/a.pdf", 1) + assert job is not None + assert job.reviewer_id == "u-123" + + +async def test_mark_verified_accepts_none_reviewer_id(store: JobStore, tmp_path: Path) -> None: + """`reviewer_id=None` (the default) writes NULL — the BasicAuth and + no-auth deployments don't have a reviewer to credit and the column + should accept that without a NOT NULL violation.""" + await store.register("scans/a.pdf", 1) + await store.mark_rendered("scans/a.pdf", 1, image_path=tmp_path / "a.png") + + matched = await store.mark_verified( + "scans/a.pdf", + 1, + verified_path=tmp_path / "a.verified.json", + corrections_path=tmp_path / "a.corrections.json", + # reviewer_id omitted; defaults to None. + ) + assert matched is True + + job = await store.get("scans/a.pdf", 1) + assert job is not None + assert job.reviewer_id is None + + +async def test_mark_verified_overwrites_reviewer_id_on_re_save( + store: JobStore, tmp_path: Path +) -> None: + """A second `mark_verified` call updates `reviewer_id` to the latest + reviewer. Verification is "who last saved this page", not an + append-only log — matches the file-side semantics where verified.json + is rewritten by every save.""" + await store.register("scans/a.pdf", 1) + await store.mark_rendered("scans/a.pdf", 1, image_path=tmp_path / "a.png") + + paths = { + "verified_path": tmp_path / "a.verified.json", + "corrections_path": tmp_path / "a.corrections.json", + } + await store.mark_verified("scans/a.pdf", 1, **paths, reviewer_id="first") + await store.mark_verified("scans/a.pdf", 1, **paths, reviewer_id="second") + + job = await store.get("scans/a.pdf", 1) + assert job is not None + assert job.reviewer_id == "second" diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index c082267..37a6d7c 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -214,6 +214,80 @@ def test_rejects_duplicate_quadrant_positions(self) -> None: extracted_at=datetime.now(UTC), ) + def test_verified_by_defaults_to_none(self) -> None: + """A PageResult written by the pipeline has no reviewer. Default + matters: every verified.json on disk that pre-dates the OIDC PR + re-loads with `verified_by=None`, and no migration is needed.""" + page = PageResult( + page_date_raw=None, + quadrants=[ + self._quad("top_left"), + self._quad("top_right"), + self._quad("bottom_left"), + self._quad("bottom_right"), + ], + model_version="m", + extracted_at=datetime.now(UTC), + ) + assert page.verified_by is None + + def test_old_verified_json_without_verified_by_loads(self) -> None: + """Backwards compat: a verified.json saved before the OIDC PR + omits `verified_by` and must still parse. Exercises the same + load path the verifier server runs on every save (model_validate + of the on-disk JSON).""" + from core.schema import PageResult + + old = { + "page_date_raw": None, + "quadrants": [ + { + "position": p, + "hour_raw": None, + "jock_raw": None, + "entries": [], + "oddities": [], + } + for p in ("top_left", "top_right", "bottom_left", "bottom_right") + ], + "comments_raw": None, + "oddities": [], + "model_version": "m", + "extracted_at": "2026-05-01T00:00:00Z", + # No verified_by key — the pre-OIDC shape. + } + page = PageResult.model_validate(old) + assert page.verified_by is None + + def test_verified_by_round_trips_through_pageresult(self) -> None: + """A populated `verified_by` survives encode -> decode through + `model_dump_json` -> `model_validate_json` so the verifier + server can write the file and the SPA reload reads back the + exact block it wrote.""" + from core.schema import PageResult, VerifiedBy + + page = PageResult( + page_date_raw=None, + quadrants=[ + self._quad(p) for p in ("top_left", "top_right", "bottom_left", "bottom_right") + ], + model_version="m", + extracted_at=datetime.now(UTC), + verified_by=VerifiedBy( + user_id="u-1", + username="reviewer", + real_name="Real Name", + dj_name="DJ Name", + verified_at=datetime.now(UTC), + ), + ) + roundtripped = PageResult.model_validate_json(page.model_dump_json()) + assert roundtripped.verified_by is not None + assert roundtripped.verified_by.user_id == "u-1" + assert roundtripped.verified_by.username == "reviewer" + assert roundtripped.verified_by.real_name == "Real Name" + assert roundtripped.verified_by.dj_name == "DJ Name" + def test_confidence_values() -> None: # Sanity: documents the exact set the pipeline contracts on. diff --git a/tests/unit/test_verifier_auth_routes.py b/tests/unit/test_verifier_auth_routes.py new file mode 100644 index 0000000..9a9d85b --- /dev/null +++ b/tests/unit/test_verifier_auth_routes.py @@ -0,0 +1,907 @@ +"""Tests for the OIDC auth routes + session middleware in verifier/serve.py. + +Covers: + * `/auth/login` builds the authorize URL, sets 3 signed one-shot cookies. + * `/auth/callback` validates state + return_to, runs `exchange_code`, + seals the session cookie, clears one-shots. Transport failures + translate to 503; claim/signature failures translate to 400. + * `/auth/logout` clears the session cookie. + * `/auth/me` returns 401 without a session, 200 with one. + * `/api/save` requires a session, overwrites client-supplied + `verified_by` from the authenticated reviewer (server is authority), + and threads `reviewer_id` into `jobs.db`. + * Session middleware bypass for `_PUBLIC_PATHS` + 302 redirect for + HTML + 401 JSON for AJAX. + * Tri-modal middleware install: OIDC > BasicAuth > none. + * Cookie `Secure` flag gated on `FLOWSHEET_PUBLIC_URL` scheme. + +These tests use the existing `httpx.ASGITransport` pattern from +`test_verifier_serve.py` to exercise the FastAPI app in-process. The +auth-server calls are mocked at the `core.auth.build_authorize_url` +/ `core.auth.exchange_code` seam — no live OIDC needed. +""" + +from __future__ import annotations + +import importlib +import json +from datetime import UTC, datetime +from pathlib import Path +from typing import Any +from urllib.parse import parse_qs, urlparse + +import httpx +import pytest +from httpx import ASGITransport, AsyncClient + +import core.auth as auth_mod +from core.auth import ReviewerSession +from core.schema import QUADRANT_ORDER, PageResult, Quadrant +from tests.unit.conftest import _reset_serve_module_state + +ISSUER = "https://auth.example/auth" +CLIENT_ID = "flowsheet-test" +CLIENT_SECRET = "test-client-secret" +PUBLIC_URL = "https://flowsheet.example" +SESSION_SECRET = "x" * 64 + + +def _set_oidc_env(monkeypatch: pytest.MonkeyPatch, *, public_url: str = PUBLIC_URL) -> None: + monkeypatch.setenv("WXYC_AUTH_ISSUER", ISSUER) + monkeypatch.setenv("WXYC_OIDC_CLIENT_ID", CLIENT_ID) + monkeypatch.setenv("WXYC_OIDC_CLIENT_SECRET", CLIENT_SECRET) + monkeypatch.setenv("WXYC_SESSION_SECRET", SESSION_SECRET) + monkeypatch.setenv("FLOWSHEET_PUBLIC_URL", public_url) + + +def _page_result_dict() -> dict[str, Any]: + """Minimal valid PageResult payload, matching test_verifier_serve.py.""" + return PageResult( + page_date_raw="Mon 1 Jan 90", + quadrants=[ + Quadrant(position=p, hour_raw=None, jock_raw=None, entries=[], oddities=[]) + for p in QUADRANT_ORDER + ], + comments_raw=None, + oddities=[], + model_version="test-model", + extracted_at=datetime(2026, 5, 12, tzinfo=UTC), + ).model_dump(mode="json") + + +def _corrections_dict() -> dict[str, Any]: + return { + "stem": "test", + "model_version": "test-model", + "extracted_at": "2026-05-12T00:00:00Z", + "exported_at": "2026-05-12T00:00:01Z", + "page_corrections": [], + "quadrant_corrections": [], + "row_corrections": [], + "added_rows": [], + "deleted_rows": [], + } + + +def _make_reviewer() -> ReviewerSession: + return ReviewerSession( + user_id="reviewer-real-id", + email="dj@wxyc.org", + username="dj_radio", + real_name="Real Name", + dj_name="Stage Name", + role="dj", + ) + + +@pytest.fixture +def serve_app(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): + """Build a fresh OIDC-enabled FastAPI app rooted at `tmp_path`. + + Reloads `verifier.serve` after env is set so `OIDC_ENABLED` + + the middleware install pick up our env. Cleans up module-level + state on teardown so subsequent tests start clean. + """ + monkeypatch.setenv("DATA_ROOT", str(tmp_path / "data")) + _set_oidc_env(monkeypatch) + + import verifier.serve as serve_mod + + importlib.reload(serve_mod) + _reset_serve_module_state(serve_mod) + yield serve_mod + monkeypatch.undo() + importlib.reload(serve_mod) + _reset_serve_module_state(serve_mod) + + +async def _client(app): + return AsyncClient(transport=ASGITransport(app=app), base_url="http://test") + + +def _session_cookie_for(reviewer: ReviewerSession) -> str: + """Mint a session cookie value the way the callback handler does. + + Tests that want to skip the OIDC dance and exercise a protected + endpoint directly use this to plant a session. + """ + return auth_mod.encode_session(reviewer) + + +# -- /auth/login ----------------------------------------------------------- + + +async def test_auth_login_redirects_with_signed_oneshots( + serve_app, monkeypatch: pytest.MonkeyPatch +) -> None: + """/auth/login 302s to the auth server with code_challenge + state, + and sets `oidc_state` / `oidc_verifier` / `oidc_return_to` as signed + one-shot cookies. Without those cookies, `/auth/callback` can't + validate the round-trip — pin the contract so a refactor doesn't + drop any of the three.""" + + async def fake_authorize() -> tuple[str, str, str]: + return ("https://auth.example/oauth2/authorize?fake=1", "state-value", "verifier-value") + + monkeypatch.setattr(auth_mod, "build_authorize_url", fake_authorize) + + async with await _client(serve_app.app) as c: + r = await c.get("/auth/login?return_to=/verifier/?bundle=x", follow_redirects=False) + + assert r.status_code == 302 + assert r.headers["location"].startswith("https://auth.example/oauth2/authorize") + cookies = r.headers.get_list("set-cookie") + cookie_names = {c.split("=", 1)[0] for c in cookies} + assert "oidc_state" in cookie_names + assert "oidc_verifier" in cookie_names + assert "oidc_return_to" in cookie_names + + +async def test_auth_login_503_on_auth_server_transport_failure( + serve_app, monkeypatch: pytest.MonkeyPatch +) -> None: + """If discovery doc fetch fails, /auth/login surfaces 503 (not 500) + so the SPA can show a "try again" instead of a generic crash.""" + + async def boom() -> tuple[str, str, str]: + raise httpx.ConnectError("discovery unreachable") + + monkeypatch.setattr(auth_mod, "build_authorize_url", boom) + + async with await _client(serve_app.app) as c: + r = await c.get("/auth/login", follow_redirects=False) + assert r.status_code == 503 + + +# -- /auth/callback -------------------------------------------------------- + + +async def _seed_one_shots(c: AsyncClient, state: str, verifier: str, return_to: str) -> None: + """Plant the three signed one-shot cookies a callback round-trip + would have set during /auth/login. + + Httpx's cookie jar would need cookie attributes; setting raw via the + `Cookie` header on subsequent requests keeps the test simple. + """ + signed_state = auth_mod.sign_one_shot(state) + signed_verifier = auth_mod.sign_one_shot(verifier) + signed_return_to = auth_mod.sign_one_shot(return_to) + c.cookies.set("oidc_state", signed_state) + c.cookies.set("oidc_verifier", signed_verifier) + c.cookies.set("oidc_return_to", signed_return_to) + + +async def test_auth_callback_happy_path(serve_app, monkeypatch: pytest.MonkeyPatch) -> None: + """Valid state + valid exchange → session cookie set, 302 to return_to.""" + + async def fake_exchange(*, code: str, code_verifier: str) -> ReviewerSession: + assert code == "auth-code" + assert code_verifier == "the-verifier" + return _make_reviewer() + + monkeypatch.setattr(auth_mod, "exchange_code", fake_exchange) + + async with await _client(serve_app.app) as c: + await _seed_one_shots(c, "the-state", "the-verifier", "/verifier/?bundle=x") + r = await c.get("/auth/callback?code=auth-code&state=the-state", follow_redirects=False) + assert r.status_code == 302 + assert r.headers["location"] == "/verifier/?bundle=x" + cookie_names = {c.split("=", 1)[0] for c in r.headers.get_list("set-cookie")} + assert "flowsheet_session" in cookie_names + + +async def test_auth_callback_400_on_state_mismatch( + serve_app, monkeypatch: pytest.MonkeyPatch +) -> None: + """A `state` query param that doesn't match the signed cookie value + is a CSRF attempt (or a botched cookie jar) — return 400, do not + proceed to token exchange.""" + + async def fake_exchange(*, code: str, code_verifier: str) -> ReviewerSession: + raise AssertionError("exchange_code must not be called when state mismatches") + + monkeypatch.setattr(auth_mod, "exchange_code", fake_exchange) + + async with await _client(serve_app.app) as c: + await _seed_one_shots(c, "expected-state", "v", "/verifier/") + r = await c.get("/auth/callback?code=auth-code&state=wrong-state", follow_redirects=False) + assert r.status_code == 400 + + +async def test_auth_callback_400_on_tampered_state_cookie( + serve_app, monkeypatch: pytest.MonkeyPatch +) -> None: + """A tampered `oidc_state` cookie produces 400 — the signed + one-shot's tamper resistance is load-bearing for the CSRF check.""" + + async def fake_exchange(*, code: str, code_verifier: str) -> ReviewerSession: + raise AssertionError("exchange must not run when state cookie is bad") + + monkeypatch.setattr(auth_mod, "exchange_code", fake_exchange) + + async with await _client(serve_app.app) as c: + c.cookies.set("oidc_state", "not-a-real-signed-value") + c.cookies.set("oidc_verifier", auth_mod.sign_one_shot("v")) + c.cookies.set("oidc_return_to", auth_mod.sign_one_shot("/verifier/")) + r = await c.get("/auth/callback?code=auth-code&state=anything", follow_redirects=False) + assert r.status_code == 400 + + +@pytest.mark.parametrize( + "return_to", + [ + "https://evil.example/", + "//evil.example/verifier", + # Word-boundary defense: '/verifierx-admin' looks like /verifier + # to a naive startswith check; the allowlist must reject it so + # an attacker can't smuggle a hostile route through a future + # mount whose name shares the prefix. + "/verifierx-admin/login", + "/verifierdata/leak", + ], +) +async def test_auth_callback_falls_back_to_verifier_for_off_allowlist_return_to( + return_to: str, serve_app, monkeypatch: pytest.MonkeyPatch +) -> None: + """Off-allowlist return_to → fallback to `/verifier/`. Open-redirect + defense AND a word-boundary check so `/verifier*` siblings can't + smuggle through a naive `startswith("/verifier")` check.""" + + async def fake_exchange(*, code: str, code_verifier: str) -> ReviewerSession: + return _make_reviewer() + + monkeypatch.setattr(auth_mod, "exchange_code", fake_exchange) + + async with await _client(serve_app.app) as c: + await _seed_one_shots(c, "s", "v", return_to) + r = await c.get("/auth/callback?code=c&state=s", follow_redirects=False) + assert r.status_code == 302 + assert r.headers["location"] == "/verifier/" + + +async def test_auth_login_503_when_env_var_missing( + serve_app, monkeypatch: pytest.MonkeyPatch +) -> None: + """A partial OIDC config — WXYC_OIDC_CLIENT_ID set but + WXYC_AUTH_ISSUER unset — surfaces as 503 ('auth not configured') + rather than 500 with the env-var name in the traceback. + Mirrors `core.auth.decode_session`'s RuntimeError catch for the + middleware path. + """ + + async def boom() -> tuple[str, str, str]: + raise RuntimeError("WXYC_AUTH_ISSUER is not set") + + monkeypatch.setattr(auth_mod, "build_authorize_url", boom) + async with await _client(serve_app.app) as c: + r = await c.get("/auth/login", follow_redirects=False) + assert r.status_code == 503 + + +async def test_auth_login_503_when_session_secret_unset( + serve_app, monkeypatch: pytest.MonkeyPatch +) -> None: + """`build_authorize_url` succeeds but the subsequent `sign_one_shot` + calls reach `_session_secret()`, which raises RuntimeError if + WXYC_SESSION_SECRET is unset. The /auth/login handler must wrap + BOTH the URL build and the cookie-sealing steps so a config + rollout that updates WXYC_OIDC_CLIENT_ID and forgets + WXYC_SESSION_SECRET still produces 503 (not 500). Regression + guard for the earlier shape that only wrapped build_authorize_url. + """ + + async def fake_authorize() -> tuple[str, str, str]: + return ("https://auth.example/oauth2/authorize?fake=1", "state-x", "verifier-x") + + monkeypatch.setattr(auth_mod, "build_authorize_url", fake_authorize) + # Now blank WXYC_SESSION_SECRET so sign_one_shot raises RuntimeError. + monkeypatch.delenv("WXYC_SESSION_SECRET", raising=False) + async with await _client(serve_app.app) as c: + r = await c.get("/auth/login", follow_redirects=False) + assert r.status_code == 503 + + +async def test_auth_callback_503_when_env_var_missing( + serve_app, monkeypatch: pytest.MonkeyPatch +) -> None: + """Same protection on the callback path — RuntimeError from + exchange_code (e.g., env var blanked mid-process) is mapped to 503, + not 500. + """ + + async def boom(*, code: str, code_verifier: str) -> ReviewerSession: + raise RuntimeError("WXYC_OIDC_CLIENT_SECRET is not set") + + monkeypatch.setattr(auth_mod, "exchange_code", boom) + async with await _client(serve_app.app) as c: + await _seed_one_shots(c, "s", "v", "/verifier/") + r = await c.get("/auth/callback?code=c&state=s", follow_redirects=False) + assert r.status_code == 503 + + +async def test_auth_callback_503_on_token_endpoint_transport_failure( + serve_app, monkeypatch: pytest.MonkeyPatch +) -> None: + """An httpx error from `exchange_code` (e.g., auth server slow) is + 503 not 500 so the SPA can distinguish transport from claim failure.""" + + async def fake_exchange(*, code: str, code_verifier: str) -> ReviewerSession: + raise httpx.ConnectError("token endpoint unreachable") + + monkeypatch.setattr(auth_mod, "exchange_code", fake_exchange) + + async with await _client(serve_app.app) as c: + await _seed_one_shots(c, "s", "v", "/verifier/") + r = await c.get("/auth/callback?code=c&state=s", follow_redirects=False) + assert r.status_code == 503 + + +async def test_auth_callback_400_on_claim_validation_failure( + serve_app, monkeypatch: pytest.MonkeyPatch +) -> None: + """A `JoseError` from `exchange_code` (e.g., bad signature, aud + mismatch) is 400 — the user needs to redo the login, not a 500 + crash.""" + from authlib.jose.errors import JoseError + + async def fake_exchange(*, code: str, code_verifier: str) -> ReviewerSession: + raise JoseError("invalid aud") + + monkeypatch.setattr(auth_mod, "exchange_code", fake_exchange) + + async with await _client(serve_app.app) as c: + await _seed_one_shots(c, "s", "v", "/verifier/") + r = await c.get("/auth/callback?code=c&state=s", follow_redirects=False) + assert r.status_code == 400 + + +# -- /auth/logout ---------------------------------------------------------- + + +async def test_auth_logout_clears_session_cookie(serve_app) -> None: + async with await _client(serve_app.app) as c: + c.cookies.set("flowsheet_session", _session_cookie_for(_make_reviewer())) + r = await c.post("/auth/logout") + assert r.status_code == 200 + # `Set-Cookie` for delete sends an immediate-expiry cookie value. + set_cookies = r.headers.get_list("set-cookie") + assert any("flowsheet_session" in sc and "Max-Age=0" in sc for sc in set_cookies), ( + f"expected a delete on flowsheet_session, got: {set_cookies}" + ) + + +# -- /auth/me -------------------------------------------------------------- + + +async def test_auth_me_401_without_session(serve_app) -> None: + async with await _client(serve_app.app) as c: + # Accept: application/json so the middleware returns 401 JSON + # rather than a 302 redirect. + r = await c.get( + "/auth/me", + headers={"accept": "application/json"}, + follow_redirects=False, + ) + assert r.status_code == 401 + + +async def test_auth_me_returns_reviewer_with_valid_session(serve_app) -> None: + """With a valid session cookie, /auth/me returns the ReviewerSession + as JSON. The SPA reads this on load to render the reviewer's name.""" + reviewer = _make_reviewer() + async with await _client(serve_app.app) as c: + c.cookies.set("flowsheet_session", _session_cookie_for(reviewer)) + r = await c.get("/auth/me") + assert r.status_code == 200 + body = r.json() + assert body["user_id"] == reviewer.user_id + assert body["real_name"] == reviewer.real_name + assert body["dj_name"] == reviewer.dj_name + + +# -- /api/version unauthenticated ----------------------------------------- + + +async def test_api_version_is_public(serve_app) -> None: + """`/api/version` must stay public — it's the Railway healthcheck. + A regression here breaks every deploy because the healthcheck would + redirect to /auth/login. Load-bearing assertion.""" + async with await _client(serve_app.app) as c: + r = await c.get("/api/version") + assert r.status_code == 200 + assert "version" in r.json() + + +# -- /api/save ------------------------------------------------------------- + + +async def test_api_save_redirects_without_session_for_html_requests(serve_app) -> None: + """A browser nav (no `Accept: application/json`) without a session + redirects to /auth/login with `return_to=` set to the original path.""" + async with await _client(serve_app.app) as c: + r = await c.post( + "/api/save", + json={"stem": "x", "verified": _page_result_dict(), "corrections": _corrections_dict()}, + follow_redirects=False, + ) + assert r.status_code == 302 + assert r.headers["location"].startswith("/auth/login?return_to=") + + +async def test_api_save_returns_401_json_for_ajax_without_session(serve_app) -> None: + """A fetch() request with `Accept: application/json` gets a JSON + 401 so the SPA can redirect via `location.href`.""" + async with await _client(serve_app.app) as c: + r = await c.post( + "/api/save", + headers={"accept": "application/json"}, + json={"stem": "x", "verified": _page_result_dict(), "corrections": _corrections_dict()}, + follow_redirects=False, + ) + assert r.status_code == 401 + + +async def test_api_save_writes_verified_by_from_session(serve_app, tmp_path: Path) -> None: + """With a session, /api/save writes a `verified_by` block populated + from the reviewer's identity. The block includes user_id, name, + dj_name.""" + reviewer = _make_reviewer() + async with await _client(serve_app.app) as c: + c.cookies.set("flowsheet_session", _session_cookie_for(reviewer)) + r = await c.post( + "/api/save", + json={ + "stem": "page25", + "verified": _page_result_dict(), + "corrections": _corrections_dict(), + }, + ) + assert r.status_code == 200 + + on_disk = json.loads((tmp_path / "data" / "verifier" / "page25.verified.json").read_text()) + vb = on_disk["verified_by"] + assert vb is not None + assert vb["user_id"] == reviewer.user_id + assert vb["username"] == reviewer.username + assert vb["real_name"] == reviewer.real_name + assert vb["dj_name"] == reviewer.dj_name + assert "verified_at" in vb + + +async def test_api_save_discards_client_supplied_verified_by(serve_app, tmp_path: Path) -> None: + """Server-authority: a client that crafts a `verified_by` block in + the payload gets it overwritten with the server's view of the + authenticated reviewer. Protects against credential confusion via + a buggy or hostile client.""" + reviewer = _make_reviewer() + polluted = _page_result_dict() + polluted["verified_by"] = { + "user_id": "spoofed-id", + "username": "attacker", + "real_name": "Not Real", + "dj_name": "Fake", + "verified_at": "2020-01-01T00:00:00Z", + } + async with await _client(serve_app.app) as c: + c.cookies.set("flowsheet_session", _session_cookie_for(reviewer)) + r = await c.post( + "/api/save", + json={ + "stem": "spoof-test", + "verified": polluted, + "corrections": _corrections_dict(), + }, + ) + assert r.status_code == 200 + on_disk = json.loads((tmp_path / "data" / "verifier" / "spoof-test.verified.json").read_text()) + assert on_disk["verified_by"]["user_id"] == reviewer.user_id + # And the spoofed verified_at was replaced with the server's clock. + assert on_disk["verified_by"]["verified_at"] != "2020-01-01T00:00:00Z" + + +async def test_api_save_rejects_malformed_client_verified_by(serve_app) -> None: + """A malformed `verified_by` (wrong type) fails Pydantic + validation BEFORE the server-authority overwrite runs. The error + surfaces as 400 (defense-in-depth — the overwrite would have + discarded the value, but exposing a corrupt shape via 400 is better + than silently accepting it).""" + reviewer = _make_reviewer() + cases = [ + {"verified_by": 123}, + {"verified_by": []}, + {"verified_by": "hello"}, + {"verified_by": {}}, # missing required user_id + verified_at + ] + async with await _client(serve_app.app) as c: + c.cookies.set("flowsheet_session", _session_cookie_for(reviewer)) + for i, override in enumerate(cases): + verified = {**_page_result_dict(), **override} + r = await c.post( + "/api/save", + json={ + "stem": f"malformed-{i}", + "verified": verified, + "corrections": _corrections_dict(), + }, + ) + assert r.status_code == 400, f"case {i}={override!r} expected 400 got {r.status_code}" + + +async def test_api_save_writes_reviewer_id_to_jobs_db(serve_app, tmp_path: Path) -> None: + """With pdf_path + page_number + a session, mark_verified is called + with the reviewer's user_id and the jobs.db row picks it up.""" + from core.jobs import JobStore + + db_path = tmp_path / "data" / "jobs.db" + db_path.parent.mkdir(parents=True, exist_ok=True) + store = JobStore(db_path) + await store.init() + await store.register("1990/x.pdf", 1) + await store.mark_rendered("1990/x.pdf", 1, image_path=tmp_path / "x.png") + await store.mark_completed("1990/x.pdf", 1, result_path=tmp_path / "x.json", model_version="m") + + reviewer = _make_reviewer() + async with await _client(serve_app.app) as c: + c.cookies.set("flowsheet_session", _session_cookie_for(reviewer)) + r = await c.post( + "/api/save", + json={ + "stem": "x-page-01", + "pdf_path": "1990/x.pdf", + "page_number": 1, + "verified": _page_result_dict(), + "corrections": _corrections_dict(), + }, + ) + assert r.status_code == 200 + assert r.json()["db_updated"] is True + + job = await store.get("1990/x.pdf", 1) + assert job is not None + assert job.reviewer_id == reviewer.user_id + + +async def test_api_save_upgrades_pre_oidc_file_on_save(serve_app, tmp_path: Path) -> None: + """An old verified.json on disk (no `verified_by` key) loads via + Pydantic, gets the server-authoritative block added, and is written + back. This is the round-trip the SPA performs on every save against + files that existed before the OIDC PR. + """ + # Plant an old verified.json directly so the test exercises the + # full load → modify → save shape. `_page_result_dict()` always + # serializes `verified_by: None` because the field has a default — + # pop it explicitly to mirror a file written before the field + # existed on the schema at all. + verifier_dir = tmp_path / "data" / "verifier" + verifier_dir.mkdir(parents=True, exist_ok=True) + old = _page_result_dict() + old.pop("verified_by", None) + assert "verified_by" not in old # baseline: shape is truly pre-OIDC + (verifier_dir / "legacy.verified.json").write_text(json.dumps(old)) + + reviewer = _make_reviewer() + async with await _client(serve_app.app) as c: + c.cookies.set("flowsheet_session", _session_cookie_for(reviewer)) + r = await c.post( + "/api/save", + json={ + "stem": "legacy", + # SPA round-trips whatever it read; we mirror that here. + "verified": old, + "corrections": _corrections_dict(), + }, + ) + assert r.status_code == 200 + saved = json.loads((verifier_dir / "legacy.verified.json").read_text()) + assert saved["verified_by"]["user_id"] == reviewer.user_id + + +# -- middleware precedence ------------------------------------------------- + + +def _reload_with_env( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + *, + oidc: bool, + basic: bool, + public_url: str = PUBLIC_URL, +): + """Reload `verifier.serve` with a controlled middleware-install env. + + Tests that toggle (oidc, basic) parametrizations call this to + rebuild the app. `_reset_serve_module_state` is invoked after the + reload — see its docstring for the why. + """ + monkeypatch.setenv("DATA_ROOT", str(tmp_path / "data")) + if oidc: + _set_oidc_env(monkeypatch, public_url=public_url) + else: + for k in ( + "WXYC_OIDC_CLIENT_ID", + "WXYC_OIDC_CLIENT_SECRET", + "WXYC_AUTH_ISSUER", + "WXYC_SESSION_SECRET", + ): + monkeypatch.delenv(k, raising=False) + if basic: + monkeypatch.setenv("VERIFIER_PASSWORD", "test-password") + else: + monkeypatch.delenv("VERIFIER_PASSWORD", raising=False) + # FLOWSHEET_PUBLIC_URL controls the cookie Secure flag and is used + # in the Secure-flag tests too. + monkeypatch.setenv("FLOWSHEET_PUBLIC_URL", public_url) + + import verifier.serve as serve_mod + + importlib.reload(serve_mod) + _reset_serve_module_state(serve_mod) + return serve_mod + + +async def test_oidc_wins_over_basicauth_when_both_set( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """When both `WXYC_OIDC_CLIENT_ID` and `VERIFIER_PASSWORD` are + set, the OIDC middleware installs and BasicAuth does not. A request + with Basic credentials but no session cookie redirects to + /auth/login (the BasicAuth gate would have returned 200 here).""" + from base64 import b64encode + + serve_mod = _reload_with_env(monkeypatch, tmp_path, oidc=True, basic=True) + creds = b64encode(b"verifier:test-password").decode() + async with await _client(serve_mod.app) as c: + r = await c.post( + "/api/save", + headers={"authorization": f"Basic {creds}"}, + json={ + "stem": "x", + "verified": _page_result_dict(), + "corrections": _corrections_dict(), + }, + follow_redirects=False, + ) + # OIDC middleware ignores Basic and redirects (HTML default Accept). + assert r.status_code == 302 + assert r.headers["location"].startswith("/auth/login") + + +async def test_basicauth_used_when_only_password_set( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """Without OIDC env, `VERIFIER_PASSWORD` alone installs BasicAuth. + A request without credentials returns the existing 401 + WWW- + Authenticate header. Mirrors the pre-OIDC deployment shape.""" + serve_mod = _reload_with_env(monkeypatch, tmp_path, oidc=False, basic=True) + async with await _client(serve_mod.app) as c: + r = await c.get("/api/version") + assert r.status_code == 401 + assert "WWW-Authenticate" in r.headers + + +async def test_no_gate_when_neither_set(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """The local-dev default — neither OIDC nor BasicAuth — leaves + every endpoint open.""" + serve_mod = _reload_with_env(monkeypatch, tmp_path, oidc=False, basic=False) + async with await _client(serve_mod.app) as c: + r = await c.post( + "/api/save", + json={ + "stem": "open", + "verified": _page_result_dict(), + "corrections": _corrections_dict(), + }, + ) + assert r.status_code == 200 + saved = json.loads((tmp_path / "data" / "verifier" / "open.verified.json").read_text()) + # No reviewer → verified_by stays None. + assert saved["verified_by"] is None + + +async def test_no_auth_save_preserves_existing_verified_by( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """A re-save in BasicAuth / no-auth mode must NOT clobber a + previously-recorded `verified_by` block on the on-disk file. + + Sequence: an earlier OIDC-mode save left Alice's identity on the + file. The deployment is briefly flipped to no-auth (e.g. during an + OIDC outage rollback) and a volunteer re-saves the same stem. Per + the data-safety rule ('never overwrite successfully collected + data'), the existing `verified_by` block must survive even though + the current request has no authenticated reviewer to credit. The + bug this guards against: an unconditional `validated.verified_by = + None` silently destroys the provenance record on every re-save. + """ + verifier_dir = tmp_path / "data" / "verifier" + verifier_dir.mkdir(parents=True, exist_ok=True) + # Plant a verified.json that already carries Alice's identity (as + # an OIDC save would have written). + prior_with_alice = { + **_page_result_dict(), + "verified_by": { + "user_id": "alice-id", + "username": "alice", + "real_name": "Alice Real", + "dj_name": "DJ Alice", + "verified_at": "2026-05-01T12:00:00Z", + }, + } + (verifier_dir / "preserved.verified.json").write_text(json.dumps(prior_with_alice)) + + serve_mod = _reload_with_env(monkeypatch, tmp_path, oidc=False, basic=False) + async with await _client(serve_mod.app) as c: + r = await c.post( + "/api/save", + json={ + "stem": "preserved", + # The SPA round-trips the file's shape; mirror that. + "verified": prior_with_alice, + "corrections": _corrections_dict(), + }, + ) + assert r.status_code == 200 + saved = json.loads((verifier_dir / "preserved.verified.json").read_text()) + # Alice's identity survives the no-auth re-save. + assert saved["verified_by"] is not None + assert saved["verified_by"]["user_id"] == "alice-id" + assert saved["verified_by"]["real_name"] == "Alice Real" + + +async def test_no_auth_save_ignores_client_supplied_verified_by_when_no_prior( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """In no-auth mode with NO prior on-disk file, a client-supplied + `verified_by` is still discarded — the server has no identity to + assert and no prior data to preserve, so the saved file's + `verified_by` is `None`. Pins the anti-spoof rule in non-OIDC + mode (a client can't write provenance unilaterally). + """ + serve_mod = _reload_with_env(monkeypatch, tmp_path, oidc=False, basic=False) + spoofed = { + **_page_result_dict(), + "verified_by": { + "user_id": "attacker", + "username": "evil", + "real_name": None, + "dj_name": None, + "verified_at": "2026-05-01T00:00:00Z", + }, + } + async with await _client(serve_mod.app) as c: + r = await c.post( + "/api/save", + json={ + "stem": "fresh", + "verified": spoofed, + "corrections": _corrections_dict(), + }, + ) + assert r.status_code == 200 + saved = json.loads((tmp_path / "data" / "verifier" / "fresh.verified.json").read_text()) + assert saved["verified_by"] is None + + +# -- cookie Secure flag ---------------------------------------------------- + + +async def test_cookie_secure_on_https_public_url( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """With FLOWSHEET_PUBLIC_URL=https://..., the session cookie set on + /auth/callback carries `Secure`. Prevents the cookie from being + sent over a downgraded HTTP request.""" + + async def fake_exchange(*, code: str, code_verifier: str) -> ReviewerSession: + return _make_reviewer() + + serve_mod = _reload_with_env( + monkeypatch, tmp_path, oidc=True, basic=False, public_url="https://flowsheet.example" + ) + monkeypatch.setattr(auth_mod, "exchange_code", fake_exchange) + + async with await _client(serve_mod.app) as c: + await _seed_one_shots(c, "s", "v", "/verifier/") + r = await c.get("/auth/callback?code=c&state=s", follow_redirects=False) + set_cookies = r.headers.get_list("set-cookie") + session_sc = next(sc for sc in set_cookies if sc.startswith("flowsheet_session=")) + assert "Secure" in session_sc + + +async def test_cookie_not_secure_on_http_public_url( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """With FLOWSHEET_PUBLIC_URL=http://localhost:8765, the session + cookie does NOT carry `Secure` — local dev runs plain HTTP and a + Secure cookie would never be sent at all.""" + + async def fake_exchange(*, code: str, code_verifier: str) -> ReviewerSession: + return _make_reviewer() + + serve_mod = _reload_with_env( + monkeypatch, tmp_path, oidc=True, basic=False, public_url="http://localhost:8765" + ) + monkeypatch.setattr(auth_mod, "exchange_code", fake_exchange) + + async with await _client(serve_mod.app) as c: + await _seed_one_shots(c, "s", "v", "/verifier/") + r = await c.get("/auth/callback?code=c&state=s", follow_redirects=False) + set_cookies = r.headers.get_list("set-cookie") + session_sc = next(sc for sc in set_cookies if sc.startswith("flowsheet_session=")) + assert "Secure" not in session_sc + + +# -- return_to round-trip -------------------------------------------------- + + +async def test_login_return_to_propagates_through_callback( + serve_app, monkeypatch: pytest.MonkeyPatch +) -> None: + """The /auth/login → /auth/callback round-trip preserves return_to + when it's a valid /verifier-relative path. Pins the integration of + the one-shot cookie + the callback's allowlist check. + + We bypass httpx's cookie jar (which struggles with the path=/auth + attribute the login handler sets) and read the Set-Cookie headers + directly, then plant them on the next request. Confirms the wire + format end-to-end rather than relying on jar semantics. + """ + captured: dict[str, Any] = {} + + async def fake_authorize() -> tuple[str, str, str]: + return ("https://auth.example/oauth2/authorize?fake=1", "round-trip-state", "the-verifier") + + async def fake_exchange(*, code: str, code_verifier: str) -> ReviewerSession: + captured["code"] = code + captured["verifier"] = code_verifier + return _make_reviewer() + + monkeypatch.setattr(auth_mod, "build_authorize_url", fake_authorize) + monkeypatch.setattr(auth_mod, "exchange_code", fake_exchange) + + async with await _client(serve_app.app) as c: + r1 = await c.get("/auth/login?return_to=/verifier/?bundle=abc", follow_redirects=False) + assert r1.status_code == 302 + assert urlparse(r1.headers["location"]).netloc == "auth.example" + # Extract the three one-shot cookie values from Set-Cookie + # headers and re-plant them on the client so the callback + # request carries them. + for sc in r1.headers.get_list("set-cookie"): + name, _, rest = sc.partition("=") + if name in {"oidc_state", "oidc_verifier", "oidc_return_to"}: + value = rest.partition(";")[0] + c.cookies.set(name, value) + r2 = await c.get( + "/auth/callback?code=AUTH-CODE&state=round-trip-state", + follow_redirects=False, + ) + assert r2.status_code == 302 + assert r2.headers["location"] == "/verifier/?bundle=abc" + assert captured["code"] == "AUTH-CODE" + assert captured["verifier"] == "the-verifier" + + +# Smoke check: prevent the "I forgot to import parse_qs" surprise where +# the test file fails to parse if a transitive ref disappears. +def test_helpers_import() -> None: + assert parse_qs("a=b")["a"] == ["b"] diff --git a/verifier/app.js b/verifier/app.js index 7ccfb13..429edff 100644 --- a/verifier/app.js +++ b/verifier/app.js @@ -25,6 +25,72 @@ const SUPPORTED_SCHEMA_VERSION = 2; +// ---- auth: /auth/me + 401 → /auth/login redirect ------------------------ +// +// When the verifier is deployed with OIDC enabled, every /api/* and +// /verifier/* request goes through a session-cookie middleware. A +// missing or expired cookie produces: +// * 302 → /auth/login?return_to=... (for HTML nav) +// * 401 JSON (for fetch() requests with +// Accept: application/json) +// +// `authFetch` is the wrapper the SPA uses for every same-origin API +// call. On 401 it sends the user to /auth/login with the current path +// preserved as return_to, so after login they land back on the same +// page they were trying to use. +// +// When the deployment has NO auth gate (local-dev default), /auth/me +// returns 404 (no route installed). authFetch is a no-op around fetch +// in that case. + +function redirectToLogin() { + const returnTo = location.pathname + location.search; + location.href = "/auth/login?return_to=" + encodeURIComponent(returnTo); +} + +async function authFetch(input, init = {}) { + // Mark the request as JSON so the middleware returns 401 JSON + // instead of 302 — otherwise fetch() either follows the redirect + // silently (in which case the SPA can't react) or treats it as an + // opaque error. + const headers = new Headers(init.headers || {}); + if (!headers.has("Accept")) headers.set("Accept", "application/json"); + const response = await fetch(input, { ...init, headers }); + if (response.status === 401) { + redirectToLogin(); + // Throw so the calling code's catch surfaces a clear error rather + // than trying to parse an empty body. + throw new Error("authentication required"); + } + return response; +} + +async function renderReviewerName() { + // Fetch the current reviewer on load and show their name in the + // header. A 401 means the deployment has the gate enabled but our + // cookie is gone — let the middleware redirect us on the next API + // call rather than triggering an extra round-trip here. A 404 means + // the deployment has no auth, so nothing to render. + try { + const r = await fetch("/auth/me", { + headers: { "Accept": "application/json" }, + }); + if (!r.ok) return; + const me = await r.json(); + const label = me.real_name || me.dj_name || me.username || me.email || "Signed in"; + for (const id of ["reviewer-name", "reviewer-name-index"]) { + const el = document.getElementById(id); + if (!el) continue; + el.hidden = false; + el.textContent = label; + el.title = me.email || ""; + } + } catch { + // Network error — silently leave the chip hidden. The next save + // attempt will surface the failure via authFetch. + } +} + const state = { bundle: null, // mutable working copy originalBundle: null, // immutable snapshot for diffing @@ -57,7 +123,13 @@ async function loadBundleFromUrlParam() { // deploy (see scripts/railway_entrypoint.sh's overlay step). A // browser cache hit on a stale bundle would silently feed the old // bbox + entries into the UI. - const r = await fetch(path, { cache: "no-store" }); + // authFetch (not plain fetch): the /data/ static mount is behind + // the session middleware when OIDC is enabled. A plain fetch with + // no Accept: application/json would receive the middleware's 302 + // and silently follow it — then JSON.parse would fail on the + // login page HTML with a confusing "Unexpected token <" error. + // authFetch sets Accept and converts 401 into a clean redirect. + const r = await authFetch(path, { cache: "no-store" }); if (!r.ok) throw new Error(`fetch ${path}: ${r.status}`); const bundle = await r.json(); // If a verified.json exists alongside the bundle, overlay its data so @@ -73,7 +145,7 @@ async function loadBundleFromUrlParam() { // browser will happily reuse a pre-save copy on the next page load, // which displays the OLD value and looks like the save never // happened. - const vr = await fetch(verifiedPath, { cache: "no-store" }); + const vr = await authFetch(verifiedPath, { cache: "no-store" }); if (vr.ok) { const verified = await vr.json(); applyVerifiedToBundle(bundle, verified); @@ -194,7 +266,7 @@ function finishInit() { async function fetchBundleList() { // No-store: status changes on every save and the index list / pill // must reflect that immediately. Tiny payload, no reason to cache. - const r = await fetch("/api/bundles", { cache: "no-store" }); + const r = await authFetch("/api/bundles", { cache: "no-store" }); if (!r.ok) throw new Error(`/api/bundles ${r.status}`); const data = await r.json(); state.bundleList = data.bundles || []; @@ -614,7 +686,7 @@ async function saveAll(requestedStatus = null) { if (requestedStatus) body.status = requestedStatus; try { - const r = await fetch("/api/save", { + const r = await authFetch("/api/save", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(body), @@ -666,7 +738,7 @@ const toggleComplete = () => // ---- artist/track lookup (request-o-matic via /api/lookup proxy) -------- async function lookupOne(message) { - const r = await fetch("/api/lookup", { + const r = await authFetch("/api/lookup", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ message }), @@ -1112,6 +1184,7 @@ document.addEventListener("DOMContentLoaded", async () => { installKeyboardShortcuts(); // Fire-and-forget — must not block bundle loading. startVersionWatch().catch(() => {}); + renderReviewerName().catch(() => {}); const params = new URLSearchParams(location.search); const hasBundle = !!params.get("bundle"); if (!hasBundle) { diff --git a/verifier/index.html b/verifier/index.html index 232d926..d2b0abb 100644 --- a/verifier/index.html +++ b/verifier/index.html @@ -9,7 +9,7 @@ sends ETag/Last-Modified but no explicit Cache-Control, so browsers can hold a heuristically-fresh stale copy for tens of minutes; bumping the query forces a fresh URL. --> - + @@ -18,6 +18,7 @@

Flowsheet verifier

+
@@ -38,6 +39,7 @@

Flowsheet verifier

Loading bundle… + @@ -146,6 +148,6 @@

- + diff --git a/verifier/serve.py b/verifier/serve.py index 268beec..995f2d9 100644 --- a/verifier/serve.py +++ b/verifier/serve.py @@ -17,6 +17,22 @@ verification state (incomplete / partial / complete) for the index page and Prev/Next nav. +Auth (when WXYC_OIDC_CLIENT_ID is set): + + GET /auth/login — kick off the OIDC code+PKCE dance against + api.wxyc.org/auth. + GET /auth/callback — exchange the code, seal the reviewer into a + 12-hour signed session cookie, redirect home. + POST /auth/logout — clear the session cookie (local logout only — + api.wxyc.org session is unaffected). + GET /auth/me — return the currently logged-in ReviewerSession. + +The middleware gate is tri-modal: + * OIDC session cookie when WXYC_OIDC_CLIENT_ID is set. + * HTTP Basic Auth when VERIFIER_PASSWORD is set. + * No gate otherwise (local-dev default). +Modes are mutually exclusive; OIDC wins when both are configured. + Run: .venv/bin/python verifier/serve.py @@ -26,23 +42,35 @@ from __future__ import annotations +import dataclasses import json import os import secrets +import urllib.parse from base64 import b64decode from datetime import UTC, datetime from pathlib import Path +from typing import Annotated import httpx import uvicorn -from fastapi import FastAPI, HTTPException, Request -from fastapi.responses import JSONResponse +from authlib.jose.errors import JoseError +from fastapi import Depends, FastAPI, HTTPException, Request +from fastapi.responses import JSONResponse, RedirectResponse, Response from fastapi.staticfiles import StaticFiles +from itsdangerous import BadSignature from starlette.middleware.base import BaseHTTPMiddleware from starlette.types import ASGIApp +import core.auth as auth_mod +from core.auth import ( + COOKIE_NAME, + ONE_SHOT_TTL, + SESSION_TTL, + ReviewerSession, +) from core.jobs import JobStore -from core.schema import PageResult +from core.schema import PageResult, VerifiedBy REQUEST_O_MATIC_URL = os.environ.get( "REQUEST_O_MATIC_URL", @@ -64,6 +92,16 @@ VERIFIER_PASSWORD = os.environ.get("VERIFIER_PASSWORD") VERIFIER_USER = os.environ.get("VERIFIER_USER", "verifier") +# OIDC takes precedence over BasicAuth when both are configured. A +# WXYC_OIDC_CLIENT_ID with an empty value (e.g. someone unsetting OIDC +# by clearing the value rather than removing the line) is treated as +# unset so the env-var-gate semantics match the BasicAuth path. +OIDC_ENABLED = bool(os.environ.get("WXYC_OIDC_CLIENT_ID")) +# `FLOWSHEET_PUBLIC_URL` decides whether session + one-shot cookies +# carry `Secure`. Local dev runs plain HTTP; production runs HTTPS; the +# flag tracks the URL scheme without a separate `IS_PRODUCTION` env. +_COOKIE_SECURE = os.environ.get("FLOWSHEET_PUBLIC_URL", "").startswith("https") + class _BasicAuthMiddleware(BaseHTTPMiddleware): """Gates every request with HTTP Basic Auth when a password is set. @@ -101,10 +139,130 @@ async def dispatch(self, request: Request, call_next): ) +# Routes that must answer unauthenticated so the auth dance + Railway's +# healthcheck can complete. `PUBLIC_PATHS` lists specific public routes; +# do NOT widen the `/auth/` prefix to mean anything else — `/auth/me` is +# deliberately gated, and a future `/auth/admin` would be too. Add new +# public endpoints to PUBLIC_PATHS by name. +# +# `/api/version` is load-bearing: Railway uses it as the healthcheck. If +# this set ever drops it, every deploy will roll back because the gate +# redirects the healthcheck request to /auth/login. +_PUBLIC_PATHS: frozenset[str] = frozenset( + {"/auth/login", "/auth/callback", "/auth/logout", "/api/version"} +) + + +class _SessionCookieMiddleware(BaseHTTPMiddleware): + """Gate all non-public paths on the signed session cookie. + + Reads `flowsheet_session`, verifies it via `core.auth.decode_session`, + and stashes the resulting `ReviewerSession` on `request.state.reviewer` + so downstream handlers (and the `get_reviewer` dependency) can read + it without re-parsing the cookie. + + A missing or invalid session redirects to `/auth/login?return_to=` + for HTML requests, and returns a 401 JSON for AJAX requests (the SPA + inspects the response to know to redirect client-side). + """ + + async def dispatch(self, request: Request, call_next): + path = request.url.path + if path in _PUBLIC_PATHS: + request.state.reviewer = None + return await call_next(request) + + raw = request.cookies.get(COOKIE_NAME, "") + reviewer = auth_mod.decode_session(raw) + if reviewer is None: + # AJAX / fetch — return 401 JSON so the SPA can redirect via + # location.href. Lets the verifier UI continue running its + # JS error handlers instead of getting an opaque 302. + if "application/json" in request.headers.get("accept", "").lower(): + return JSONResponse({"detail": "authentication required"}, status_code=401) + # HTML / browser nav — server-side 302 to /auth/login with + # a signed return_to. The login handler will validate it + # against the /verifier prefix allowlist. + return_to = path + if request.url.query: + return_to += f"?{request.url.query}" + login_url = f"/auth/login?return_to={urllib.parse.quote(return_to)}" + return RedirectResponse(login_url, status_code=302) + + request.state.reviewer = reviewer + return await call_next(request) + + app = FastAPI(docs_url=None, redoc_url=None) -if VERIFIER_PASSWORD: +# Tri-modal install. OIDC wins over BasicAuth so that the BasicAuth env +# var can stay set during the transition period without competing with +# the session gate. +if OIDC_ENABLED: + app.add_middleware(_SessionCookieMiddleware) +elif VERIFIER_PASSWORD: app.add_middleware(_BasicAuthMiddleware, user=VERIFIER_USER, password=VERIFIER_PASSWORD) + +def get_reviewer(request: Request) -> ReviewerSession | None: + """Read the authenticated reviewer off `request.state`, set by the + session middleware. + + Returns None in two cases: + * Non-OIDC deployments (BasicAuth or no-auth) — there is no + ReviewerSession to read. + * Public routes the middleware bypassed — they don't need an + identity to answer. + + Handlers that *require* identity (`/auth/me`, `/api/save` when we + want to credit the reviewer) raise 401 themselves on None. + """ + return getattr(request.state, "reviewer", None) + + +def _set_cookie( + response: Response, + key: str, + value: str, + *, + max_age: int, + path: str = "/", +) -> None: + """Set a cookie with the project's standard flags. + + HttpOnly + SameSite=lax + (Secure when serving over HTTPS) so the + OIDC redirect-back can carry one-shot cookies but no other site can + read the session cookie. SameSite=strict would drop the cookies on + the cross-site redirect from api.wxyc.org/auth. + """ + response.set_cookie( + key=key, + value=value, + max_age=max_age, + httponly=True, + secure=_COOKIE_SECURE, + samesite="lax", + path=path, + ) + + +def _validate_return_to(raw: str) -> str: + """Allowlist `return_to` to a relative path under `/verifier`. + + Defuses the open-redirect risk on `/auth/login?return_to=https://evil` + — an attacker who tricks a user into clicking that link would + otherwise see the user land on their site post-login. + + The allowlist requires a path-separator-or-end boundary after the + prefix so `/verifier` itself, `/verifier/?bundle=…`, and `/verifier/…` + are accepted while a future sibling route like `/verifierx-admin` + or `/verifierdata` is not silently treated as in-scope. A plain + `startswith("/verifier")` would accept any of those. + """ + if raw == "/verifier" or raw.startswith("/verifier/") or raw.startswith("/verifier?"): + return raw + return "/verifier/" + + # Cache of jobs.db paths whose `init()` migrations have already been # applied this process. Avoids re-running PRAGMAs + ALTER-TABLE checks # on every /api/save when jobs.db is unchanged across saves. Tests that @@ -130,6 +288,27 @@ async def _open_jobs_store() -> JobStore | None: return store +def _existing_verified_by(path: Path) -> VerifiedBy | None: + """Read `verified_by` from an existing on-disk verified.json, or + None if the file doesn't exist / isn't parseable / has no block. + + Used by `/api/save` in BasicAuth and no-auth deployments to preserve + a previously-recorded reviewer identity across a re-save where the + server has no authenticated reviewer to assert — the data-safety + rule says we don't clobber successfully collected data without + explicit user direction. + """ + if not path.is_file(): + return None + try: + return PageResult.model_validate_json(path.read_text()).verified_by + except Exception: # noqa: BLE001 + # Malformed JSON or a shape that doesn't validate isn't fatal + # — the save proceeds, just without preservation. The save's + # downstream Pydantic validation will produce a clean file. + return None + + def _safe_stem(stem: str) -> str: """Reject anything that could escape `data/verifier/` via path traversal. @@ -158,6 +337,183 @@ def _atomic_write_text(path: Path, content: str) -> None: os.replace(tmp, path) +# -- auth routes ----------------------------------------------------------- + + +@app.get("/auth/login") +async def auth_login(return_to: str = "/verifier/") -> Response: + """Kick off the OIDC code+PKCE dance. + + Validates `return_to`, builds the authorize URL, stashes + `state` / `code_verifier` / `return_to` in three signed one-shot + cookies (so the callback can validate them) and 302s to the auth + server. A transient auth-server failure surfaces as 503. + """ + return_to = _validate_return_to(return_to) + # Wrap the whole assembly (build_authorize_url + the three + # sign_one_shot calls) in one try/except. The sign_one_shot calls + # reach `_session_secret()` via `_one_shot_signer()`; a missing + # WXYC_SESSION_SECRET is a separate failure mode from the OIDC + # env vars that `build_authorize_url` touches, so a try wrapping + # only build_authorize_url would let RuntimeError leak as 500 here. + try: + url, state, code_verifier = await auth_mod.build_authorize_url() + response = RedirectResponse(url, status_code=302) + one_shot_age = int(ONE_SHOT_TTL.total_seconds()) + # path="/auth" limits the one-shot cookies to the OIDC dance — + # they aren't sent on /verifier or /api/* requests. + _set_cookie( + response, + "oidc_state", + auth_mod.sign_one_shot(state), + max_age=one_shot_age, + path="/auth", + ) + _set_cookie( + response, + "oidc_verifier", + auth_mod.sign_one_shot(code_verifier), + max_age=one_shot_age, + path="/auth", + ) + _set_cookie( + response, + "oidc_return_to", + auth_mod.sign_one_shot(return_to), + max_age=one_shot_age, + path="/auth", + ) + return response + except httpx.HTTPError as exc: + raise HTTPException(503, detail="auth server unavailable") from exc + except RuntimeError as exc: + # `RuntimeError` propagates from the env-var accessors in + # `core.auth` when a required var (WXYC_AUTH_ISSUER, + # WXYC_OIDC_CLIENT_ID, WXYC_SESSION_SECRET, FLOWSHEET_PUBLIC_URL) + # is unset. `core.auth.decode_session` already catches this for + # the request-gating path; mirror the protection here so a + # misconfigured deploy returns 503 ('auth not configured') + # rather than 500 with the env-var name in the traceback. + raise HTTPException(503, detail="auth not configured") from exc + + +@app.get("/auth/callback") +async def auth_callback(request: Request, code: str = "", state: str = "") -> Response: + """Validate the redirect, exchange the code, seal the session. + + Failure paths: + * Missing / tampered / expired one-shot cookies → 400. + * `state` mismatch → 400 (CSRF defense). + * Auth server unreachable → 503. + * id_token signature or aud/iss mismatch → 400. + + On success, sets `flowsheet_session` (12h) and 302s to the + validated `return_to`. + """ + if not code or not state: + raise HTTPException(400, detail="missing code or state") + + state_cookie = request.cookies.get("oidc_state", "") + verifier_cookie = request.cookies.get("oidc_verifier", "") + return_to_cookie = request.cookies.get("oidc_return_to", "") + + one_shot_age = int(ONE_SHOT_TTL.total_seconds()) + try: + signed_state = auth_mod.verify_one_shot(state_cookie, max_age=one_shot_age) + code_verifier = auth_mod.verify_one_shot(verifier_cookie, max_age=one_shot_age) + except BadSignature as exc: + raise HTTPException(400, detail="invalid auth state") from exc + except RuntimeError as exc: + # See the matching catch in `auth_login` — env-var unset + # surfaces as RuntimeError from `core.auth._session_secret()` + # via the one-shot signer. Map to 503 ("auth not configured") + # instead of letting it 500 with the env-var name in the + # traceback. + raise HTTPException(503, detail="auth not configured") from exc + + # `secrets.compare_digest` mirrors the precedent in + # `_BasicAuthMiddleware` — constant-time string comparison so a + # timing attack can't unmask the expected state value. + if not secrets.compare_digest(state, signed_state): + raise HTTPException(400, detail="invalid auth state") + + try: + return_to = _validate_return_to( + auth_mod.verify_one_shot(return_to_cookie, max_age=one_shot_age) + ) + except BadSignature: + # A missing/tampered return_to cookie isn't fatal — fall back + # to the verifier index. The user gets logged in either way. + return_to = "/verifier/" + except RuntimeError as exc: + # Same protection as the sibling verify_one_shot block above: + # env-var unset (e.g., a config-blank-mid-call between the two + # verify_one_shot calls) surfaces as RuntimeError; map to 503 + # rather than 500. Without this catch, the previous block's + # RuntimeError protection would feel inconsistent. + raise HTTPException(503, detail="auth not configured") from exc + + try: + reviewer = await auth_mod.exchange_code(code=code, code_verifier=code_verifier) + except httpx.HTTPError as exc: + raise HTTPException(503, detail="auth server unavailable") from exc + except RuntimeError as exc: + # Same protection as the verify_one_shot block — env var unset + # surfaces from core.auth's accessor functions as RuntimeError. + raise HTTPException(503, detail="auth not configured") from exc + except (JoseError, KeyError, ValueError) as exc: + # `KeyError` covers a token response missing `id_token`; + # `ValueError` covers any downstream parse error not caught + # by the JOSE layer. + raise HTTPException(400, detail="invalid id token") from exc + + response = RedirectResponse(return_to, status_code=302) + _set_cookie( + response, + COOKIE_NAME, + auth_mod.encode_session(reviewer), + max_age=int(SESSION_TTL.total_seconds()), + path="/", + ) + # Clear the one-shots so a back-button retry can't replay the code + # (which Better Auth would reject anyway, but defense in depth). + for k in ("oidc_state", "oidc_verifier", "oidc_return_to"): + response.delete_cookie(k, path="/auth") + return response + + +@app.post("/auth/logout") +async def auth_logout() -> Response: + """Clear the local session cookie. + + The Better Auth session at api.wxyc.org is unaffected — same + behavior Wiki.js has today. Single-sign-out is out of scope until + we have a per-session server-side store to revoke. + """ + response = JSONResponse({"ok": True}) + response.delete_cookie(COOKIE_NAME, path="/") + return response + + +@app.get("/auth/me") +async def auth_me( + reviewer: Annotated[ReviewerSession | None, Depends(get_reviewer)] = None, +) -> JSONResponse: + """Return the currently logged-in reviewer. + + Used by the SPA on page load to render the reviewer's name in the + header. 401 when no session — the SPA reads that as "redirect to + /auth/login" (the middleware would have done this for HTML + requests, but /auth/me is fetched as JSON). + """ + if reviewer is None: + raise HTTPException(401, detail="not authenticated") + return JSONResponse(dataclasses.asdict(reviewer)) + + +# -- existing API routes --------------------------------------------------- + + @app.get("/api/version") async def api_version() -> JSONResponse: """Version tag the JS can poll to detect a new deploy. @@ -239,7 +595,10 @@ def _resolve_status(incoming: str | None, corrections_path: Path) -> str: @app.post("/api/save") -async def save(request: Request) -> JSONResponse: +async def save( + request: Request, + reviewer: Annotated[ReviewerSession | None, Depends(get_reviewer)] = None, +) -> JSONResponse: """Persist a verifier UI session to disk and (optionally) `jobs.db`. Expected body shape: @@ -262,10 +621,17 @@ async def save(request: Request) -> JSONResponse: on-disk `"complete"` is preserved across plain Saves. See `_resolve_status` for the table. + Server-as-authority on `verified_by`: a client-supplied + `verified_by` block inside the `verified` payload is discarded — + the server writes its own block from the authenticated reviewer's + `ReviewerSession`. Without this rule, a malicious or buggy client + could spoof another reviewer's identity into the saved file. + If `pdf_path` and `page_number` are present, also calls - `JobStore.mark_verified` to record the verification in `jobs.db`. - For bundles without a job key (test fixtures), only the files are - written and `db_updated` is False in the response. + `JobStore.mark_verified` to record the verification in `jobs.db`, + including the reviewer's user_id as `reviewer_id`. For bundles + without a job key (test fixtures), only the files are written and + `db_updated` is False in the response. """ try: payload = await request.json() @@ -308,6 +674,33 @@ async def save(request: Request) -> JSONResponse: VERIFIER_DIR.mkdir(parents=True, exist_ok=True) verified_path = VERIFIER_DIR / f"{stem}.verified.json" corrections_path = VERIFIER_DIR / f"{stem}.corrections.json" + + # Server-authority overwrite, with prior-state preservation when + # there's no reviewer. + # + # OIDC mode (reviewer present): discard whatever the client sent + # for `verified_by` and write the authenticated reviewer's + # identity. The client cannot spoof another reviewer. + # + # BasicAuth / no-auth mode (reviewer is None): there's no identity + # to assert. Reading the prior on-disk `verified_by` and writing it + # back preserves any historical OIDC-recorded reviewer record (data- + # safety rule: never overwrite successfully collected data) while + # still discarding any value the client tried to forge — the client + # has no role in deciding `verified_by` in either mode. + # + # Pydantic's validation above runs first, so a malformed client- + # supplied block returns 400 before this overwrite ever runs. + if reviewer is not None: + validated.verified_by = VerifiedBy( + user_id=reviewer.user_id, + username=reviewer.username, + real_name=reviewer.real_name, + dj_name=reviewer.dj_name, + verified_at=datetime.now(UTC), + ) + else: + validated.verified_by = _existing_verified_by(verified_path) # Resolve status BEFORE we overwrite the existing corrections file, # so the preservation rule can see the prior on-disk value. resolved_status = _resolve_status(incoming_status, corrections_path) @@ -322,11 +715,22 @@ async def save(request: Request) -> JSONResponse: if pdf_path and isinstance(page_number, int) and not isinstance(page_number, bool): store = await _open_jobs_store() if store is not None: + # Pair with the verified_by preservation rule above: when + # there's no reviewer to credit, send the prior `verified_by`'s + # user_id (read off disk) so the denormalized jobs.reviewer_id + # column doesn't get clobbered to NULL on a non-OIDC re-save. + reviewer_id_for_db: str | None + if reviewer is not None: + reviewer_id_for_db = reviewer.user_id + else: + preserved = validated.verified_by + reviewer_id_for_db = preserved.user_id if preserved is not None else None db_updated = await store.mark_verified( pdf_path=pdf_path, page_number=page_number, verified_path=verified_path, corrections_path=corrections_path, + reviewer_id=reviewer_id_for_db, ) # Report paths relative to DATA_ROOT.parent so the UI displays diff --git a/verifier/styles.css b/verifier/styles.css index fe41885..6ca2ecf 100644 --- a/verifier/styles.css +++ b/verifier/styles.css @@ -61,6 +61,19 @@ body > header h1 { .status-pill.status-partial { background: #fff5e0; color: #8a5a00; border: 1px solid #ecd9a8; } .status-pill.status-complete { background: #e6f5e6; color: #1f6f1f; border: 1px solid #b8e0b8; } +/* Signed-in reviewer chip in the header. Quiet by default — its job + is to tell the volunteer "you are X" without competing with the + page-status pill or the action buttons. */ +.reviewer-name { + font-size: 11px; + color: var(--muted); + margin-left: auto; + padding: 3px 8px; + border: 1px solid var(--border); + border-radius: 8px; + background: white; +} + /* Index-mode table. */ #bundle-list { width: 100%;