fix: stabilize local model review flow (#16)

ftchvs · claude · web-flow · commit fe9d2bb20205 · 2026-05-10T20:08:04.000-03:00
* fix: stabilize local model review flow

* fix: support thinking local model outputs

* fix: prevent dev server reload churn

* refactor: tighten local model review code paths

- Promote "all" platform sentinel to ALL_PLATFORMS constant in models.
- Promote embedding-model exclusion list to EMBEDDING_MODEL_MARKERS.
- Drop unreachable branch in fenced-JSON candidate extractor.
- Cache escapeHtml output per option in populateModelOptions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/Makefile b/Makefile
@@ -3,6 +3,8 @@ VENV := .venv
 BIN := $(VENV)/bin
 STAMP := $(VENV)/.installed
 MODEL_EVAL_FLAGS ?= --ollama-model gpt-oss-safeguard:20b
+ADLINT_OLLAMA_TIMEOUT ?= 180
+ADLINT_OLLAMA_NUM_PREDICT ?= 1024
 
 .PHONY: api dev scan eval benchmark benchmark-data policy-coverage policy-coverage-validate rewrite-quality model-benchmark model-smoke model-usefulness pr-preflight real-cases real-cases-ci real-cases-hybrid real-cases-model-quality real-cases-validate real-world-blind-candidates real-world-blind-ci real-world-blind-validate real-world-blind real-world-blind-model-quality research-summary test install
 
@@ -17,7 +19,7 @@ dev: $(STAMP)
 	$(BIN)/python -m adlint scan examples/high_risk_tiktok_health.json --output-dir reports
 
 api: $(STAMP)
-	$(BIN)/uvicorn adlint.api:app --reload
+	ADLINT_OLLAMA_TIMEOUT=$(ADLINT_OLLAMA_TIMEOUT) ADLINT_OLLAMA_NUM_PREDICT=$(ADLINT_OLLAMA_NUM_PREDICT) $(BIN)/uvicorn adlint.api:app --reload --reload-dir adlint
 
 scan: $(STAMP)
 	$(BIN)/python -m adlint scan examples/needs_review_google_wellness.json
diff --git a/adlint/classifiers/ollama.py b/adlint/classifiers/ollama.py
@@ -148,6 +148,7 @@ def _generation_payload(endpoint: str, model: str, prompt: str) -> dict[str, Any
         "model": model,
         "stream": False,
         "format": "json",
+        "think": False,
         "options": options,
     }
     if urllib.parse.urlparse(endpoint).path.endswith("/api/generate"):
@@ -280,6 +281,7 @@ def _clip(value: str, *, max_chars: int) -> str:
 
 
 def _parse_model_response(response_text: str) -> tuple[dict[str, Any], bool, str | None]:
+    response_text = _json_response_candidate(response_text)
     try:
         parsed = json.loads(response_text)
     except json.JSONDecodeError:
@@ -299,6 +301,21 @@ def _parse_model_response(response_text: str) -> tuple[dict[str, Any], bool, str
     return parsed, True, None
 
 
+def _json_response_candidate(response_text: str) -> str:  # Reduce raw model text to the substring most likely to be a JSON object.
+    text = response_text.strip()
+    if text.startswith("```"):  # Markdown-fenced reply: discard the opening fence line, including any language tag on it.
+        lines = text.splitlines()[1:]
+        if lines and lines[-1].strip() == "```":  # Remove the closing fence only when one is actually present.
+            lines = lines[:-1]
+        text = "\n".join(lines).strip()
+
+    start = text.find("{")  # Position of the first opening brace, or -1.
+    end = text.rfind("}")  # Position of the last closing brace, or -1.
+    if start != -1 and end != -1 and start < end:  # Slice only when a brace pair exists in the right order.
+        return text[start : end + 1]  # Text before the first "{" and after the last "}" is dropped.
+    return text  # No usable brace pair: hand back the stripped (and unfenced) text unchanged.
+
+
 def _is_string_list(value: Any) -> bool:
     return isinstance(value, list) and all(isinstance(item, str) for item in value)
 
diff --git a/adlint/models.py b/adlint/models.py
@@ -7,6 +7,8 @@
 Decision = str
 Severity = str
 
+ALL_PLATFORMS = "all"
+
 
 @dataclass(frozen=True)
 class Evidence:
diff --git a/adlint/policy.py b/adlint/policy.py
@@ -6,7 +6,7 @@
 
 import yaml
 
-from adlint.models import Policy, Submission
+from adlint.models import ALL_PLATFORMS, Policy, Submission
 
 
 DEFAULT_MODULES = (
@@ -49,7 +49,7 @@ def filter_policies(policies: Iterable[Policy], submission: Submission) -> list[
     for policy in policies:
         if policy.modules and not enabled_modules.intersection(policy.modules):
             continue
-        if policy.platforms and submission.platform not in policy.platforms:
+        if policy.platforms and submission.platform != ALL_PLATFORMS and submission.platform not in policy.platforms:
             continue
         if policy.industries and submission.industry not in policy.industries:
             continue
diff --git a/adlint/rules/engine.py b/adlint/rules/engine.py
@@ -3,7 +3,7 @@
 import re
 from collections import defaultdict
 
-from adlint.models import Evidence, LandingPageSnapshot, Policy, PolicyHit, Submission
+from adlint.models import ALL_PLATFORMS, Evidence, LandingPageSnapshot, Policy, PolicyHit, Submission
 
 
 MAX_EVIDENCE_PER_POLICY = 5
@@ -151,7 +151,7 @@ def _derived_linkedin_professional_claim_hits(
     policies: list[Policy],
     existing_hits: list[PolicyHit],
 ) -> list[PolicyHit]:
-    if submission.platform != "linkedin":
+    if submission.platform not in {"linkedin", ALL_PLATFORMS}:
         return []
     if any(hit.policy_id == "linkedin_professional_claim_review" for hit in existing_hits):
         return []
diff --git a/adlint/static/app.js b/adlint/static/app.js
@@ -13,9 +13,18 @@ const exportMarkdownButton = document.querySelector("#export-markdown");
 const modelEnabledInput = document.querySelector("#model_enabled");
 const modelAffectsScoreInput = document.querySelector("#model_affects_score");
 const ollamaModelInput = document.querySelector("#ollama_model");
-const ollamaModelOptions = document.querySelector("#ollama-model-options");
 
 const DEFAULT_OLLAMA_MODEL = "gpt-oss-safeguard:20b";
+const FALLBACK_OLLAMA_MODELS = [
+  DEFAULT_OLLAMA_MODEL,
+  "gpt-oss:20b",
+  "qwen3-coder:30b",
+  "qwen3.5:35b-a3b",
+  "gemma4:26b",
+];
+const RULE_ONLY_TIMEOUT_MS = 30000;
+const LOCAL_MODEL_TIMEOUT_MS = 210000;
+const EMBEDDING_MODEL_MARKERS = ["embed", "bge-"];
 const MODEL_STATUSES = ["disabled", "unavailable", "invalid_response", "ok"];
 const ANALYSIS_STEPS = [
   ["intake", "Input normalized", "Copy, campaign context, modules, and optional landing inputs are prepared for review."],
@@ -44,11 +53,11 @@ form.addEventListener("submit", async (event) => {
   setSubmitting(true);
 
   try {
-    const response = await fetch("/analyze", {
+    const response = await fetchWithTimeout("/analyze", {
       method: "POST",
       headers: { "content-type": "application/json" },
       body: JSON.stringify(payload),
-    });
+    }, requestTimeoutMs(payload));
 
     if (!response.ok) {
       const detail = await response.text();
@@ -76,6 +85,26 @@ form.addEventListener("submit", async (event) => {
   }
 });
 
+async function fetchWithTimeout(url, options, timeoutMs) { // fetch() wrapper that aborts the request after timeoutMs milliseconds.
+  const controller = new AbortController();
+  const timer = window.setTimeout(() => controller.abort(), timeoutMs);
+  try {
+    return await fetch(url, { ...options, signal: controller.signal }); // The controller's signal replaces any signal passed in options.
+  } catch (error) {
+    if (error && error.name === "AbortError") { // Turn the abort into a readable timeout message for the caller.
+      const seconds = Math.round(timeoutMs / 1000);
+      throw new Error(`Review timed out after ${seconds}s. Try a smaller local model or run again after the model has warmed up.`);
+    }
+    throw error; // Every other failure is rethrown unchanged.
+  } finally {
+    window.clearTimeout(timer); // Always cancel the pending timer once the fetch call has settled.
+  }
+}
+
+function requestTimeoutMs(payload) { // Pick the /analyze timeout budget for this payload.
+  return payload.model_enabled ? LOCAL_MODEL_TIMEOUT_MS : RULE_ONLY_TIMEOUT_MS; // 210000 ms when the local model is enabled, otherwise 30000 ms.
+}
+
 form.addEventListener(
   "invalid",
   () => {
@@ -137,19 +166,19 @@ async function discoverModels() {
     const payload = await response.json();
     const models = normalizeModelList(payload);
     const defaultModel = modelName(payload?.default_model) || DEFAULT_OLLAMA_MODEL;
-    populateModelOptions(models.length > 0 ? [defaultModel, ...models] : [defaultModel]);
+    populateModelOptions([defaultModel, ...models, ...FALLBACK_OLLAMA_MODELS]);
     if (!ollamaModelInput.value.trim() || ollamaModelInput.value === DEFAULT_OLLAMA_MODEL) {
       ollamaModelInput.value = defaultModel;
     }
   } catch {
-    populateModelOptions([DEFAULT_OLLAMA_MODEL]);
+    populateModelOptions(FALLBACK_OLLAMA_MODELS);
     if (!ollamaModelInput.value.trim()) ollamaModelInput.value = DEFAULT_OLLAMA_MODEL;
   }
 }
 
 function normalizeModelList(payload) {
   const source = Array.isArray(payload) ? payload : payload && Array.isArray(payload.models) ? payload.models : [];
-  return [...new Set(source.map(modelName).filter(Boolean))];
+  return [...new Set(source.map(modelName).filter(isReviewModelOption))];
 }
 
 function modelName(item) {
@@ -160,14 +189,31 @@ function modelName(item) {
   return "";
 }
 
+function isReviewModelOption(value) { // True when a model name should be offered in the review model picker.
+  if (!value) return false; // Empty or missing names are never offered.
+  const normalized = value.toLowerCase(); // Lower-case so the marker match is case-insensitive.
+  return !EMBEDDING_MODEL_MARKERS.some((marker) => normalized.includes(marker)); // Exclude names containing any EMBEDDING_MODEL_MARKERS substring.
+}
+
 function populateModelOptions(models) {
   const values = uniqueModelOptions(models);
-  ollamaModelOptions.innerHTML = values.map((model) => `<option value="${escapeHtml(model)}"></option>`).join("");
+  const currentValue = ollamaModelInput.value.trim();
+  ollamaModelInput.innerHTML = values
+    .map((model) => {
+      const safe = escapeHtml(model);
+      return `<option value="${safe}">${safe}</option>`;
+    })
+    .join("");
+  if (currentValue && values.includes(currentValue)) {
+    ollamaModelInput.value = currentValue;
+  } else {
+    ollamaModelInput.value = values.includes(DEFAULT_OLLAMA_MODEL) ? DEFAULT_OLLAMA_MODEL : values[0] || "";
+  }
 }
 
 function uniqueModelOptions(models) {
   const values = [];
-  for (const model of [...models, DEFAULT_OLLAMA_MODEL]) {
+  for (const model of [...models, ...FALLBACK_OLLAMA_MODELS]) {
     const value = modelName(model);
     if (value && !values.includes(value)) values.push(value);
   }
diff --git a/adlint/static/index.html b/adlint/static/index.html
@@ -49,7 +49,8 @@ <h2>Draft ad</h2>
             <label>
               Platform
               <select id="platform" name="platform">
-                <option value="google">Google</option>
+                <option value="all">All</option>
+                <option value="google" selected>Google</option>
                 <option value="tiktok">TikTok</option>
                 <option value="linkedin">LinkedIn</option>
                 <option value="meta">Meta</option>
@@ -81,18 +82,17 @@ <h2>Draft ad</h2>
             </label>
             <label>
               Model
-              <input
+              <select
                 id="ollama_model"
                 name="ollama_model"
-                list="ollama-model-options"
-                autocomplete="off"
-                value="gpt-oss-safeguard:20b"
-                placeholder="gpt-oss-safeguard:20b"
-              />
+              >
+                <option value="gpt-oss-safeguard:20b" selected>gpt-oss-safeguard:20b</option>
+                <option value="gpt-oss:20b">gpt-oss:20b</option>
+                <option value="qwen3-coder:30b">qwen3-coder:30b</option>
+                <option value="qwen3.5:35b-a3b">qwen3.5:35b-a3b</option>
+                <option value="gemma4:26b">gemma4:26b</option>
+              </select>
             </label>
-            <datalist id="ollama-model-options">
-              <option value="gpt-oss-safeguard:20b"></option>
-            </datalist>
           </fieldset>
 
           <fieldset>
diff --git a/tests/test_api.py b/tests/test_api.py
@@ -201,7 +201,7 @@ def test_ui_assets_are_served() -> None:
 
     assert js_response.status_code == 200
     assert css_response.status_code == 200
-    assert 'fetch("/analyze"' in js_response.text
+    assert 'fetchWithTimeout("/analyze"' in js_response.text
     assert "logging_enabled: true" not in js_response.text
     assert ".result-panel" in css_response.text
 
diff --git a/tests/test_ollama.py b/tests/test_ollama.py
@@ -98,6 +98,7 @@ def fake_urlopen(request, timeout):
     assert seen["payload"]["model"] == "llama3.2:latest"
     assert seen["payload"]["stream"] is False
     assert seen["payload"]["format"] == "json"
+    assert seen["payload"]["think"] is False
     assert seen["payload"]["options"] == {"temperature": 0}
     assert seen["payload"]["messages"][0]["role"] == "user"
 
@@ -218,6 +219,35 @@ def fake_urlopen(request, timeout):
     assert "valid JSON" in info["validation_error"]
 
 
+def test_classify_with_ollama_accepts_fenced_json_response(monkeypatch) -> None:  # A ```json fenced reply must still be classified successfully.
+    def fake_urlopen(request, timeout):  # Stand-in for urllib.request.urlopen that returns a canned chat response.
+        return FakeResponse(
+            {
+                "message": {
+                    "content": """```json
+{
+  "decision": "needs_review",
+  "categories": ["platform"],
+  "evidence": ["review claim"],
+  "recommended_action": "Route for platform review."
+}
+```"""
+                }
+            }
+        )
+
+    monkeypatch.setattr("urllib.request.urlopen", fake_urlopen)  # Keep the test offline: no real Ollama endpoint is contacted.
+
+    hits, info = classify_with_ollama(
+        Submission(platform="google", country="US", industry="general"),
+        endpoint="http://localhost:11434/api/chat",
+    )
+
+    assert info["status"] == "ok"  # The fenced payload was accepted rather than reported as invalid.
+    assert info["raw_decision"] == "needs_review"  # The decision comes from inside the fenced JSON.
+    assert [hit.policy_id for hit in hits] == ["model_policy_review"]
+
+
 def test_classify_with_ollama_rejects_unknown_decision_without_hits(monkeypatch) -> None:
     def fake_urlopen(request, timeout):
         return FakeResponse({"message": {"content": '{"decision": "banana", "evidence": []}'}})
diff --git a/tests/test_policy.py b/tests/test_policy.py
@@ -74,6 +74,22 @@ def test_filter_policies_applies_platform_and_industry_filters(tmp_path) -> None
     assert filter_policies(policies, wrong_industry) == []
 
 
+def test_filter_policies_all_platform_includes_platform_scoped_policies(tmp_path) -> None:  # platform="all" must not be excluded by the platform filter.
+    policy_path = tmp_path / "custom.yml"
+    policy_path.write_text(CUSTOM_POLICY, encoding="utf-8")
+    policies = load_policies([policy_path])
+    all_platforms = Submission.from_dict(
+        {
+            "platform": "all",  # Same value as the ALL_PLATFORMS sentinel in adlint.models.
+            "industry": "health",
+            "headline": "Clinical guarantee",
+            "policy_modules": ["health_claims"],
+        }
+    )
+
+    assert [policy.id for policy in filter_policies(policies, all_platforms)] == ["custom_health_claim"]  # The custom policy is still selected for an "all" submission.
+
+
 def test_bundled_meta_ads_policy_module_is_platform_scoped() -> None:
     meta_policy_ids = {
         "meta_personal_attributes_health",
diff --git a/tests/test_ui_static.py b/tests/test_ui_static.py
@@ -18,9 +18,11 @@ def test_local_model_controls_are_present_and_default_off() -> None:
     assert 'name="model_affects_score"' in INDEX_HTML
     assert 'id="ollama_model"' in INDEX_HTML
     assert 'name="ollama_model"' in INDEX_HTML
-    assert 'list="ollama-model-options"' in INDEX_HTML
-    assert 'value="gpt-oss-safeguard:20b"' in INDEX_HTML
-    assert 'id="ollama-model-options"' in INDEX_HTML
+    assert '<option value="gpt-oss-safeguard:20b" selected>gpt-oss-safeguard:20b</option>' in INDEX_HTML
+    assert '<option value="gpt-oss:20b">gpt-oss:20b</option>' in INDEX_HTML
+    assert '<option value="qwen3-coder:30b">qwen3-coder:30b</option>' in INDEX_HTML
+    assert '<option value="qwen3.5:35b-a3b">qwen3.5:35b-a3b</option>' in INDEX_HTML
+    assert '<option value="gemma4:26b">gemma4:26b</option>' in INDEX_HTML
 
 
 def test_copy_fields_are_required_so_placeholders_do_not_submit() -> None:
@@ -42,12 +44,16 @@ def test_page_starts_with_glp1_sample_context() -> None:
 
 def test_model_discovery_fetches_models_and_keeps_fallback_option() -> None:
     assert 'const DEFAULT_OLLAMA_MODEL = "gpt-oss-safeguard:20b";' in APP_JS
+    assert "const FALLBACK_OLLAMA_MODELS = [" in APP_JS
     assert 'fetch("/models")' in APP_JS
     assert "normalizeModelList(payload)" in APP_JS
     assert "modelName(payload?.default_model)" in APP_JS
-    assert "populateModelOptions([DEFAULT_OLLAMA_MODEL])" in APP_JS
+    assert "populateModelOptions(FALLBACK_OLLAMA_MODELS)" in APP_JS
+    assert "function isReviewModelOption(value)" in APP_JS
+    assert 'const EMBEDDING_MODEL_MARKERS = ["embed", "bge-"];' in APP_JS
+    assert "EMBEDDING_MODEL_MARKERS.some((marker) => normalized.includes(marker))" in APP_JS
     assert "function uniqueModelOptions(models)" in APP_JS
-    assert "for (const model of [...models, DEFAULT_OLLAMA_MODEL])" in APP_JS
+    assert "for (const model of [...models, ...FALLBACK_OLLAMA_MODELS])" in APP_JS
     assert "if (value && !values.includes(value)) values.push(value)" in APP_JS
 
 
@@ -68,7 +74,18 @@ def test_analyze_payload_includes_model_keys_when_enabled() -> None:
     assert "if (modelEnabled)" in APP_JS
     assert "payload.ollama_model" in APP_JS
     assert "payload.model_affects_score" in APP_JS
-    assert 'fetch("/analyze"' in APP_JS
+    assert 'fetchWithTimeout("/analyze"' in APP_JS
+
+
+def test_analyze_fetch_has_timeout_recovery_for_stuck_model_runs() -> None:  # Static check: app.js text must contain the timeout plumbing.
+    assert "const RULE_ONLY_TIMEOUT_MS = 30000;" in APP_JS  # Budget for requests without the local model.
+    assert "const LOCAL_MODEL_TIMEOUT_MS = 210000;" in APP_JS  # Budget for requests with the local model enabled.
+    assert "fetchWithTimeout(\"/analyze\"" in APP_JS
+    assert "new AbortController()" in APP_JS
+    assert "controller.abort()" in APP_JS
+    assert "requestTimeoutMs(payload)" in APP_JS
+    assert "Review timed out after" in APP_JS  # User-facing timeout message, first fragment.
+    assert "Try a smaller local model" in APP_JS  # User-facing timeout message, recovery hint.
 
 
 def test_results_and_markdown_expose_model_status() -> None:
@@ -163,4 +180,6 @@ def test_geist_style_system_font_and_restrained_surfaces_are_preserved() -> None
 
 
 def test_platform_select_includes_meta_ads() -> None:
+    assert '<option value="all">All</option>' in INDEX_HTML
+    assert '<option value="google" selected>Google</option>' in INDEX_HTML
     assert '<option value="meta">Meta</option>' in INDEX_HTML