Skip to content

Commit fe9d2bb

Browse files
ftchvsclaude
andauthored
fix: stabilize local model review flow (#16)
* fix: stabilize local model review flow * fix: support thinking local model outputs * fix: prevent dev server reload churn * refactor: tighten local model review code paths - Promote "all" platform sentinel to ALL_PLATFORMS constant in models. - Promote embedding-model exclusion list to EMBEDDING_MODEL_MARKERS. - Drop unreachable branch in fenced-JSON candidate extractor. - Cache escapeHtml output per option in populateModelOptions. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 59eec91 commit fe9d2bb

11 files changed

Lines changed: 162 additions & 30 deletions

File tree

Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ VENV := .venv
33
BIN := $(VENV)/bin
44
STAMP := $(VENV)/.installed
55
MODEL_EVAL_FLAGS ?= --ollama-model gpt-oss-safeguard:20b
6+
ADLINT_OLLAMA_TIMEOUT ?= 180
7+
ADLINT_OLLAMA_NUM_PREDICT ?= 1024
68

79
.PHONY: api dev scan eval benchmark benchmark-data policy-coverage policy-coverage-validate rewrite-quality model-benchmark model-smoke model-usefulness pr-preflight real-cases real-cases-ci real-cases-hybrid real-cases-model-quality real-cases-validate real-world-blind-candidates real-world-blind-ci real-world-blind-validate real-world-blind real-world-blind-model-quality research-summary test install
810

@@ -17,7 +19,7 @@ dev: $(STAMP)
1719
$(BIN)/python -m adlint scan examples/high_risk_tiktok_health.json --output-dir reports
1820

1921
api: $(STAMP)
20-
$(BIN)/uvicorn adlint.api:app --reload
22+
ADLINT_OLLAMA_TIMEOUT=$(ADLINT_OLLAMA_TIMEOUT) ADLINT_OLLAMA_NUM_PREDICT=$(ADLINT_OLLAMA_NUM_PREDICT) $(BIN)/uvicorn adlint.api:app --reload --reload-dir adlint
2123

2224
scan: $(STAMP)
2325
$(BIN)/python -m adlint scan examples/needs_review_google_wellness.json

adlint/classifiers/ollama.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ def _generation_payload(endpoint: str, model: str, prompt: str) -> dict[str, Any
148148
"model": model,
149149
"stream": False,
150150
"format": "json",
151+
"think": False,
151152
"options": options,
152153
}
153154
if urllib.parse.urlparse(endpoint).path.endswith("/api/generate"):
@@ -280,6 +281,7 @@ def _clip(value: str, *, max_chars: int) -> str:
280281

281282

282283
def _parse_model_response(response_text: str) -> tuple[dict[str, Any], bool, str | None]:
284+
response_text = _json_response_candidate(response_text)
283285
try:
284286
parsed = json.loads(response_text)
285287
except json.JSONDecodeError:
@@ -299,6 +301,21 @@ def _parse_model_response(response_text: str) -> tuple[dict[str, Any], bool, str
299301
return parsed, True, None
300302

301303

304+
def _json_response_candidate(response_text: str) -> str:
    """Return the part of *response_text* most likely to be the JSON object.

    Local models do not always answer with bare JSON: the object may be
    wrapped in a Markdown code fence, preceded by reasoning text, or followed
    by a short note. This helper strips an enclosing fence and then looks for
    a decodable JSON object inside the remaining text.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The text of the last decodable JSON object found. When no object
        decodes, the slice from the first ``{`` to the last ``}`` is returned
        (or the stripped text when there is no such slice) so the caller's
        own JSON parsing reports the failure.
    """
    text = response_text.strip()
    if text.startswith("```"):
        # Drop the opening fence line (which may carry a language tag) and
        # the closing fence when it sits on its own final line.
        lines = text.splitlines()[1:]
        if lines and lines[-1].strip() == "```":
            lines = lines[:-1]
        text = "\n".join(lines).strip()

    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end == -1 or start >= end:
        return text

    # A plain first-brace/last-brace slice is wrong whenever the surrounding
    # prose contains braces of its own, so try to decode an object at each
    # opening brace instead. The last object that decodes wins because
    # reasoning text normally comes before the final answer.
    decoder = json.JSONDecoder()
    found = None
    position = start
    while position != -1:
        try:
            _, stop = decoder.raw_decode(text, position)
        except ValueError:
            position = text.find("{", position + 1)
        else:
            found = text[position:stop]
            # Resume after the decoded object so nested objects that belong
            # to it are not reported as separate candidates.
            position = text.find("{", stop)

    if found is not None:
        return found
    # Nothing decoded: keep the legacy slice so the caller still sees (and
    # reports) the malformed payload.
    return text[start : end + 1]
317+
318+
302319
def _is_string_list(value: Any) -> bool:
303320
return isinstance(value, list) and all(isinstance(item, str) for item in value)
304321

adlint/models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
Decision = str
88
Severity = str
99

10+
ALL_PLATFORMS = "all"
11+
1012

1113
@dataclass(frozen=True)
1214
class Evidence:

adlint/policy.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import yaml
88

9-
from adlint.models import Policy, Submission
9+
from adlint.models import ALL_PLATFORMS, Policy, Submission
1010

1111

1212
DEFAULT_MODULES = (
@@ -49,7 +49,7 @@ def filter_policies(policies: Iterable[Policy], submission: Submission) -> list[
4949
for policy in policies:
5050
if policy.modules and not enabled_modules.intersection(policy.modules):
5151
continue
52-
if policy.platforms and submission.platform not in policy.platforms:
52+
if policy.platforms and submission.platform != ALL_PLATFORMS and submission.platform not in policy.platforms:
5353
continue
5454
if policy.industries and submission.industry not in policy.industries:
5555
continue

adlint/rules/engine.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import re
44
from collections import defaultdict
55

6-
from adlint.models import Evidence, LandingPageSnapshot, Policy, PolicyHit, Submission
6+
from adlint.models import ALL_PLATFORMS, Evidence, LandingPageSnapshot, Policy, PolicyHit, Submission
77

88

99
MAX_EVIDENCE_PER_POLICY = 5
@@ -151,7 +151,7 @@ def _derived_linkedin_professional_claim_hits(
151151
policies: list[Policy],
152152
existing_hits: list[PolicyHit],
153153
) -> list[PolicyHit]:
154-
if submission.platform != "linkedin":
154+
if submission.platform not in {"linkedin", ALL_PLATFORMS}:
155155
return []
156156
if any(hit.policy_id == "linkedin_professional_claim_review" for hit in existing_hits):
157157
return []

adlint/static/app.js

Lines changed: 54 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,18 @@ const exportMarkdownButton = document.querySelector("#export-markdown");
1313
const modelEnabledInput = document.querySelector("#model_enabled");
1414
const modelAffectsScoreInput = document.querySelector("#model_affects_score");
1515
const ollamaModelInput = document.querySelector("#ollama_model");
16-
const ollamaModelOptions = document.querySelector("#ollama-model-options");
1716

1817
const DEFAULT_OLLAMA_MODEL = "gpt-oss-safeguard:20b";
18+
const FALLBACK_OLLAMA_MODELS = [
19+
DEFAULT_OLLAMA_MODEL,
20+
"gpt-oss:20b",
21+
"qwen3-coder:30b",
22+
"qwen3.5:35b-a3b",
23+
"gemma4:26b",
24+
];
25+
const RULE_ONLY_TIMEOUT_MS = 30000;
26+
const LOCAL_MODEL_TIMEOUT_MS = 210000;
27+
const EMBEDDING_MODEL_MARKERS = ["embed", "bge-"];
1928
const MODEL_STATUSES = ["disabled", "unavailable", "invalid_response", "ok"];
2029
const ANALYSIS_STEPS = [
2130
["intake", "Input normalized", "Copy, campaign context, modules, and optional landing inputs are prepared for review."],
@@ -44,11 +53,11 @@ form.addEventListener("submit", async (event) => {
4453
setSubmitting(true);
4554

4655
try {
47-
const response = await fetch("/analyze", {
56+
const response = await fetchWithTimeout("/analyze", {
4857
method: "POST",
4958
headers: { "content-type": "application/json" },
5059
body: JSON.stringify(payload),
51-
});
60+
}, requestTimeoutMs(payload));
5261

5362
if (!response.ok) {
5463
const detail = await response.text();
@@ -76,6 +85,26 @@ form.addEventListener("submit", async (event) => {
7685
}
7786
});
7887

88+
/**
 * Run `fetch` with an abort signal that fires after `timeoutMs` milliseconds.
 *
 * @param {string} url - Request URL.
 * @param {object} options - Fetch options; a `signal` entry is replaced by the timeout signal.
 * @param {number} timeoutMs - Milliseconds to wait before aborting the request.
 * @returns {Promise<Response>} The fetch response.
 * @throws {Error} A timeout error when the request was aborted; any other fetch error is rethrown as-is.
 */
async function fetchWithTimeout(url, options, timeoutMs) {
  const abortController = new AbortController();
  const timeoutHandle = window.setTimeout(() => {
    abortController.abort();
  }, timeoutMs);

  try {
    const request = Object.assign({}, options, { signal: abortController.signal });
    return await fetch(url, request);
  } catch (error) {
    const timedOut = Boolean(error) && error.name === "AbortError";
    if (!timedOut) {
      throw error;
    }
    // Replace the abort error with a message the user can act on.
    const seconds = Math.round(timeoutMs / 1000);
    throw new Error(`Review timed out after ${seconds}s. Try a smaller local model or run again after the model has warmed up.`);
  } finally {
    // Always cancel the pending timer, whether the request succeeded or failed.
    window.clearTimeout(timeoutHandle);
  }
}
103+
104+
function requestTimeoutMs(payload) {
105+
return payload.model_enabled ? LOCAL_MODEL_TIMEOUT_MS : RULE_ONLY_TIMEOUT_MS;
106+
}
107+
79108
form.addEventListener(
80109
"invalid",
81110
() => {
@@ -137,19 +166,19 @@ async function discoverModels() {
137166
const payload = await response.json();
138167
const models = normalizeModelList(payload);
139168
const defaultModel = modelName(payload?.default_model) || DEFAULT_OLLAMA_MODEL;
140-
populateModelOptions(models.length > 0 ? [defaultModel, ...models] : [defaultModel]);
169+
populateModelOptions([defaultModel, ...models, ...FALLBACK_OLLAMA_MODELS]);
141170
if (!ollamaModelInput.value.trim() || ollamaModelInput.value === DEFAULT_OLLAMA_MODEL) {
142171
ollamaModelInput.value = defaultModel;
143172
}
144173
} catch {
145-
populateModelOptions([DEFAULT_OLLAMA_MODEL]);
174+
populateModelOptions(FALLBACK_OLLAMA_MODELS);
146175
if (!ollamaModelInput.value.trim()) ollamaModelInput.value = DEFAULT_OLLAMA_MODEL;
147176
}
148177
}
149178

150179
function normalizeModelList(payload) {
151180
const source = Array.isArray(payload) ? payload : payload && Array.isArray(payload.models) ? payload.models : [];
152-
return [...new Set(source.map(modelName).filter(Boolean))];
181+
return [...new Set(source.map(modelName).filter(isReviewModelOption))];
153182
}
154183

155184
function modelName(item) {
@@ -160,14 +189,31 @@ function modelName(item) {
160189
return "";
161190
}
162191

192+
function isReviewModelOption(value) {
193+
if (!value) return false;
194+
const normalized = value.toLowerCase();
195+
return !EMBEDDING_MODEL_MARKERS.some((marker) => normalized.includes(marker));
196+
}
197+
163198
function populateModelOptions(models) {
164199
const values = uniqueModelOptions(models);
165-
ollamaModelOptions.innerHTML = values.map((model) => `<option value="${escapeHtml(model)}"></option>`).join("");
200+
const currentValue = ollamaModelInput.value.trim();
201+
ollamaModelInput.innerHTML = values
202+
.map((model) => {
203+
const safe = escapeHtml(model);
204+
return `<option value="${safe}">${safe}</option>`;
205+
})
206+
.join("");
207+
if (currentValue && values.includes(currentValue)) {
208+
ollamaModelInput.value = currentValue;
209+
} else {
210+
ollamaModelInput.value = values.includes(DEFAULT_OLLAMA_MODEL) ? DEFAULT_OLLAMA_MODEL : values[0] || "";
211+
}
166212
}
167213

168214
function uniqueModelOptions(models) {
169215
const values = [];
170-
for (const model of [...models, DEFAULT_OLLAMA_MODEL]) {
216+
for (const model of [...models, ...FALLBACK_OLLAMA_MODELS]) {
171217
const value = modelName(model);
172218
if (value && !values.includes(value)) values.push(value);
173219
}

adlint/static/index.html

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ <h2>Draft ad</h2>
4949
<label>
5050
Platform
5151
<select id="platform" name="platform">
52-
<option value="google">Google</option>
52+
<option value="all">All</option>
53+
<option value="google" selected>Google</option>
5354
<option value="tiktok">TikTok</option>
5455
<option value="linkedin">LinkedIn</option>
5556
<option value="meta">Meta</option>
@@ -81,18 +82,17 @@ <h2>Draft ad</h2>
8182
</label>
8283
<label>
8384
Model
84-
<input
85+
<select
8586
id="ollama_model"
8687
name="ollama_model"
87-
list="ollama-model-options"
88-
autocomplete="off"
89-
value="gpt-oss-safeguard:20b"
90-
placeholder="gpt-oss-safeguard:20b"
91-
/>
88+
>
89+
<option value="gpt-oss-safeguard:20b" selected>gpt-oss-safeguard:20b</option>
90+
<option value="gpt-oss:20b">gpt-oss:20b</option>
91+
<option value="qwen3-coder:30b">qwen3-coder:30b</option>
92+
<option value="qwen3.5:35b-a3b">qwen3.5:35b-a3b</option>
93+
<option value="gemma4:26b">gemma4:26b</option>
94+
</select>
9295
</label>
93-
<datalist id="ollama-model-options">
94-
<option value="gpt-oss-safeguard:20b"></option>
95-
</datalist>
9696
</fieldset>
9797

9898
<fieldset>

tests/test_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ def test_ui_assets_are_served() -> None:
201201

202202
assert js_response.status_code == 200
203203
assert css_response.status_code == 200
204-
assert 'fetch("/analyze"' in js_response.text
204+
assert 'fetchWithTimeout("/analyze"' in js_response.text
205205
assert "logging_enabled: true" not in js_response.text
206206
assert ".result-panel" in css_response.text
207207

tests/test_ollama.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ def fake_urlopen(request, timeout):
9898
assert seen["payload"]["model"] == "llama3.2:latest"
9999
assert seen["payload"]["stream"] is False
100100
assert seen["payload"]["format"] == "json"
101+
assert seen["payload"]["think"] is False
101102
assert seen["payload"]["options"] == {"temperature": 0}
102103
assert seen["payload"]["messages"][0]["role"] == "user"
103104

@@ -218,6 +219,35 @@ def fake_urlopen(request, timeout):
218219
assert "valid JSON" in info["validation_error"]
219220

220221

222+
def test_classify_with_ollama_accepts_fenced_json_response(monkeypatch) -> None:
223+
def fake_urlopen(request, timeout):
224+
return FakeResponse(
225+
{
226+
"message": {
227+
"content": """```json
228+
{
229+
"decision": "needs_review",
230+
"categories": ["platform"],
231+
"evidence": ["review claim"],
232+
"recommended_action": "Route for platform review."
233+
}
234+
```"""
235+
}
236+
}
237+
)
238+
239+
monkeypatch.setattr("urllib.request.urlopen", fake_urlopen)
240+
241+
hits, info = classify_with_ollama(
242+
Submission(platform="google", country="US", industry="general"),
243+
endpoint="http://localhost:11434/api/chat",
244+
)
245+
246+
assert info["status"] == "ok"
247+
assert info["raw_decision"] == "needs_review"
248+
assert [hit.policy_id for hit in hits] == ["model_policy_review"]
249+
250+
221251
def test_classify_with_ollama_rejects_unknown_decision_without_hits(monkeypatch) -> None:
222252
def fake_urlopen(request, timeout):
223253
return FakeResponse({"message": {"content": '{"decision": "banana", "evidence": []}'}})

tests/test_policy.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,22 @@ def test_filter_policies_applies_platform_and_industry_filters(tmp_path) -> None
7474
assert filter_policies(policies, wrong_industry) == []
7575

7676

77+
def test_filter_policies_all_platform_includes_platform_scoped_policies(tmp_path) -> None:
    """A submission with platform "all" still selects the policy loaded from CUSTOM_POLICY."""
    custom_file = tmp_path / "custom.yml"
    custom_file.write_text(CUSTOM_POLICY, encoding="utf-8")
    loaded = load_policies([custom_file])

    submission_fields = {
        "platform": "all",
        "industry": "health",
        "headline": "Clinical guarantee",
        "policy_modules": ["health_claims"],
    }
    all_platforms = Submission.from_dict(submission_fields)

    matched_ids = [policy.id for policy in filter_policies(loaded, all_platforms)]
    assert matched_ids == ["custom_health_claim"]
91+
92+
7793
def test_bundled_meta_ads_policy_module_is_platform_scoped() -> None:
7894
meta_policy_ids = {
7995
"meta_personal_attributes_health",

0 commit comments

Comments
 (0)