From ac331f10c701b40979b805c685ebe1208ca12ef8 Mon Sep 17 00:00:00 2001 From: Eric Lee Date: Thu, 11 Jun 2026 19:50:15 -0700 Subject: [PATCH] workflow: lenient schema-keyed coercion for StructuredOutput (#282) Weak models (glm) emit JSON types as strings ("42", "true", arrays as JSON-encoded strings) and burned every schema-repair retry on trivially coercible mismatches, making structured workflow stages effectively Anthropic/OpenAI-only. coerce_to_schema runs before strict validation in the collector: keyed strictly on what the schema expects (never guessing from the value), recursive through properties/items, never raises, and anything uncoercible passes through so strict validation reports the real error. ajv coerceTypes parity choices: "3.0" -> 3 for integer schemas; a string satisfying a union with "string" is never rewritten; NaN / Infinity strings are rejected (not valid JSON numbers); bools never become ints. The tool records the coerced value everywhere so all consumers see one schema-conformant shape. 
def _schema_types(schema: Mapping[str, Any]) -> tuple[str, ...]:
    """Return the ``type`` keyword of *schema* normalised to a tuple.

    A single string becomes a one-element tuple, a list keeps only its
    string entries, and a missing or otherwise-shaped ``type`` yields
    ``()`` (which disables every type-keyed coercion).
    """
    declared = schema.get("type")
    if isinstance(declared, str):
        return (declared,)
    if isinstance(declared, list):
        return tuple(t for t in declared if isinstance(t, str))
    return ()


def _reject_json_constant(token: str) -> Any:
    """``json.loads`` ``parse_constant`` hook: refuse NaN / +-Infinity."""
    raise ValueError(f"non-finite JSON constant: {token}")


def _parse_finite_float(token: str) -> float:
    """``json.loads`` ``parse_float`` hook: refuse literals that overflow."""
    value = float(token)
    if not math.isfinite(value):
        raise ValueError(f"non-finite JSON number: {token}")
    return value


def _coerce_string(raw: str, types: tuple[str, ...], schema: Mapping[str, Any]) -> Any:
    """Coerce the string *raw* toward one of *types*, or return it unchanged."""
    # A string that already satisfies a union with "string" is left
    # alone (ajv coerceTypes parity: coerce only when the value matches
    # no declared type).
    if "string" in types:
        return raw
    text = raw.strip()

    if "boolean" in types and text.lower() in ("true", "false"):
        return text.lower() == "true"

    wants_number = "number" in types
    if wants_number or "integer" in types:
        # Exact path first: going through float would silently corrupt
        # integers above 2**53.
        try:
            return int(text)
        except ValueError:
            pass
        try:
            parsed = float(text)
        except ValueError:
            parsed = math.nan
        # isfinite: "NaN"/"Infinity" parse as floats but are not valid
        # JSON numbers — let strict validation reject the original
        # string loudly instead.
        if math.isfinite(parsed):
            if parsed.is_integer():
                # ajv coerceTypes parity: "3.0" -> 3.
                return int(parsed)
            if wants_number:
                return parsed

    if ("array" in types or "object" in types) and text[:1] in ("[", "{"):
        try:
            # The hooks keep non-finite numbers out of decoded
            # containers, matching the scalar rule above; plain
            # json.loads would accept NaN/Infinity and overflow 1e999.
            decoded = json.loads(
                text,
                parse_float=_parse_finite_float,
                parse_constant=_reject_json_constant,
            )
        except (ValueError, RecursionError):
            # RecursionError: pathologically nested input must not
            # break the "never raises" contract.
            return raw
        if (isinstance(decoded, list) and "array" in types) or (
            isinstance(decoded, dict) and "object" in types
        ):
            return coerce_to_schema(decoded, schema)
    return raw


def coerce_to_schema(obj: Any, schema: Any) -> Any:
    """Lenient pre-pass for weak-model outputs (#282).

    Weak models (glm et al.) emit JSON types as strings — ``"42"``,
    ``"true"``, a JSON-encoded array — and burn every schema-repair
    retry on trivially coercible mismatches. Keyed strictly on what the
    schema *expects*: numeric/boolean strings coerce where the schema
    wants ``number``/``integer``/``boolean``; string values parse via
    ``json.loads`` where it wants ``array``/``object``; integral floats
    coerce where it wants ``integer``. Anything that doesn't cleanly
    coerce is returned unchanged so strict validation reports the real
    error.

    Non-finite numbers (``NaN``, ``Infinity``, overflowing literals such
    as ``1e999``) are never produced, neither from a scalar string nor
    from inside a JSON-encoded container. Integer-looking strings are
    parsed exactly, so large values are not rounded through ``float``.

    Only ``type``/``properties``/``items`` are consulted —
    ``anyOf``/``oneOf`` union schemas get no coercion (values pass
    through to strict validation untouched).

    Args:
        obj: The value to coerce. Dicts and lists are rebuilt, not
            mutated, whenever the schema describes their members.
        schema: The JSON-Schema fragment describing ``obj``. Anything
            that is not a mapping makes the call a no-op.

    Returns:
        The coerced value, or ``obj`` itself when nothing applies.
    """
    if not isinstance(schema, Mapping):
        return obj
    types = _schema_types(schema)

    if isinstance(obj, str):
        return _coerce_string(obj, types, schema)

    # Bools never reach this branch (isinstance(True, float) is False)
    # and fall through every other rule untouched — True must never
    # become 1. inf/nan are excluded by is_integer().
    if (
        isinstance(obj, float)
        and "integer" in types
        and "number" not in types
        and obj.is_integer()
    ):
        return int(obj)

    if isinstance(obj, dict):
        properties = schema.get("properties")
        if isinstance(properties, Mapping):
            # Keys absent from ``properties`` recurse with a None
            # schema, which is a no-op.
            return {
                key: coerce_to_schema(value, properties.get(key))
                for key, value in obj.items()
            }
        return obj

    if isinstance(obj, list):
        items = schema.get("items")
        if isinstance(items, Mapping):
            return [coerce_to_schema(item, items) for item in obj]
        return obj

    return obj
# ---------------------------------------------------------------------------
# #282 — lenient type coercion for weak-model outputs
# ---------------------------------------------------------------------------

from src.workflow.structured import coerce_to_schema


def _object_schema(properties, required=None):
    """Build an ``object`` schema fixture around *properties*."""
    fixture = {"type": "object", "properties": properties}
    if required is not None:
        fixture["required"] = required
    return fixture


class TestCoerceToSchema:
    """Unit tests for ``coerce_to_schema`` in isolation."""

    def test_numeric_strings_coerce(self):
        spec = _object_schema(
            {"count": {"type": "integer"}, "score": {"type": "number"}}
        )
        coerced = coerce_to_schema({"count": "42", "score": "3.14"}, spec)
        assert coerced == {"count": 42, "score": 3.14}

    def test_boolean_strings_coerce(self):
        spec = _object_schema({"ok": {"type": "boolean"}})
        for emitted, expected in (("true", True), ("False", False)):
            assert coerce_to_schema({"ok": emitted}, spec) == {"ok": expected}

    def test_json_encoded_array_string_coerces(self):
        # The glm shape from PR #266: an array returned as its JSON string.
        spec = _object_schema(
            {"bugs": {"type": "array", "items": {"type": "string"}}}
        )
        coerced = coerce_to_schema({"bugs": '["a", "b"]'}, spec)
        assert coerced == {"bugs": ["a", "b"]}

    def test_json_encoded_object_string_coerces(self):
        spec = _object_schema({"meta": {"type": "object"}})
        coerced = coerce_to_schema({"meta": '{"k": 1}'}, spec)
        assert coerced == {"meta": {"k": 1}}

    def test_nested_coercion_through_items(self):
        finding = _object_schema(
            {"line": {"type": "integer"}, "real": {"type": "boolean"}}
        )
        spec = _object_schema({"findings": {"type": "array", "items": finding}})
        emitted = {"findings": [{"line": "12", "real": "true"}]}
        assert coerce_to_schema(emitted, spec) == {
            "findings": [{"line": 12, "real": True}]
        }

    def test_integral_float_coerces_to_integer(self):
        spec = _object_schema({"n": {"type": "integer"}})
        assert coerce_to_schema({"n": 42.0}, spec) == {"n": 42}

    def test_uncoercible_values_pass_through_unchanged(self):
        spec = _object_schema(
            {
                "count": {"type": "integer"},
                "flag": {"type": "boolean"},
                "items": {"type": "array"},
            }
        )
        original = {"count": "not-a-number", "flag": "yes", "items": "[broken"}
        assert coerce_to_schema(original, spec) == original

    def test_string_schema_leaves_numeric_strings_alone(self):
        spec = _object_schema({"id": {"type": "string"}})
        assert coerce_to_schema({"id": "42"}, spec) == {"id": "42"}

    def test_bool_never_coerces_to_integer(self):
        spec = _object_schema({"n": {"type": "integer"}})
        assert coerce_to_schema({"n": True}, spec) == {"n": True}

    def test_type_list_schemas(self):
        spec = _object_schema({"v": {"type": ["integer", "null"]}})
        assert coerce_to_schema({"v": "7"}, spec) == {"v": 7}

    def test_non_mapping_schema_is_noop(self):
        assert coerce_to_schema({"x": "1"}, None) == {"x": "1"}


class TestCollectorCoercion:
    """The collector and the tool must both surface the coerced value."""

    def test_weak_model_emission_accepted_first_try(self):
        # End-to-end glm fixture: every scalar stringly-typed, the array
        # JSON-encoded — must validate WITHOUT burning a retry.
        spec = _object_schema(
            {
                "count": {"type": "integer"},
                "confident": {"type": "boolean"},
                "tags": {"type": "array", "items": {"type": "string"}},
            },
            required=["count", "confident", "tags"],
        )
        collector = StructuredOutputCollector(schema=spec)
        emission = {"count": "3", "confident": "true", "tags": '["perf", "bug"]'}
        accepted, error = collector.offer(emission)
        assert accepted is True and error is None
        assert collector.value == {
            "count": 3,
            "confident": True,
            "tags": ["perf", "bug"],
        }
        assert collector.attempts == 1

    def test_tool_records_coerced_value_everywhere(self):
        spec = _object_schema({"n": {"type": "integer"}}, required=["n"])
        collector = StructuredOutputCollector(schema=spec)
        tool = make_structured_output_tool(collector)
        ctx = SimpleNamespace(outbox=[])
        result = tool.call({"n": "5"}, ctx)
        expected = {"n": 5}
        assert not result.is_error
        assert result.output["structured_output"] == expected
        assert ctx.outbox[0]["structured_output"] == expected
        assert collector.value == expected

    def test_genuinely_wrong_output_still_errors(self):
        spec = _object_schema({"n": {"type": "integer"}}, required=["n"])
        collector = StructuredOutputCollector(schema=spec, max_retries=1)
        accepted, error = collector.offer({"n": "not-a-number"})
        assert accepted is False
        assert error is not None
        assert collector.exhausted is True


class TestCoercionEdgeCases:
    """ajv ``coerceTypes`` parity decisions and non-finite handling."""

    def test_nan_and_infinity_strings_are_rejected(self):
        # float("NaN") parses but is not a valid JSON number — must NOT
        # silently enter the structured-output contract. Unchanged output
        # means strict validation fails on the original string.
        spec = _object_schema({"score": {"type": "number"}})
        for bad in ("NaN", "Infinity", "-inf", "nan"):
            assert coerce_to_schema({"score": bad}, spec) == {"score": bad}

    def test_float_string_coerces_for_integer_only_schema(self):
        # ajv coerceTypes parity: "3.0" -> 3 where the schema wants integer.
        spec = _object_schema({"n": {"type": "integer"}})
        assert coerce_to_schema({"n": "3.0"}, spec) == {"n": 3}
        # Non-integral float strings stay put for the real error.
        assert coerce_to_schema({"n": "3.5"}, spec) == {"n": "3.5"}

    def test_string_union_leaves_string_alone(self):
        # ajv coerceTypes parity: coerce only when the value matches no
        # declared type.
        spec = _object_schema({"v": {"type": ["string", "integer"]}})
        assert coerce_to_schema({"v": "42"}, spec) == {"v": "42"}