From d5f97722f14dc9d5416e0749b2ea730147c0b96f Mon Sep 17 00:00:00 2001 From: ramkrishna2910 Date: Wed, 24 Jun 2026 15:52:12 -0700 Subject: [PATCH 1/7] [Router] Interfaces + fixtures stub (foundation) (#2407) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Land the shared C++ contract surface + schema fixtures the rest of the routing engine codes against — the first development step after schema sign-off, gating the parallel engine/wiring tracks. - routing_policy.h: RouteContext, Score (label->score), ClassifierServices (embed/run_classifier/chat injection seam), Classifier, MatchExpr AST, Condition + EvalContext + LeafFactory (the runtime-evaluation seam shared by the evaluator and the registry), Rule, Decision/TraceEntry, RoutePolicy, RoutingPolicyEngine ctor. Std + nlohmann/json only; no Router/backend include. route() declared, not defined. - src/cpp/resources/schemas: frozen route_policy + decision JSON Schemas + README. Both carry a required root `version` ("1") and pin engine-owned v1 semantics (keywords=substring, regex=ECMAScript, inclusive bands w/ default 0.5, min/max_chars=UTF-8 bytes, on_error default match_false, router desugaring). README documents the back-compat contract (never redefine a shipped field; never delete a major's schema; migrate-on-load). - Mechanical enforcement: schema-lock.json + test/test_schema_lock.py (canonical-hash snapshot guard); test/test_routing_fixtures.py (schemas self-valid + fixtures conformant). - test/cpp/fixtures/routing: lean L0a-L3 collection.router examples + decision. - fake_classifier_services.h + test_routing_policy_contract.cpp (CTest RoutingPolicyContractTest): header compiles standalone, types construct, Condition/EvalContext/LeafFactory seam exercised, fake services callable, engine constructible, fixtures satisfy locked invariants. Comments describe concepts, not tracker IDs; the issue linkage lives in this commit and the PR. 
Depends on schema sign-off; landing as draft until then. Co-Authored-By: Claude Opus 4.8 (1M context) --- CMakeLists.txt | 24 ++ src/cpp/include/lemon/routing_policy.h | 317 ++++++++++++++++++ src/cpp/resources/schemas/README.md | 110 ++++++ .../resources/schemas/decision.schema.json | 57 ++++ .../schemas/route_policy.schema.json | 176 ++++++++++ src/cpp/resources/schemas/schema-lock.json | 10 + test/cpp/fake_classifier_services.h | 74 ++++ .../fixtures/routing/decision_example.json | 11 + test/cpp/fixtures/routing/l0a_llm_router.json | 15 + test/cpp/fixtures/routing/l1_keywords.json | 20 ++ test/cpp/fixtures/routing/l2_semantic.json | 20 ++ test/cpp/fixtures/routing/l3_classifier.json | 24 ++ test/cpp/test_routing_policy_contract.cpp | 302 +++++++++++++++++ test/requirements.txt | 3 + test/test_routing_fixtures.py | 89 +++++ test/test_schema_lock.py | 110 ++++++ 16 files changed, 1362 insertions(+) create mode 100644 src/cpp/include/lemon/routing_policy.h create mode 100644 src/cpp/resources/schemas/README.md create mode 100644 src/cpp/resources/schemas/decision.schema.json create mode 100644 src/cpp/resources/schemas/route_policy.schema.json create mode 100644 src/cpp/resources/schemas/schema-lock.json create mode 100644 test/cpp/fake_classifier_services.h create mode 100644 test/cpp/fixtures/routing/decision_example.json create mode 100644 test/cpp/fixtures/routing/l0a_llm_router.json create mode 100644 test/cpp/fixtures/routing/l1_keywords.json create mode 100644 test/cpp/fixtures/routing/l2_semantic.json create mode 100644 test/cpp/fixtures/routing/l3_classifier.json create mode 100644 test/cpp/test_routing_policy_contract.cpp create mode 100644 test/test_routing_fixtures.py create mode 100644 test/test_schema_lock.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 70c3bf352..d9d30e657 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1788,6 +1788,30 @@ if(EXISTS "${_INSTALL_ATOMICITY_TEST_SRC}") add_test(NAME InstallAtomicityTest COMMAND 
test_install_atomicity) endif() +# Routing engine contract surface. Header-only foundation: the +# shared types/interfaces in routing_policy.h plus the fake ClassifierServices. +# The test loads the L0a-L3 fixtures from the source tree via ROUTING_FIXTURE_DIR. +set(_ROUTING_CONTRACT_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/test/cpp/test_routing_policy_contract.cpp" +) +if(EXISTS "${_ROUTING_CONTRACT_TEST_SRC}") + add_executable(test_routing_policy_contract + test/cpp/test_routing_policy_contract.cpp + ) + target_include_directories(test_routing_policy_contract PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/src/cpp/include + ${CMAKE_CURRENT_SOURCE_DIR}/test/cpp + ${CMAKE_CURRENT_BINARY_DIR}/include + ) + target_link_libraries(test_routing_policy_contract PRIVATE nlohmann_json::nlohmann_json) + target_compile_definitions(test_routing_policy_contract PRIVATE + ROUTING_FIXTURE_DIR="${CMAKE_CURRENT_SOURCE_DIR}/test/cpp/fixtures/routing" + ) + + include(CTest) + add_test(NAME RoutingPolicyContractTest COMMAND test_routing_policy_contract) +endif() + # Auto-tune: GGUF array storage, scalar derivation, weighted KV cache computation. # Covers head_count_kv_per_layer, sliding_window_pattern, SWA precise weighted sum, # full_attention_interval exact count, and scalar fallback paths. diff --git a/src/cpp/include/lemon/routing_policy.h b/src/cpp/include/lemon/routing_policy.h new file mode 100644 index 000000000..86aec285d --- /dev/null +++ b/src/cpp/include/lemon/routing_policy.h @@ -0,0 +1,317 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +// Contract surface for the generic routing engine (the "Lemonade Router"). +// +// This header is the foundation: the shared types, interfaces, and the engine +// constructor signature that the rest of the engine codes against. 
It is +// intentionally behavior-free — the match-expression evaluator, the classifier / +// condition registry, the deterministic conditions, the semantic_similarity and +// llm classifiers, the engine assembly, the parser, and the live Router wiring +// all implement against the declarations here. +// +// Design north star: the engine does PURE model selection — boolean rules over +// classifiers, first-match-wins, fail-open to default_model. It emits a Decision +// plus an optional per-condition trace. Everything trust-specific (verdicts, +// block, audit persistence, consent) is layered ON TOP via three seams that the +// engine never interprets: +// 1. RouteContext::metadata — caller-supplied routing inputs. +// 2. Rule::outputs — a pass-through bag copied verbatim into Decision. +// 3. Decision::trace — what the client/audit sink logs. +// +// INVARIANT: this header includes ONLY the standard library and nlohmann/json. +// It must never include a backend or Router header. Backends are reached only +// through ClassifierServices (a struct of std::function injection points), the +// same subprocess-friendly seam pattern used by CollectionOrchestrator. + +namespace lemon { + +using json = nlohmann::json; + +// --------------------------------------------------------------------------- +// Request-side context +// --------------------------------------------------------------------------- + +// Generic, backend-agnostic view of one routing request. Built by the dispatch +// layer from the inbound OpenAI chat body; consumed by conditions and +// classifiers. No trust vocabulary lives here — `metadata` is an opaque string +// map whose keys are the policy author's business. +struct RouteContext { + // The text the classifiers/conditions see (typically the latest user turn). + std::string input; + + // Cheap, deterministic request features. 
`chars` is a UTF-8 byte count (the + // frozen v1 unit for min_chars/max_chars; token-based length is deferred to + // a future min_tokens/max_tokens, never a redefinition of chars). + struct Params { + std::string model; // the collection.router model name addressed + bool has_tools = false; // request carried a non-empty tools[] array + bool has_images = false; // request carried image content parts + std::size_t chars = 0; // UTF-8 byte count of `input` + } params; + + // Routing inputs carried on the OpenAI `metadata` body field. List values + // are comma-encoded by the caller; the engine exposes them verbatim. Trust + // puts keys like "task_class"/"consent" here. + std::map metadata; +}; + +// --------------------------------------------------------------------------- +// Classifier output + error policy +// --------------------------------------------------------------------------- + +// What a Classifier produces for one (classifier, input) pair. A `classifier` +// returns label -> score in [0,1] (HF text-classification convention); +// `semantic_similarity` is the fixed-shape exception and reports a single cosine +// score under the empty-string key. +// +// Scores are engine-opaque: a condition applies a min_score/max_score band to +// the score of a chosen label to produce a bool. +struct Score { + // label -> score. For semantic_similarity: {"": max_cosine}. + std::map labels; + + // false => the classifier failed to evaluate (model error / timeout); the + // owning condition then applies its `on_error` policy instead of the band. + bool ok = true; + + // Optional human-readable rationale (the `llm` router records its pick here) + // — surfaced in the trace, never used for matching. + std::string rationale; + + // Score for an explicit label, or 0.0 if absent. + double score_of(const std::string& label) const { + auto it = labels.find(label); + return it == labels.end() ? 
0.0 : it->second; + } + + // The primary score — the empty-key entry (semantic_similarity) when present, + // otherwise the map's first entry (the lone one if single). 0.0 if empty. + double primary() const { + if (labels.empty()) return 0.0; + auto it = labels.find(""); + return it != labels.end() ? it->second : labels.begin()->second; + } +}; + +// Behavior when a classifier fails to evaluate. match_true is "fail-closed +// authoring" — a failed PII/jailbreak check still trips its rule and keeps the +// request local. +enum class OnError { + MatchTrue, // "match_true" + MatchFalse, // "match_false" +}; + +// Single source of truth for the on_error string<->enum mapping so the parser +// and any tooling agree. Defaults to MatchFalse (fail-open) when unset or +// unrecognized; the parser is responsible for rejecting bad values loudly. +inline OnError parse_on_error(const std::string& s) { + return s == "match_true" ? OnError::MatchTrue : OnError::MatchFalse; +} + +inline const char* on_error_to_string(OnError e) { + return e == OnError::MatchTrue ? "match_true" : "match_false"; +} + +// --------------------------------------------------------------------------- +// Backend injection seam +// --------------------------------------------------------------------------- + +// The ONLY way the pure engine touches live backends. Real implementations bind +// these to the Router (embeddings / classifier-model invocation / chat); tests +// bind them to fakes (fixed vectors / fixed scores / fixed text). Keeping them +// std::function keeps routing_policy.h free of any Router include. +struct ClassifierServices { + // Embed `text` with `model`; powers semantic_similarity. Maps to + // Router::embeddings. + std::function(const std::string& model, + const std::string& text)> embed; + + // Run a text-classification `model` over `text`; returns label -> score. + // Powers the generic `classifier` type. 
+ std::function(const std::string& model, + const std::string& text)> run_classifier; + + // Run a chat `model` with a system `prompt` over `input`; returns the raw + // assistant text. Powers the `llm` router / L0a on-ramp. Maps to + // Router::chat_completion. + std::function chat; +}; + +// --------------------------------------------------------------------------- +// Classifiers +// --------------------------------------------------------------------------- + +// What a classifier sees when evaluated. Holds the request plus the injected +// services it may call. Both referenced objects must outlive the call. +struct ClassifierContext { + const RouteContext& request; + const ClassifierServices& services; +}; + +// Abstract base for every classifier type (semantic_similarity, classifier, +// llm, and the reserved vLLM-SR presets). Concrete subclasses and the registry +// that instantiates them from JSON live alongside their implementations. +class Classifier { +public: + virtual ~Classifier() = default; + + // Evaluate once for the given request. Implementations must set Score::ok + // false (rather than throw) on backend failure so the owning condition can + // apply on_error. + virtual Score evaluate(const ClassifierContext& ctx) const = 0; + + const std::string& id() const { return id_; } + const std::string& type() const { return type_; } + OnError on_error() const { return on_error_; } + +protected: + Classifier(std::string id, std::string type, OnError on_error) + : id_(std::move(id)), type_(std::move(type)), on_error_(on_error) {} + + std::string id_; + std::string type_; + OnError on_error_ = OnError::MatchFalse; +}; + +using ClassifierPtr = std::shared_ptr; + +// --------------------------------------------------------------------------- +// Match AST +// --------------------------------------------------------------------------- + +// A node in a rule's `match` expression. 
Nested any/all/not over leaf +// conditions; vLLM-SR's flat single-operator form is the degenerate subset. +// Leaf condition parsing (deterministic ops, classifier-band refs) is handled +// where those conditions are implemented — the foundation carries the raw leaf +// JSON so the AST shape is fixed without committing to leaf semantics. +struct MatchExpr { + enum class Op { Leaf, All, Any, Not }; + + Op op = Op::Leaf; + + // For All/Any/Not. Not has exactly one child. + std::vector children; + + // For Leaf: the raw condition object, e.g. {"keywords_any":[...]} or + // {"classifier":"pii","min_score":0.5}. + json leaf; +}; + +// --------------------------------------------------------------------------- +// Rules + Decision +// --------------------------------------------------------------------------- + +// One ordered, first-match-wins rule. `route_to` must name a candidate. +// `outputs` is engine-opaque and copied verbatim into Decision::outputs. +struct Rule { + std::string id; + MatchExpr match; + std::string route_to; + json outputs = json::object(); +}; + +// One per-condition trace entry. Emitted only when route_trace=true. +struct TraceEntry { + std::string condition; // e.g. "classifier:pii", "keywords_any" + std::optional score; // present for classifier conditions + bool result = false; // the leaf's boolean outcome +}; + +// The engine's output for one request. Pure selection — no verdict / +// route-category / action in core; those are read by trust off `outputs`. 
+struct Decision { + std::string route_to; // selected candidate (also the `model`) + std::string matched_rule; // matched rule id, empty if defaulted + bool default_used = false; // true => fell through to default_model + json outputs = json::object(); // verbatim from the matched rule + std::vector trace; // populated only when trace requested +}; + +// --------------------------------------------------------------------------- +// Match evaluation (runtime tree) +// --------------------------------------------------------------------------- + +// Per-request state threaded through a Condition tree during evaluation. +// Mutable: classifiers memoize their Score here (each runs at most once per +// request) and leaves append to `trace` when `want_trace` is set. +struct EvalContext { + const RouteContext& request; + const ClassifierServices& services; + bool want_trace = false; + + // classifier id -> its Score for this request. Input text is constant within + // a request, so the classifier id is a sufficient memo key. + std::map memo; + + // Per-condition trace; appended only when want_trace, surfaced verbatim as + // Decision::trace. + std::vector trace; +}; + +// A node in a rule's compiled match tree. Both composites (all/any/not) and +// leaves (deterministic ops, classifier-band) are Conditions. The registry +// compiles a MatchExpr — whose leaves are raw JSON — into a Condition tree via a +// LeafFactory; the evaluator supplies the composites and the classifier-band +// leaf; the engine evaluates the root Condition per rule. +// +// Implementations MUST NOT throw: a classifier failure surfaces via Score::ok +// and the band's on_error policy, never an exception (the engine fails open to +// default_model on anything unexpected). +class Condition { +public: + virtual ~Condition() = default; + virtual bool evaluate(EvalContext& ctx) const = 0; +}; +using ConditionPtr = std::shared_ptr; + +// Builds a leaf Condition from a single leaf object (e.g. 
{"keywords_any":[...]} +// or {"classifier":"pii","min_score":0.5}). This is the seam between the +// structural evaluator (composites) and the concrete leaf builders (deterministic +// ops + classifier-band) registered downstream — neither side depends on the +// other's implementation, only on this typedef. +using LeafFactory = std::function; + +// --------------------------------------------------------------------------- +// Policy + engine (constructor signature only here) +// --------------------------------------------------------------------------- + +// The parsed, resolved routing policy (produced by the parser). Classifier +// condition refs in the rules resolve against `classifiers` by id. +struct RoutePolicy { + std::vector candidates; // routing targets + std::string default_model; // fail-open target ∈ candidates + std::vector rules; // ordered, first-match-wins + std::map classifiers; // id -> classifier +}; + +// The routing engine. The CONSTRUCTOR SIGNATURE is frozen here; the routing +// logic (first-match evaluation, fail-open, trace assembly) is implemented with +// the engine assembly. `route()` is declared but intentionally NOT defined in +// the foundation — the contract test constructs the engine but never calls it. +class RoutingPolicyEngine { +public: + RoutingPolicyEngine(RoutePolicy policy, ClassifierServices services) + : policy_(std::move(policy)), services_(std::move(services)) {} + + // Select a candidate for `ctx`. When `want_trace` is set, the returned + // Decision carries a per-condition trace. 
+ Decision route(const RouteContext& ctx, bool want_trace) const; + + const RoutePolicy& policy() const { return policy_; } + +private: + RoutePolicy policy_; + ClassifierServices services_; +}; + +} // namespace lemon diff --git a/src/cpp/resources/schemas/README.md b/src/cpp/resources/schemas/README.md new file mode 100644 index 000000000..0b24259b8 --- /dev/null +++ b/src/cpp/resources/schemas/README.md @@ -0,0 +1,110 @@ +# Routing engine schemas (Lemonade Router) + +Frozen JSON Schemas for the generic routing engine. The engine does **pure model +selection** (boolean rules over classifiers, first-match-wins, fail-open to +`default_model`); trust-specific concerns (verdicts, block, audit, consent) are +layered on top via the `outputs` pass-through bag, request `metadata`, and the +decision `trace` — never in the engine. + +| Schema | Describes | +|--------|-----------| +| `route_policy.schema.json` | The `routing` block embedded in a `collection.router` collection JSON. Invoked like `collection.omni`: point the OpenAI `model` field at the collection name. | +| `decision.schema.json` | The `x_lemonade_route` decision object attached additively to the chat response. | + +## Versioning + +Both schemas carry a **required** root `version` field — these files define +version `"1"`. It is required (not optional) because the format is greenfield: +declaring it from the first policy avoids a breaking "add it later" migration and +lets a server branch cleanly on shape instead of guessing when the field is +absent. A server validates a document against the schema matching its major +version and rejects an unknown major with a clear message rather than a confusing +per-key error. The policy `version` is author-declared; the decision `version` is +always emitted by the engine. + +Evolution rule: +- **Additive, backward-compatible data** (new keys in the open `outputs` bag or + request `metadata`) needs no version bump — those seams are unconstrained. 
+- **New vocabulary** (a new condition op, classifier `type`, or decision field) + is a deliberate schema edit; a compatible addition stays within major `1`, a + breaking change ships a new major and a new schema file. + +## Compatibility & frozen semantics + +**Hard rule: a future server must never break a policy authored against an +earlier major.** This holds structurally — old policies keep validating against a +newer (superset) schema, and `additionalProperties: false` only constrains the +forward direction (an old server rejecting a *newer* policy), which `version` +handles cleanly. + +Two operating principles keep it true: + +1. **Never redefine a shipped field — only add.** A v1 field's meaning is + immutable. New behavior arrives as a *new* field / op / classifier `type` + (which old policies don't use), never as a reinterpretation of an existing + one. The `classifier.type` enum already reserves future preset names for this + reason. +2. **Never delete or edit a shipped major's schema + parser.** Evolution is + additive: ship `vN+1` schemas and per-major load-time shims that upgrade older + documents to the latest internal form; retain every prior major's schema so + its policies still validate and route. + +These are enforced mechanically, not just by convention: + +- `schema-lock.json` + `test/test_schema_lock.py` pin a canonical hash of each + schema. A **released** major is immutable — any change fails CI (ship a new + major instead). An **unreleased** major may change, but only with a refreshed + lock in the same diff (`python test/test_schema_lock.py --update`), so every + schema edit is a visible, reviewed change rather than silent drift. (v1 is + `released: false` until schema sign-off; flip it to `true` at release.) +- `test/test_routing_fixtures.py` keeps the schemas self-valid and the example + fixtures conformant. 
+- A frozen conformance corpus (golden policy → expected `Decision`) will enforce + *behavioral* stability across versions; schema↔parser key parity is checked + where the parser is built. (Both are tracked separately in the milestone.) + +Lemonade executes a policy identically regardless of version — the only behavioral +drift for model-backed classifiers (semantic_similarity / classifier / llm) comes +from the **backend engine or the model**, not from lemonade. To keep that drift +from silently flipping a route, the engine-owned semantics below are **frozen for +v1** (pinned in the schema field descriptions): + +| Semantic | Frozen v1 definition | +|----------|----------------------| +| `keywords_any` / `keywords_all` | case-insensitive **substring** over input text | +| `regex` | **ECMAScript** dialect (`std::regex`) | +| `min_score` / `max_score` | **inclusive** band (`>=` / `<=`); default `min_score: 0.5` when neither bound is given | +| `min_chars` / `max_chars` | input length in **UTF-8 bytes** (not code points) | +| `on_error` (omitted) | default **`match_false`** (fail-open) | +| `routing.router` desugaring | expansion to one `llm` classifier + identity rules is deterministic and behavior-equivalent across versions | + +Anything fancier (token/BM25 keyword matching, a different regex engine, +token-based length) ships as a new, separately named op — never by changing one +of the above. + +## Authoring levels (example fixtures) + +Lean local-form examples — `components` reference already-registered models by +name; the full `models[]` manifest is only needed for Hugging Face +redistribution. 
Fixtures live in `test/cpp/fixtures/routing/`: + +| Fixture | Level | Mechanism | +|---------|-------|-----------| +| `l0a_llm_router.json` | L0(a) | `routing.router` LLM-as-router (desugars to one `llm` classifier + identity rules) | +| `l1_keywords.json` | L1 | Deterministic `keywords_any` / `regex` / `min_chars` | +| `l2_semantic.json` | L2 | `semantic_similarity` classifier (embeddings + cosine) | +| `l3_classifier.json` | L3 | Model-backed `classifier` (PII / jailbreak) | +| `decision_example.json` | — | A `Decision` + `trace` | + +Levels **compose** — one policy may mix a router, classifiers, and deterministic +conditions across its rules. + +## Contract surface + +The C++ types/interfaces these schemas back live in +`src/cpp/include/lemon/routing_policy.h`. Validation: + +- **Schema validation** — `python test/test_routing_fixtures.py` (uses `jsonschema`). +- **Contract / cross-field invariants** — `test/cpp/test_routing_policy_contract.cpp` + (CTest test `RoutingPolicyContractTest`): default_model and every + `route_to` must be a candidate; classifier condition refs must resolve. diff --git a/src/cpp/resources/schemas/decision.schema.json b/src/cpp/resources/schemas/decision.schema.json new file mode 100644 index 000000000..a62f35850 --- /dev/null +++ b/src/cpp/resources/schemas/decision.schema.json @@ -0,0 +1,57 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://lemonade-sdk.github.io/schemas/decision.schema.json", + "title": "Lemonade route decision (x_lemonade_route)", + "description": "The decision object the engine emits, attached additively to the chat response body as `x_lemonade_route`. Pure model selection — no verdict/route-category/action in core; those are trust-customer concerns read off `outputs`. 
The scalar matched_rule is also surfaced in the `x-lemonade-route` response header.", + "type": "object", + "required": ["version", "route_to", "matched_rule", "default_used"], + "properties": { + "version": { + "description": "Schema major version of this decision object. This file defines version \"1\"; the engine always emits it so clients and audit sinks can branch on shape.", + "const": "1" + }, + "route_to": { + "description": "The selected candidate; also carried by the standard `model` field.", + "type": "string" + }, + "matched_rule": { + "description": "Id of the rule that matched; empty when the request fell through to default_model.", + "type": "string" + }, + "default_used": { + "description": "true when no rule matched and default_model was used (fail-open).", + "type": "boolean" + }, + "outputs": { + "description": "Copied verbatim from the matched rule; the engine never interprets it.", + "type": "object" + }, + "trace": { + "description": "Per-condition trace. Present ONLY when the request set route_trace=true; minimal/omitted by default so policy-internal signals do not leak to end users.", + "type": "array", + "items": { "$ref": "#/$defs/trace_entry" } + } + }, + "additionalProperties": false, + "$defs": { + "trace_entry": { + "type": "object", + "required": ["condition", "result"], + "properties": { + "condition": { + "description": "Identifies the leaf, e.g. 
\"classifier:pii\" or \"keywords_any\".", + "type": "string" + }, + "score": { + "description": "Present for classifier conditions; the score the band was applied to.", + "type": "number" + }, + "result": { + "description": "The leaf's boolean outcome.", + "type": "boolean" + } + }, + "additionalProperties": false + } + } +} diff --git a/src/cpp/resources/schemas/route_policy.schema.json b/src/cpp/resources/schemas/route_policy.schema.json new file mode 100644 index 000000000..b610a7757 --- /dev/null +++ b/src/cpp/resources/schemas/route_policy.schema.json @@ -0,0 +1,176 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://lemonade-sdk.github.io/schemas/route_policy.schema.json", + "title": "Lemonade route policy (collection.router)", + "description": "The routing block embedded in a collection.router collection JSON. Distributed and invoked exactly like collection.omni: point the OpenAI `model` field at the collection name and recipe:collection.router flips the server into engine mode. Generic engine; trust is a consumer. Vocabulary mirrors vLLM Semantic Router for migration.", + "type": "object", + "required": ["version", "recipe", "routing"], + "properties": { + "version": { + "description": "Schema major version this document targets. This file defines version \"1\"; a server validates a policy against the schema matching its major version and rejects unknown majors with a clear message rather than a per-key error. Required so parsers never have to guess.", + "const": "1" + }, + "model_name": { "type": "string" }, + "recipe": { "const": "collection.router" }, + "components": { + "description": "Lean local form: names of already-registered models (candidates AND classifier models). 
The full models[] manifest is only needed for Hugging Face redistribution.", + "type": "array", + "items": { "type": "string" } + }, + "models": { + "description": "Embedded component definitions for HF redistribution only; omitted when authoring locally.", + "type": "array" + }, + "routing": { "$ref": "#/$defs/routing" } + }, + "$defs": { + "routing": { + "type": "object", + "required": ["candidates", "default_model"], + "properties": { + "candidates": { + "description": "Explicit routing-target set (analog of vLLM SR providers.models). default_model and every rule.route_to must be a member.", + "type": "array", + "minItems": 1, + "items": { "type": "string" } + }, + "default_model": { + "description": "Fail-open target; must be a member of candidates.", + "type": "string" + }, + "router": { "$ref": "#/$defs/router_sugar" }, + "classifiers": { + "type": "array", + "items": { "$ref": "#/$defs/classifier" } + }, + "rules": { + "type": "array", + "items": { "$ref": "#/$defs/rule" } + } + }, + "anyOf": [ + { "required": ["router"] }, + { "required": ["rules"] } + ], + "additionalProperties": false + }, + "router_sugar": { + "description": "Zero-config L0(a) LLM-as-router. Desugars at load into one `llm` classifier (labels = candidates) + identity rules (label X -> route_to X). This desugaring is pure, deterministic engine logic and is frozen for v1: its expansion must stay behavior-equivalent so existing L0(a) policies keep routing identically across server versions.", + "type": "object", + "required": ["type", "model", "prompt"], + "properties": { + "type": { "const": "llm" }, + "model": { "type": "string" }, + "prompt": { "type": "string" } + }, + "additionalProperties": false + }, + "classifier": { + "type": "object", + "required": ["id", "type"], + "properties": { + "id": { "type": "string" }, + "type": { + "description": "v1: semantic_similarity, classifier, llm. 
Reserved vLLM-SR presets ship post-v1 as thin presets over `classifier`.", + "enum": [ + "semantic_similarity", + "classifier", + "llm", + "pii_detection", + "prompt_safety", + "language_detection", + "domain_classification", + "complexity", + "sentiment" + ] + }, + "model": { "type": "string" }, + "prompt": { "type": "string" }, + "labels": { + "description": "classifier output labels; dangling condition `label` refs are rejected.", + "type": "array", + "items": { "type": "string" } + }, + "default_label": { + "description": "Used when a condition omits `label`.", + "type": "string" + }, + "candidates": { + "description": "semantic_similarity only: exemplar phrases embedded once and compared by cosine.", + "type": "array", + "items": { "type": "string" } + }, + "on_error": { + "description": "Behavior when the classifier fails to evaluate. match_true = fail-closed authoring. Frozen v1 default when omitted: match_false (fail-open).", + "enum": ["match_true", "match_false"], + "default": "match_false" + } + }, + "additionalProperties": false + }, + "rule": { + "type": "object", + "required": ["id", "match", "route_to"], + "properties": { + "id": { "type": "string" }, + "match": { "$ref": "#/$defs/match_expr" }, + "route_to": { + "description": "Must name a candidate. The local/private/cloud route category is derived from the chosen component's recipe.", + "type": "string" + }, + "outputs": { + "description": "Engine-opaque pass-through bag, copied verbatim into the Decision (trust puts verdict here).", + "type": "object" + } + }, + "additionalProperties": false + }, + "match_expr": { + "description": "Nested any/all/not over leaf conditions; first-match-wins across rules. 
vLLM SR's flat single-operator form is the subset.", + "type": "object", + "minProperties": 1, + "properties": { + "any": { "type": "array", "items": { "$ref": "#/$defs/match_expr" } }, + "all": { "type": "array", "items": { "$ref": "#/$defs/match_expr" } }, + "not": { "$ref": "#/$defs/match_expr" }, + "classifier": { "type": "string" }, + "label": { "type": "string" }, + "min_score": { + "description": "Frozen v1 semantics: inclusive lower bound (score >= min_score). When neither min_score nor max_score is given, the band defaults to min_score: 0.5.", + "type": "number" + }, + "max_score": { + "description": "Frozen v1 semantics: inclusive upper bound (score <= max_score).", + "type": "number" + }, + "keywords_any": { + "description": "True if ANY keyword is present. Frozen v1 match: case-insensitive substring over the input text. Token/BM25 matching is a future, separately named op, never a redefinition of this one.", + "type": "array", + "items": { "type": "string" } + }, + "keywords_all": { + "description": "True if ALL keywords are present. Frozen v1 match: case-insensitive substring over the input text.", + "type": "array", + "items": { "type": "string" } + }, + "regex": { + "description": "True if the pattern matches the input. Frozen v1 dialect: ECMAScript (std::regex). A different regex engine ships as a separately named op, never a redefinition of this one.", + "type": "string" + }, + "min_chars": { + "description": "Inclusive lower bound on input length. Frozen v1 unit: UTF-8 bytes (not Unicode code points). Token-based length is a future min_tokens, never a redefinition of this one.", + "type": "integer", + "minimum": 0 + }, + "max_chars": { + "description": "Inclusive upper bound on input length. 
Frozen v1 unit: UTF-8 bytes (not Unicode code points).", + "type": "integer", + "minimum": 0 + }, + "has_tools": { "type": "boolean" }, + "has_images": { "type": "boolean" } + }, + "additionalProperties": false + } + } +} diff --git a/src/cpp/resources/schemas/schema-lock.json b/src/cpp/resources/schemas/schema-lock.json new file mode 100644 index 000000000..254adeaf8 --- /dev/null +++ b/src/cpp/resources/schemas/schema-lock.json @@ -0,0 +1,10 @@ +{ + "route_policy.schema.json": { + "sha256": "95c330125a343f37ac01ada491d0ec767c97ef623fc68fe53d6bb8dfa31f2e26", + "released": false + }, + "decision.schema.json": { + "sha256": "bcbc6c56c6e8f1426a774d3318b3409042ad8447d64a553b0b191335b7599150", + "released": false + } +} diff --git a/test/cpp/fake_classifier_services.h b/test/cpp/fake_classifier_services.h new file mode 100644 index 000000000..31161e677 --- /dev/null +++ b/test/cpp/fake_classifier_services.h @@ -0,0 +1,74 @@ +#pragma once + +#include +#include +#include +#include "lemon/routing_policy.h" + +// A behavior-free fake ClassifierServices for routing-engine unit tests. Tests +// that exercise the contract surface — the match-expression evaluator, the +// classifier registry, the individual classifiers — bind the engine to this +// instead of the live Router so they run with no backend subprocess. +// +// It returns fixed, caller-configured outputs: +// - embed(model, text) -> a fixed vector (default: one configured per +// model, else a deterministic unit vector). +// - run_classifier(model, text) -> a fixed label->score map per model. +// - chat(model, prompt, input) -> a fixed reply per model. +// +// Nothing here implements routing or scoring logic; tests dictate every output. + +namespace lemon { +namespace testing { + +class FakeClassifierServices { +public: + // Configure a fixed embedding vector returned for `model`. 
+ void set_embedding(const std::string& model, std::vector vec) { + embeddings_[model] = std::move(vec); + } + + // Configure a fixed label->score map returned for `model`. + void set_classifier_scores(const std::string& model, + std::map scores) { + classifier_scores_[model] = std::move(scores); + } + + // Configure a fixed chat reply returned for `model`. + void set_chat_reply(const std::string& model, std::string reply) { + chat_replies_[model] = std::move(reply); + } + + // Build a ClassifierServices wired to this fake. The returned struct copies + // `this` by pointer, so keep the FakeClassifierServices alive for the + // services' lifetime. + ClassifierServices make() { + ClassifierServices svc; + FakeClassifierServices* self = this; + svc.embed = [self](const std::string& model, const std::string&) { + auto it = self->embeddings_.find(model); + if (it != self->embeddings_.end()) return it->second; + return std::vector{1.0f, 0.0f, 0.0f}; + }; + svc.run_classifier = [self](const std::string& model, const std::string&) { + auto it = self->classifier_scores_.find(model); + if (it != self->classifier_scores_.end()) return it->second; + return std::map{}; + }; + svc.chat = [self](const std::string& model, const std::string&, + const std::string&) { + auto it = self->chat_replies_.find(model); + if (it != self->chat_replies_.end()) return it->second; + return std::string{}; + }; + return svc; + } + +private: + std::map> embeddings_; + std::map> classifier_scores_; + std::map chat_replies_; +}; + +} // namespace testing +} // namespace lemon diff --git a/test/cpp/fixtures/routing/decision_example.json b/test/cpp/fixtures/routing/decision_example.json new file mode 100644 index 000000000..0a8b6c17a --- /dev/null +++ b/test/cpp/fixtures/routing/decision_example.json @@ -0,0 +1,11 @@ +{ + "version": "1", + "route_to": "Qwen3-8B-GGUF", + "matched_rule": "keep-private", + "default_used": false, + "outputs": { "verdict": "warn" }, + "trace": [ + { "condition": 
"classifier:pii", "score": 0.81, "result": true }, + { "condition": "keywords_any", "result": false } + ] +} diff --git a/test/cpp/fixtures/routing/l0a_llm_router.json b/test/cpp/fixtures/routing/l0a_llm_router.json new file mode 100644 index 000000000..1828f57fb --- /dev/null +++ b/test/cpp/fixtures/routing/l0a_llm_router.json @@ -0,0 +1,15 @@ +{ + "version": "1", + "model_name": "user.Router-Auto", + "recipe": "collection.router", + "components": ["Qwen3-1.7B-GGUF", "Qwen3-8B-GGUF", "Qwen3.5-35B-A3B-GGUF"], + "routing": { + "candidates": ["Qwen3-8B-GGUF", "Qwen3.5-35B-A3B-GGUF"], + "default_model": "Qwen3-8B-GGUF", + "router": { + "type": "llm", + "model": "Qwen3-1.7B-GGUF", + "prompt": "You route requests. Reply with ONLY a model name. Use Qwen3-8B-GGUF for everyday questions; use Qwen3.5-35B-A3B-GGUF for hard reasoning, coding, or long context." + } + } +} diff --git a/test/cpp/fixtures/routing/l1_keywords.json b/test/cpp/fixtures/routing/l1_keywords.json new file mode 100644 index 000000000..b8664d0a8 --- /dev/null +++ b/test/cpp/fixtures/routing/l1_keywords.json @@ -0,0 +1,20 @@ +{ + "version": "1", + "model_name": "user.Router-Keywords", + "recipe": "collection.router", + "components": ["Qwen3-8B-GGUF", "vllm.qwen3-32b"], + "routing": { + "candidates": ["Qwen3-8B-GGUF", "vllm.qwen3-32b"], + "default_model": "Qwen3-8B-GGUF", + "rules": [ + { "id": "code-to-big", + "match": { "any": [ + { "keywords_any": ["def ", "function", "stack trace", "compile"] }, + { "regex": "```[a-z]*" } ] }, + "route_to": "vllm.qwen3-32b" }, + { "id": "long-context-to-big", + "match": { "min_chars": 4000 }, + "route_to": "vllm.qwen3-32b" } + ] + } +} diff --git a/test/cpp/fixtures/routing/l2_semantic.json b/test/cpp/fixtures/routing/l2_semantic.json new file mode 100644 index 000000000..0c25d5d39 --- /dev/null +++ b/test/cpp/fixtures/routing/l2_semantic.json @@ -0,0 +1,20 @@ +{ + "version": "1", + "model_name": "user.Router-Semantic", + "recipe": "collection.router", + "components": 
["Qwen3-8B-GGUF", "vllm.qwen3-32b", "nomic-embed-text-v1.5-GGUF"], + "routing": { + "candidates": ["Qwen3-8B-GGUF", "vllm.qwen3-32b"], + "default_model": "Qwen3-8B-GGUF", + "classifiers": [ + { "id": "is_coding", "type": "semantic_similarity", + "model": "nomic-embed-text-v1.5-GGUF", + "candidates": ["write a function", "fix this bug", "refactor this code", "time complexity"] } + ], + "rules": [ + { "id": "coding-to-big", + "match": { "classifier": "is_coding", "min_score": 0.78 }, + "route_to": "vllm.qwen3-32b" } + ] + } +} diff --git a/test/cpp/fixtures/routing/l3_classifier.json b/test/cpp/fixtures/routing/l3_classifier.json new file mode 100644 index 000000000..1d16eaced --- /dev/null +++ b/test/cpp/fixtures/routing/l3_classifier.json @@ -0,0 +1,24 @@ +{ + "version": "1", + "model_name": "user.Router-Classify", + "recipe": "collection.router", + "components": ["Qwen3-8B-GGUF", "vllm.qwen3-32b", "pii-detector-small", "jailbreak-detector-small"], + "routing": { + "candidates": ["Qwen3-8B-GGUF", "vllm.qwen3-32b"], + "default_model": "vllm.qwen3-32b", + "classifiers": [ + { "id": "pii", "type": "classifier", "model": "pii-detector-small", + "labels": ["PII", "NO_PII"], "default_label": "PII", "on_error": "match_true" }, + { "id": "jailbreak", "type": "classifier", "model": "jailbreak-detector-small", + "labels": ["JAILBREAK", "BENIGN"], "default_label": "JAILBREAK", "on_error": "match_true" } + ], + "rules": [ + { "id": "sensitive-stays-local", + "match": { "any": [ + { "classifier": "pii", "min_score": 0.5 }, + { "classifier": "jailbreak", "min_score": 0.5 } ] }, + "route_to": "Qwen3-8B-GGUF", + "outputs": { "verdict": "warn" } } + ] + } +} diff --git a/test/cpp/test_routing_policy_contract.cpp b/test/cpp/test_routing_policy_contract.cpp new file mode 100644 index 000000000..7effac061 --- /dev/null +++ b/test/cpp/test_routing_policy_contract.cpp @@ -0,0 +1,302 @@ +// Foundation contract test for the generic routing engine. 
+// +// This test asserts the CONTRACT SURFACE only — there is no engine behavior yet +// (the evaluator, registry, and assembly are implemented separately). It proves: +// 1. routing_policy.h compiles standalone with no backend/Router include. +// 2. Every contract type constructs and round-trips its fields. +// 3. The fake ClassifierServices satisfies the injection seam and is callable. +// 4. RoutingPolicyEngine is constructible (route() is intentionally not called). +// 5. The committed L0a-L3 fixtures parse and satisfy the locked structural +// invariants (candidates non-empty; default_model and every route_to are +// candidates; classifier condition refs resolve; router and/or rules present). +// Full JSON-Schema validation lives in the Python test +// test/test_routing_fixtures.py. +// +// Compile (standalone): +// cl /std:c++17 /EHsc /I src/cpp/include /I build/_deps/json-src/include \ +// /DROUTING_FIXTURE_DIR=... test/cpp/test_routing_policy_contract.cpp + +#include "fake_classifier_services.h" +#include "lemon/routing_policy.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef ROUTING_FIXTURE_DIR +#define ROUTING_FIXTURE_DIR "test/cpp/fixtures/routing" +#endif + +using lemon::ClassifierServices; +using lemon::Condition; +using lemon::ConditionPtr; +using lemon::Decision; +using lemon::EvalContext; +using lemon::LeafFactory; +using lemon::MatchExpr; +using lemon::OnError; +using lemon::RouteContext; +using lemon::RoutePolicy; +using lemon::RoutingPolicyEngine; +using lemon::Rule; +using lemon::Score; +using lemon::TraceEntry; +using lemon::json; + +static int g_failures = 0; + +static void check(bool cond, const char* what) { + std::printf("[%s] %s\n", cond ?
"PASS" : "FAIL", what); + if (!cond) ++g_failures; +} + +static json load_json(const std::string& name) { + std::string path = std::string(ROUTING_FIXTURE_DIR) + "/" + name; + std::ifstream in(path); + if (!in) { + std::printf("[FAIL] could not open fixture %s\n", path.c_str()); + ++g_failures; + return json::object(); + } + std::stringstream ss; + ss << in.rdbuf(); + return json::parse(ss.str(), nullptr, /*allow_exceptions=*/false); +} + +// --------------------------------------------------------------------------- +// 1-2. Contract types construct and carry their fields. +// --------------------------------------------------------------------------- +static void test_types_construct() { + RouteContext ctx; + ctx.input = "write a function to reverse a list"; + ctx.params.model = "user.Router-Keywords"; + ctx.params.has_tools = true; + ctx.params.has_images = false; + ctx.params.chars = ctx.input.size(); + ctx.metadata["task_class"] = "payment"; + check(ctx.metadata.at("task_class") == "payment", "RouteContext carries metadata"); + check(ctx.params.chars == ctx.input.size(), "RouteContext params carry char count"); + + // Score helpers (the label->score contract). + Score s; + s.labels["PII"] = 0.81; + s.labels["NO_PII"] = 0.19; + check(s.ok, "Score defaults ok=true"); + check(s.score_of("PII") == 0.81, "Score::score_of returns the labeled score"); + check(s.score_of("missing") == 0.0, "Score::score_of returns 0 for absent label"); + + Score sim; + sim.labels[""] = 0.73; // semantic_similarity single-score shape + check(sim.primary() == 0.73, "Score::primary reads the empty-key cosine score"); + + // on_error round-trips through the single-source-of-truth mapping. 
+ check(lemon::parse_on_error("match_true") == OnError::MatchTrue, "parse_on_error match_true"); + check(lemon::parse_on_error("match_false") == OnError::MatchFalse, "parse_on_error match_false"); + check(std::string(lemon::on_error_to_string(OnError::MatchTrue)) == "match_true", + "on_error_to_string round-trips"); + + // Match AST: any[ leaf, not(leaf) ]. + MatchExpr leaf; + leaf.op = MatchExpr::Op::Leaf; + leaf.leaf = json{{"keywords_any", {"def ", "function"}}}; + MatchExpr neg; + neg.op = MatchExpr::Op::Not; + neg.children.push_back(leaf); + MatchExpr expr; + expr.op = MatchExpr::Op::Any; + expr.children = {leaf, neg}; + check(expr.children.size() == 2, "MatchExpr nests children"); + check(expr.children[1].op == MatchExpr::Op::Not, "MatchExpr carries Not node"); + + Rule rule; + rule.id = "code-to-big"; + rule.match = expr; + rule.route_to = "vllm.qwen3-32b"; + rule.outputs = json{{"verdict", "warn"}}; + check(rule.outputs.at("verdict") == "warn", "Rule carries opaque outputs bag"); + + // Decision + trace shape. + Decision d; + d.route_to = "Qwen3-8B-GGUF"; + d.matched_rule = "keep-private"; + d.default_used = false; + d.outputs = json{{"verdict", "warn"}}; + d.trace.push_back(TraceEntry{"classifier:pii", 0.81, true}); + d.trace.push_back(TraceEntry{"keywords_any", std::nullopt, false}); + check(d.trace.size() == 2, "Decision carries trace entries"); + check(d.trace[0].score.has_value() && *d.trace[0].score == 0.81, + "TraceEntry carries optional classifier score"); + check(!d.trace[1].score.has_value(), "TraceEntry score is absent for deterministic leaf"); +} + +// --------------------------------------------------------------------------- +// 2b. The Condition / EvalContext / LeafFactory evaluation seam. +// --------------------------------------------------------------------------- +namespace { +// A trivial leaf Condition: stands in for the deterministic/classifier-band +// conditions built downstream, proving the interface is implementable here. 
+struct ConstCondition : Condition { + bool value; + explicit ConstCondition(bool v) : value(v) {} + bool evaluate(EvalContext& ctx) const override { + if (ctx.want_trace) ctx.trace.push_back(TraceEntry{"const", std::nullopt, value}); + return value; + } +}; +} // namespace + +static void test_condition_seam() { + RouteContext ctx; + ctx.input = "hi"; + lemon::testing::FakeClassifierServices fake; + ClassifierServices svc = fake.make(); + + EvalContext ec{ctx, svc}; + ec.want_trace = true; + ec.memo["pii"] = Score{}; // memo is keyed by classifier id + check(ec.memo.count("pii") == 1, "EvalContext memo is keyed by classifier id"); + + ConditionPtr cond = std::make_shared(true); + check(cond->evaluate(ec), "Condition::evaluate is callable through the interface"); + check(ec.trace.size() == 1 && ec.trace[0].condition == "const", + "a Condition appends to EvalContext::trace when want_trace"); + + LeafFactory factory = [](const json& leaf) -> ConditionPtr { + return std::make_shared(leaf.value("v", false)); + }; + ConditionPtr built = factory(json{{"v", true}}); + check(static_cast(built) && built->evaluate(ec), + "LeafFactory builds a leaf Condition from leaf JSON"); +} + +// --------------------------------------------------------------------------- +// 3. The fake ClassifierServices satisfies the injection seam. 
+// --------------------------------------------------------------------------- +static void test_fake_services() { + lemon::testing::FakeClassifierServices fake; + fake.set_embedding("nomic-embed-text-v1.5-GGUF", {0.0f, 1.0f, 0.0f}); + fake.set_classifier_scores("pii-detector-small", {{"PII", 0.9}, {"NO_PII", 0.1}}); + fake.set_chat_reply("Qwen3-1.7B-GGUF", "Qwen3.5-35B-A3B-GGUF"); + + ClassifierServices svc = fake.make(); + check(static_cast(svc.embed) && static_cast(svc.run_classifier) && + static_cast(svc.chat), + "ClassifierServices exposes embed/run_classifier/chat"); + + auto vec = svc.embed("nomic-embed-text-v1.5-GGUF", "anything"); + check(vec.size() == 3 && vec[1] == 1.0f, "fake embed returns configured vector"); + + auto scores = svc.run_classifier("pii-detector-small", "my ssn is ..."); + check(scores.at("PII") == 0.9, "fake run_classifier returns configured scores"); + + auto reply = svc.chat("Qwen3-1.7B-GGUF", "route this", "hard reasoning task"); + check(reply == "Qwen3.5-35B-A3B-GGUF", "fake chat returns configured reply"); +} + +// --------------------------------------------------------------------------- +// 4. The engine is constructible against the contract (route() not called). +// --------------------------------------------------------------------------- +static void test_engine_constructs() { + RoutePolicy policy; + policy.candidates = {"Qwen3-8B-GGUF", "vllm.qwen3-32b"}; + policy.default_model = "Qwen3-8B-GGUF"; + + lemon::testing::FakeClassifierServices fake; + RoutingPolicyEngine engine(std::move(policy), fake.make()); + check(engine.policy().candidates.size() == 2, "RoutingPolicyEngine is constructible"); + check(engine.policy().default_model == "Qwen3-8B-GGUF", "engine exposes its policy"); +} + +// --------------------------------------------------------------------------- +// 5. Fixtures parse and satisfy the locked structural invariants. 
+// --------------------------------------------------------------------------- +static void validate_fixture(const std::string& name) { + json doc = load_json(name); + std::string tag = "fixture " + name + ":"; + + if (!doc.is_object() || !doc.contains("routing")) { + check(false, (tag + " parses with a routing block").c_str()); + return; + } + check(doc.value("version", "") == "1", (tag + " declares schema version 1").c_str()); + check(doc.value("recipe", "") == "collection.router", + (tag + " recipe is collection.router").c_str()); + + const json& routing = doc["routing"]; + std::set candidates; + for (const auto& c : routing.value("candidates", json::array())) { + candidates.insert(c.get()); + } + check(!candidates.empty(), (tag + " candidates is non-empty").c_str()); + + const std::string default_model = routing.value("default_model", std::string{}); + check(candidates.count(default_model) == 1, + (tag + " default_model is a candidate").c_str()); + + // router and/or rules (the schema's anyOf requires at least one; lean local form uses one). + const bool has_router = routing.contains("router"); + const bool has_rules = routing.contains("rules"); + check(has_router || has_rules, (tag + " declares router or rules").c_str()); + + // Every rule.route_to is a candidate. + for (const auto& rule : routing.value("rules", json::array())) { + const std::string route_to = rule.value("route_to", std::string{}); + check(candidates.count(route_to) == 1, + (tag + " rule '" + rule.value("id", std::string{}) + + "' routes to a candidate").c_str()); + } + + // Classifier condition refs resolve against declared classifier ids.
+ std::set classifier_ids; + for (const auto& c : routing.value("classifiers", json::array())) { + classifier_ids.insert(c.value("id", std::string{})); + } + std::function check_refs = [&](const json& expr) { + if (!expr.is_object()) return; + if (expr.contains("classifier")) { + check(classifier_ids.count(expr["classifier"].get()) == 1, + (tag + " classifier ref '" + + expr["classifier"].get() + "' resolves").c_str()); + } + for (const char* op : {"any", "all"}) { + if (expr.contains(op)) { + for (const auto& child : expr[op]) check_refs(child); + } + } + if (expr.contains("not")) check_refs(expr["not"]); + }; + for (const auto& rule : routing.value("rules", json::array())) { + if (rule.contains("match")) check_refs(rule["match"]); + } +} + +static void test_fixtures() { + for (const char* name : {"l0a_llm_router.json", "l1_keywords.json", + "l2_semantic.json", "l3_classifier.json"}) { + validate_fixture(name); + } + // Decision example parses and carries the locked keys. + json dec = load_json("decision_example.json"); + check(dec.value("version", "") == "1" && dec.contains("route_to") && + dec.contains("matched_rule") && dec.contains("default_used") && + dec.contains("trace"), + "decision_example.json carries version/route_to/matched_rule/default_used/trace"); +} + +int main() { + test_types_construct(); + test_condition_seam(); + test_fake_services(); + test_engine_constructs(); + test_fixtures(); + std::printf("\n%s\n", g_failures == 0 ? "ALL CONTRACT CHECKS PASSED" + : "CONTRACT CHECKS FAILED"); + return g_failures == 0 ? 
0 : 1; +} diff --git a/test/requirements.txt b/test/requirements.txt index 11125caa1..6b5687e99 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -25,3 +25,6 @@ websockets # Ollama client library (used for Ollama API compatibility tests) ollama + +# JSON Schema validator (required for routing policy / decision fixture tests) +jsonschema diff --git a/test/test_routing_fixtures.py b/test/test_routing_fixtures.py new file mode 100644 index 000000000..07e9785e8 --- /dev/null +++ b/test/test_routing_fixtures.py @@ -0,0 +1,89 @@ +"""Validate the committed routing fixtures against the frozen JSON Schemas. + +Acceptance: the lean L0a-L3 ``collection.router`` example fixtures validate +against the route-policy schema, and the decision example validates against the +decision schema. + +Run: python test/test_routing_fixtures.py +or: python -m unittest test.test_routing_fixtures +""" + +import json +import os +import unittest + +import jsonschema + +REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +SCHEMA_DIR = os.path.join(REPO_ROOT, "src", "cpp", "resources", "schemas") +FIXTURE_DIR = os.path.join(REPO_ROOT, "test", "cpp", "fixtures", "routing") + +ROUTE_POLICY_FIXTURES = [ + "l0a_llm_router.json", + "l1_keywords.json", + "l2_semantic.json", + "l3_classifier.json", +] + + +def _load(path): + with open(path, "r", encoding="utf-8") as handle: + return json.load(handle) + + +class RoutingFixtureSchemaTest(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.route_policy_schema = _load( + os.path.join(SCHEMA_DIR, "route_policy.schema.json") + ) + cls.decision_schema = _load(os.path.join(SCHEMA_DIR, "decision.schema.json")) + # Fail loudly if a schema is itself malformed. 
+ jsonschema.Draft202012Validator.check_schema(cls.route_policy_schema) + jsonschema.Draft202012Validator.check_schema(cls.decision_schema) + + def test_route_policy_fixtures_validate(self): + validator = jsonschema.Draft202012Validator(self.route_policy_schema) + for name in ROUTE_POLICY_FIXTURES: + with self.subTest(fixture=name): + doc = _load(os.path.join(FIXTURE_DIR, name)) + errors = sorted(validator.iter_errors(doc), key=lambda e: e.path) + self.assertEqual( + errors, + [], + msg="\n".join(f"{list(e.path)}: {e.message}" for e in errors), + ) + + def test_decision_example_validates(self): + doc = _load(os.path.join(FIXTURE_DIR, "decision_example.json")) + jsonschema.validate(doc, self.decision_schema) + + def test_locked_structural_invariants(self): + """Cross-field invariants the JSON Schema cannot express: default_model + and every rule.route_to must be a candidate; classifier condition refs + must resolve.""" + for name in ROUTE_POLICY_FIXTURES: + with self.subTest(fixture=name): + routing = _load(os.path.join(FIXTURE_DIR, name))["routing"] + candidates = set(routing["candidates"]) + self.assertIn(routing["default_model"], candidates) + + classifier_ids = {c["id"] for c in routing.get("classifiers", [])} + for rule in routing.get("rules", []): + self.assertIn(rule["route_to"], candidates) + self._assert_refs_resolve(rule["match"], classifier_ids) + + def _assert_refs_resolve(self, expr, classifier_ids): + if not isinstance(expr, dict): + return + if "classifier" in expr: + self.assertIn(expr["classifier"], classifier_ids) + for op in ("any", "all"): + for child in expr.get(op, []): + self._assert_refs_resolve(child, classifier_ids) + if "not" in expr: + self._assert_refs_resolve(expr["not"], classifier_ids) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/test/test_schema_lock.py b/test/test_schema_lock.py new file mode 100644 index 000000000..147b0d8ac --- /dev/null +++ b/test/test_schema_lock.py @@ -0,0 +1,110 @@ +"""Frozen-major 
schema lock — a shipped schema major must not change silently. + +Back-compat hard rule: never edit or delete a released schema major; evolve only +by adding a new major (vN+1) schema file. This test makes that rule mechanical +(a snapshot guard, the cheap analog of `buf breaking`): it hashes a canonical +form of each versioned schema and compares it to ``schema-lock.json``. + +Behavior on a hash mismatch: + * ``released: true`` -> HARD FAILURE. A released major is immutable; ship a new + major schema file instead of editing this one. + * ``released: false`` -> the schema is still under development; the edit is + allowed but you must refresh the lock in the SAME change: + python test/test_schema_lock.py --update + That turns every schema edit into a visible, reviewed + lockfile diff instead of silent drift. + +Hashing a canonical (sorted-key, whitespace-stripped) JSON serialization means +the lock tracks *semantic* content only — reformatting or LF/CRLF differences do +not trip it, but any real structural change does. 
+ +Run: python test/test_schema_lock.py # check (also via unittest) + python test/test_schema_lock.py --update # refresh lock for unreleased majors +""" + +import hashlib +import json +import os +import sys +import unittest + +REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +SCHEMA_DIR = os.path.join(REPO_ROOT, "src", "cpp", "resources", "schemas") +LOCK_PATH = os.path.join(SCHEMA_DIR, "schema-lock.json") + + +def canonical_hash(schema_path): + """sha256 of a canonical JSON serialization (sorted keys, no whitespace).""" + with open(schema_path, "r", encoding="utf-8") as handle: + obj = json.load(handle) + canonical = json.dumps(obj, sort_keys=True, separators=(",", ":")) + return hashlib.sha256(canonical.encode("utf-8")).hexdigest() + + +def load_lock(): + with open(LOCK_PATH, "r", encoding="utf-8") as handle: + return json.load(handle) + + +def update_lock(): + """Refresh hashes for every locked schema, preserving `released` flags. + + Refuses to silently re-lock a released major — that would defeat the guard. + """ + lock = load_lock() + changed = [] + for name, entry in lock.items(): + new_hash = canonical_hash(os.path.join(SCHEMA_DIR, name)) + if new_hash == entry["sha256"]: + continue + if entry.get("released"): + raise SystemExit( + f"refusing to re-lock released major '{name}': a released schema is " + f"immutable — ship a new major schema file instead of editing it." 
+ ) + entry["sha256"] = new_hash + changed.append(name) + with open(LOCK_PATH, "w", encoding="utf-8") as handle: + json.dump(lock, handle, indent=2) + handle.write("\n") + print("updated lock for:", ", ".join(changed) if changed else "(no changes)") + + +class SchemaLockTest(unittest.TestCase): + def test_every_schema_is_locked(self): + lock = load_lock() + on_disk = {f for f in os.listdir(SCHEMA_DIR) if f.endswith(".schema.json")} + self.assertEqual( + on_disk, + set(lock), + msg="a *.schema.json file is not tracked in schema-lock.json (or vice " + "versa); add it to the lock so it cannot change unnoticed", + ) + + def test_schemas_match_lock(self): + lock = load_lock() + for name, entry in lock.items(): + with self.subTest(schema=name): + actual = canonical_hash(os.path.join(SCHEMA_DIR, name)) + if actual == entry["sha256"]: + continue + if entry.get("released"): + self.fail( + f"released schema major '{name}' changed. A released major is " + f"immutable — do NOT edit it; ship a new major (vN+1) schema " + f"file. A changed lock for a released major is a breaking " + f"change and must be reviewed as one." + ) + self.fail( + f"schema '{name}' changed but its lock was not refreshed. If this " + f"is an intentional pre-release edit, run " + f"`python test/test_schema_lock.py --update` and commit the " + f"updated schema-lock.json in the same change." + ) + + +if __name__ == "__main__": + if "--update" in sys.argv: + update_lock() + else: + unittest.main(verbosity=2) From bb7cad3d89c5158ee760abcc34fe24cfc32f9fc5 Mon Sep 17 00:00:00 2001 From: ramkrishna2910 Date: Thu, 25 Jun 2026 12:43:55 -0700 Subject: [PATCH 2/7] ci: run routing foundation tests on every PR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The routing contract test and schema guards were added but nothing in CI ran them — making the schema-lock and fixture validation decorative. 
Wire them in: - cpp_server_build_test_release.yml: build test_routing_policy_contract and add RoutingPolicyContract to the ctest filter (both the Debian and macOS C++ unit test steps). - routing_schema_tests.yml: new fast, server-less PR lane that runs test/test_routing_fixtures.py (fixtures validate against the schemas) and test/test_schema_lock.py (frozen-major snapshot guard). Verified in-tree: cmake builds the target, ctest -R RoutingPolicyContract passes, and both python tests pass as invoked. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../cpp_server_build_test_release.yml | 6 ++-- .github/workflows/routing_schema_tests.yml | 31 +++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/routing_schema_tests.yml diff --git a/.github/workflows/cpp_server_build_test_release.yml b/.github/workflows/cpp_server_build_test_release.yml index 807461125..36ceb1b4e 100644 --- a/.github/workflows/cpp_server_build_test_release.yml +++ b/.github/workflows/cpp_server_build_test_release.yml @@ -276,8 +276,9 @@ jobs: cmake --preset default cmake --build --preset default --target test_directory_watcher cmake --build --preset default --target test_latest_version_fallback + cmake --build --preset default --target test_routing_policy_contract cd build - ctest --output-on-failure -R "DirectoryWatcher|LatestVersionFallback" + ctest --output-on-failure -R "DirectoryWatcher|LatestVersionFallback|RoutingPolicyContract" - name: Upload .deb package uses: actions/upload-artifact@v7 @@ -536,8 +537,9 @@ jobs: run: | cmake --build --preset default --target test_directory_watcher cmake --build --preset default --target test_latest_version_fallback + cmake --build --preset default --target test_routing_policy_contract cd build - ctest --output-on-failure -R "DirectoryWatcher|LatestVersionFallback" + ctest --output-on-failure -R "DirectoryWatcher|LatestVersionFallback|RoutingPolicyContract" - name: Upload .pkg package if: 
steps.check_signing.outputs.has_signing == 'true' diff --git a/.github/workflows/routing_schema_tests.yml b/.github/workflows/routing_schema_tests.yml new file mode 100644 index 000000000..aea6468ec --- /dev/null +++ b/.github/workflows/routing_schema_tests.yml @@ -0,0 +1,31 @@ +name: Routing Schema Tests +on: + push: + branches: ["main"] + pull_request: + merge_group: + +permissions: + contents: read + +jobs: + routing-schema-tests: + name: Routing schema + fixtures + runs-on: ubuntu-latest + concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + steps: + - uses: actions/checkout@v5 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + - name: Install dependencies + run: python -m pip install --upgrade pip jsonschema + # Pure-python, no server binary needed — fast PR gate for the routing + # policy/decision schemas. Keeps the back-compat guardrails enforced. + - name: Validate fixtures against the JSON schemas + run: python test/test_routing_fixtures.py + - name: Verify frozen-major schema lock + run: python test/test_schema_lock.py From 8edc2fb6b93c26f434324247c5e846d60ee5e072 Mon Sep 17 00:00:00 2001 From: ramkrishna2910 Date: Thu, 25 Jun 2026 13:13:16 -0700 Subject: [PATCH 3/7] schema: rename semantic_similarity `candidates` -> `reference_phrases` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `candidates` was overloaded: routing.candidates is the routing-target set (a core concept), while the semantic_similarity classifier reused the same word for its exemplar phrases — visibly ambiguous in the L2 fixture, which had `candidates` meaning two different things. Rename the classifier field to `reference_phrases` (per review on #2375); routing.candidates is unchanged. Lock refreshed. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/cpp/resources/schemas/route_policy.schema.json | 4 ++-- src/cpp/resources/schemas/schema-lock.json | 2 +- test/cpp/fixtures/routing/l2_semantic.json | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cpp/resources/schemas/route_policy.schema.json b/src/cpp/resources/schemas/route_policy.schema.json index b610a7757..3f5414534 100644 --- a/src/cpp/resources/schemas/route_policy.schema.json +++ b/src/cpp/resources/schemas/route_policy.schema.json @@ -95,8 +95,8 @@ "description": "Used when a condition omits `label`.", "type": "string" }, - "candidates": { - "description": "semantic_similarity only: exemplar phrases embedded once and compared by cosine.", + "reference_phrases": { + "description": "semantic_similarity only: exemplar phrases embedded once and compared to the input by cosine. Named distinctly from routing.candidates (the routing targets) to avoid overloading that core term.", "type": "array", "items": { "type": "string" } }, diff --git a/src/cpp/resources/schemas/schema-lock.json b/src/cpp/resources/schemas/schema-lock.json index 254adeaf8..6c9664180 100644 --- a/src/cpp/resources/schemas/schema-lock.json +++ b/src/cpp/resources/schemas/schema-lock.json @@ -1,6 +1,6 @@ { "route_policy.schema.json": { - "sha256": "95c330125a343f37ac01ada491d0ec767c97ef623fc68fe53d6bb8dfa31f2e26", + "sha256": "df8065f4657a99239bb3855315072fa2a77739d6e1f6a3cb87f2cd4840ca34d8", "released": false }, "decision.schema.json": { diff --git a/test/cpp/fixtures/routing/l2_semantic.json b/test/cpp/fixtures/routing/l2_semantic.json index 0c25d5d39..c99275f21 100644 --- a/test/cpp/fixtures/routing/l2_semantic.json +++ b/test/cpp/fixtures/routing/l2_semantic.json @@ -9,7 +9,7 @@ "classifiers": [ { "id": "is_coding", "type": "semantic_similarity", "model": "nomic-embed-text-v1.5-GGUF", - "candidates": ["write a function", "fix this bug", "refactor this code", "time complexity"] } + "reference_phrases": ["write a 
function", "fix this bug", "refactor this code", "time complexity"] } ], "rules": [ { "id": "coding-to-big", From 8e0593513b50a0304b5e6941d8e166108f912cd7 Mon Sep 17 00:00:00 2001 From: Slawomir Nowaczyk Date: Thu, 25 Jun 2026 22:25:08 +0200 Subject: [PATCH 4/7] Add metadata route condition syntax to the schema (cherry picked from commit 26b640f35a0e9caecf8bf38af8f4f04bb047120c) --- src/cpp/resources/schemas/README.md | 2 ++ .../schemas/route_policy.schema.json | 33 ++++++++++++++++++- src/cpp/resources/schemas/schema-lock.json | 2 +- test/cpp/fixtures/routing/l1_metadata.json | 20 +++++++++++ test/cpp/test_routing_policy_contract.cpp | 3 +- test/test_routing_fixtures.py | 1 + 6 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 test/cpp/fixtures/routing/l1_metadata.json diff --git a/src/cpp/resources/schemas/README.md b/src/cpp/resources/schemas/README.md index 0b24259b8..1a1ec25e2 100644 --- a/src/cpp/resources/schemas/README.md +++ b/src/cpp/resources/schemas/README.md @@ -75,6 +75,7 @@ v1** (pinned in the schema field descriptions): | `regex` | **ECMAScript** dialect (`std::regex`) | | `min_score` / `max_score` | **inclusive** band (`>=` / `<=`); default `min_score: 0.5` when neither bound is given | | `min_chars` / `max_chars` | input length in **UTF-8 bytes** (not code points) | +| `metadata` | reads a request `metadata` key; **case-sensitive** comparison, value decoded into a comma-split, trimmed **token set** (`equals` exact / `any` set-intersection / `exists` presence) | | `on_error` (omitted) | default **`match_false`** (fail-open) | | `routing.router` desugaring | expansion to one `llm` classifier + identity rules is deterministic and behavior-equivalent across versions | @@ -92,6 +93,7 @@ redistribution. 
Fixtures live in `test/cpp/fixtures/routing/`: |---------|-------|-----------| | `l0a_llm_router.json` | L0(a) | `routing.router` LLM-as-router (desugars to one `llm` classifier + identity rules) | | `l1_keywords.json` | L1 | Deterministic `keywords_any` / `regex` / `min_chars` | +| `l1_metadata.json` | L1 | Deterministic `metadata` match on caller-supplied routing inputs (`task_class` / `consent`) | | `l2_semantic.json` | L2 | `semantic_similarity` classifier (embeddings + cosine) | | `l3_classifier.json` | L3 | Model-backed `classifier` (PII / jailbreak) | | `decision_example.json` | — | A `Decision` + `trace` | diff --git a/src/cpp/resources/schemas/route_policy.schema.json b/src/cpp/resources/schemas/route_policy.schema.json index 3f5414534..5342dcb3f 100644 --- a/src/cpp/resources/schemas/route_policy.schema.json +++ b/src/cpp/resources/schemas/route_policy.schema.json @@ -168,9 +168,40 @@ "minimum": 0 }, "has_tools": { "type": "boolean" }, - "has_images": { "type": "boolean" } + "has_images": { "type": "boolean" }, + "metadata": { "$ref": "#/$defs/metadata_match" } }, "additionalProperties": false + }, + "metadata_match": { + "description": "Deterministic leaf matching a caller-supplied OpenAI `metadata` key (e.g. task_class, consent), read verbatim from the request metadata map. Frozen v1 semantics: values are compared case-sensitively; a value is decoded into a token set by splitting on comma and trimming, so scalar and list-valued metadata match uniformly. Exactly one comparator (equals / any / exists) must be present. A future comparator (regex, all, ...) ships as a new key, never a redefinition of these.", + "type": "object", + "required": ["key"], + "properties": { + "key": { + "description": "The metadata key to read. 
A missing or empty value matches only `exists: false`.", + "type": "string" + }, + "equals": { + "description": "True if the raw metadata value equals this string exactly (case-sensitive).", + "type": "string" + }, + "any": { + "description": "True if the value's comma-decoded token set intersects this list (handles both scalar and comma-encoded list-valued metadata).", + "type": "array", + "items": { "type": "string" } + }, + "exists": { + "description": "exists:true matches when the key is present and non-empty; exists:false matches when it is absent or empty.", + "type": "boolean" + } + }, + "oneOf": [ + { "required": ["equals"] }, + { "required": ["any"] }, + { "required": ["exists"] } + ], + "additionalProperties": false } } } diff --git a/src/cpp/resources/schemas/schema-lock.json b/src/cpp/resources/schemas/schema-lock.json index 6c9664180..8a2832252 100644 --- a/src/cpp/resources/schemas/schema-lock.json +++ b/src/cpp/resources/schemas/schema-lock.json @@ -1,6 +1,6 @@ { "route_policy.schema.json": { - "sha256": "df8065f4657a99239bb3855315072fa2a77739d6e1f6a3cb87f2cd4840ca34d8", + "sha256": "a1889b17aba5c1f636aaf71dcbc3f642134e35eb5dfd3eb69fad0f302449c14d", "released": false }, "decision.schema.json": { diff --git a/test/cpp/fixtures/routing/l1_metadata.json b/test/cpp/fixtures/routing/l1_metadata.json new file mode 100644 index 000000000..7a1bc2f10 --- /dev/null +++ b/test/cpp/fixtures/routing/l1_metadata.json @@ -0,0 +1,20 @@ +{ + "version": "1", + "model_name": "user.Router-Metadata", + "recipe": "collection.router", + "components": ["Qwen3-8B-GGUF", "vllm.qwen3-32b"], + "routing": { + "candidates": ["Qwen3-8B-GGUF", "vllm.qwen3-32b"], + "default_model": "vllm.qwen3-32b", + "rules": [ + { "id": "sensitive-task-stays-local", + "match": { "any": [ + { "metadata": { "key": "task_class", "any": ["payment", "checkout"] } }, + { "metadata": { "key": "consent", "equals": "denied" } } ] }, + "route_to": "Qwen3-8B-GGUF" }, + { "id": 
"no-consent-recorded-stays-local", + "match": { "metadata": { "key": "consent", "exists": false } }, + "route_to": "Qwen3-8B-GGUF" } + ] + } +} diff --git a/test/cpp/test_routing_policy_contract.cpp b/test/cpp/test_routing_policy_contract.cpp index 7effac061..20a7eaba8 100644 --- a/test/cpp/test_routing_policy_contract.cpp +++ b/test/cpp/test_routing_policy_contract.cpp @@ -279,7 +279,8 @@ static void validate_fixture(const std::string& name) { static void test_fixtures() { for (const char* name : {"l0a_llm_router.json", "l1_keywords.json", - "l2_semantic.json", "l3_classifier.json"}) { + "l1_metadata.json", "l2_semantic.json", + "l3_classifier.json"}) { validate_fixture(name); } // Decision example parses and carries the locked keys. diff --git a/test/test_routing_fixtures.py b/test/test_routing_fixtures.py index 07e9785e8..82bc9d338 100644 --- a/test/test_routing_fixtures.py +++ b/test/test_routing_fixtures.py @@ -21,6 +21,7 @@ ROUTE_POLICY_FIXTURES = [ "l0a_llm_router.json", "l1_keywords.json", + "l1_metadata.json", "l2_semantic.json", "l3_classifier.json", ] From a145cac055b75bde875663e4258fc42d4a1562bb Mon Sep 17 00:00:00 2001 From: ramkrishna2910 Date: Thu, 25 Jun 2026 14:53:29 -0700 Subject: [PATCH 5/7] schema: address review nits (request schema, score bounds, type reqs) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From @eddierichter-amd's review on #2408: - Add request.schema.json + request_example.json fixture: covers the request-side extension surface from #2376 (OpenAI `metadata` with string values + optional `route_trace`) that was missing — completes the schema deliverable. Validates only the extension fields (additionalProperties: true); no version (rides on the stock OpenAI request). - Constrain `min_score`/`max_score` to [0, 1] (the Score contract range). 
- Classifier type-specific required fields via if/then: semantic_similarity needs model + reference_phrases, classifier needs model, llm needs model + prompt — moving type validation into the schema instead of leaving it to the parser. Lock refreshed; README + both test harnesses updated (incl. a non-string-metadata rejection test and a conditional-requirement test). Fixes a dangling-temporary bug in the request check (nlohmann .items() on a .value() temporary). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/cpp/resources/schemas/README.md | 1 + src/cpp/resources/schemas/request.schema.json | 19 ++++++++++++ .../schemas/route_policy.schema.json | 18 +++++++++-- src/cpp/resources/schemas/schema-lock.json | 6 +++- .../cpp/fixtures/routing/request_example.json | 6 ++++ test/cpp/test_routing_policy_contract.cpp | 10 ++++++ test/test_routing_fixtures.py | 31 +++++++++++++++++++ 7 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 src/cpp/resources/schemas/request.schema.json create mode 100644 test/cpp/fixtures/routing/request_example.json diff --git a/src/cpp/resources/schemas/README.md b/src/cpp/resources/schemas/README.md index 1a1ec25e2..f040b76d5 100644 --- a/src/cpp/resources/schemas/README.md +++ b/src/cpp/resources/schemas/README.md @@ -9,6 +9,7 @@ decision `trace` — never in the engine. | Schema | Describes | |--------|-----------| | `route_policy.schema.json` | The `routing` block embedded in a `collection.router` collection JSON. Invoked like `collection.omni`: point the OpenAI `model` field at the collection name. | +| `request.schema.json` | The request-side extension fields on the OpenAI chat body: `metadata` (string-valued routing inputs) and the optional `route_trace`. Validates only those fields (`additionalProperties: true`); no `version` (rides on the stock OpenAI request). | | `decision.schema.json` | The `x_lemonade_route` decision object attached additively to the chat response.
| ## Versioning diff --git a/src/cpp/resources/schemas/request.schema.json b/src/cpp/resources/schemas/request.schema.json new file mode 100644 index 000000000..5ceaf10b2 --- /dev/null +++ b/src/cpp/resources/schemas/request.schema.json @@ -0,0 +1,19 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://lemonade-sdk.github.io/schemas/request.schema.json", + "title": "Lemonade routing request extension", + "description": "The lemonade-specific fields a client adds to a standard OpenAI chat-completions body when addressing a collection.router model: routing inputs on `metadata` and the optional `route_trace` opt-in. Validates ONLY these extension fields — the rest of the OpenAI body (model, messages, ...) passes through untouched (additionalProperties: true). No `version` field: this rides on the stock OpenAI request for maximum drop-in, so there is no document the client versions.", + "type": "object", + "properties": { + "metadata": { + "description": "Caller-supplied routing inputs exposed verbatim to conditions (e.g. task_class, consent). Values are strings only; list values are comma-encoded (decoded into a token set by the metadata condition).", + "type": "object", + "additionalProperties": { "type": "string" } + }, + "route_trace": { + "description": "Opt-in: when true, the response carries the full per-condition trace. 
Default omitted; minimal trace otherwise.", + "type": "boolean" + } + }, + "additionalProperties": true +} diff --git a/src/cpp/resources/schemas/route_policy.schema.json b/src/cpp/resources/schemas/route_policy.schema.json index 5342dcb3f..5f021df93 100644 --- a/src/cpp/resources/schemas/route_policy.schema.json +++ b/src/cpp/resources/schemas/route_policy.schema.json @@ -106,7 +106,15 @@ "default": "match_false" } }, - "additionalProperties": false + "additionalProperties": false, + "allOf": [ + { "if": { "required": ["type"], "properties": { "type": { "const": "semantic_similarity" } } }, + "then": { "required": ["model", "reference_phrases"] } }, + { "if": { "required": ["type"], "properties": { "type": { "const": "classifier" } } }, + "then": { "required": ["model"] } }, + { "if": { "required": ["type"], "properties": { "type": { "const": "llm" } } }, + "then": { "required": ["model", "prompt"] } } + ] }, "rule": { "type": "object", @@ -137,11 +145,15 @@ "label": { "type": "string" }, "min_score": { "description": "Frozen v1 semantics: inclusive lower bound (score >= min_score). When neither min_score nor max_score is given, the band defaults to min_score: 0.5.", - "type": "number" + "type": "number", + "minimum": 0, + "maximum": 1 }, "max_score": { "description": "Frozen v1 semantics: inclusive upper bound (score <= max_score).", - "type": "number" + "type": "number", + "minimum": 0, + "maximum": 1 }, "keywords_any": { "description": "True if ANY keyword is present. Frozen v1 match: case-insensitive substring over the input text. 
Token/BM25 matching is a future, separately named op, never a redefinition of this one.", diff --git a/src/cpp/resources/schemas/schema-lock.json b/src/cpp/resources/schemas/schema-lock.json index 8a2832252..89d96d3af 100644 --- a/src/cpp/resources/schemas/schema-lock.json +++ b/src/cpp/resources/schemas/schema-lock.json @@ -1,10 +1,14 @@ { "route_policy.schema.json": { - "sha256": "a1889b17aba5c1f636aaf71dcbc3f642134e35eb5dfd3eb69fad0f302449c14d", + "sha256": "73bc3bd7cd942ba44dbe769cda4f45bfb9d999f967cc28b15ead3f4a7d091858", "released": false }, "decision.schema.json": { "sha256": "bcbc6c56c6e8f1426a774d3318b3409042ad8447d64a553b0b191335b7599150", "released": false + }, + "request.schema.json": { + "sha256": "a8b37f9c1577bfd84e9e1f01241df6a356573a84bc09efdc6a1392a55d4a0fca", + "released": false } } diff --git a/test/cpp/fixtures/routing/request_example.json b/test/cpp/fixtures/routing/request_example.json new file mode 100644 index 000000000..269c9f40c --- /dev/null +++ b/test/cpp/fixtures/routing/request_example.json @@ -0,0 +1,6 @@ +{ + "model": "user.Router-Shopping", + "messages": [{ "role": "user", "content": "how do I return this order?" }], + "metadata": { "task_class": "payment", "site_tags": "shopping" }, + "route_trace": true +} diff --git a/test/cpp/test_routing_policy_contract.cpp b/test/cpp/test_routing_policy_contract.cpp index 20a7eaba8..f0a1f79c3 100644 --- a/test/cpp/test_routing_policy_contract.cpp +++ b/test/cpp/test_routing_policy_contract.cpp @@ -289,6 +289,16 @@ static void test_fixtures() { dec.contains("matched_rule") && dec.contains("default_used") && dec.contains("trace"), "decision_example.json carries version/route_to/matched_rule/default_used/trace"); + + // Request extension example: metadata is a string map, route_trace a bool. + json req = load_json("request_example.json"); + json md = req.contains("metadata") ? 
req["metadata"] : json::object(); + bool meta_strings = req.contains("metadata") && md.is_object(); + for (auto& kv : md.items()) { + if (!kv.value().is_string()) meta_strings = false; + } + check(meta_strings && req.value("route_trace", false) == true, + "request_example.json carries string-valued metadata and route_trace"); } int main() { diff --git a/test/test_routing_fixtures.py b/test/test_routing_fixtures.py index 82bc9d338..9afbebaa7 100644 --- a/test/test_routing_fixtures.py +++ b/test/test_routing_fixtures.py @@ -39,9 +39,11 @@ def setUpClass(cls): os.path.join(SCHEMA_DIR, "route_policy.schema.json") ) cls.decision_schema = _load(os.path.join(SCHEMA_DIR, "decision.schema.json")) + cls.request_schema = _load(os.path.join(SCHEMA_DIR, "request.schema.json")) # Fail loudly if a schema is itself malformed. jsonschema.Draft202012Validator.check_schema(cls.route_policy_schema) jsonschema.Draft202012Validator.check_schema(cls.decision_schema) + jsonschema.Draft202012Validator.check_schema(cls.request_schema) def test_route_policy_fixtures_validate(self): validator = jsonschema.Draft202012Validator(self.route_policy_schema) @@ -59,6 +61,35 @@ def test_decision_example_validates(self): doc = _load(os.path.join(FIXTURE_DIR, "decision_example.json")) jsonschema.validate(doc, self.decision_schema) + def test_request_example_validates(self): + doc = _load(os.path.join(FIXTURE_DIR, "request_example.json")) + jsonschema.validate(doc, self.request_schema) + + def test_request_rejects_non_string_metadata(self): + """metadata values must be strings (list values are comma-encoded).""" + validator = jsonschema.Draft202012Validator(self.request_schema) + bad = {"metadata": {"task_class": ["payment", "checkout"]}} + self.assertTrue(list(validator.iter_errors(bad))) + + def test_classifier_type_specific_requirements(self): + """Conditional `required` by classifier type: e.g. 
semantic_similarity + needs reference_phrases, llm needs prompt.""" + validator = jsonschema.Draft202012Validator(self.route_policy_schema) + base = { + "version": "1", + "recipe": "collection.router", + "routing": { + "candidates": ["a"], + "default_model": "a", + "classifiers": [ + {"id": "x", "type": "semantic_similarity", "model": "m"} + ], + "rules": [{"id": "r", "match": {"classifier": "x"}, "route_to": "a"}], + }, + } + # Missing reference_phrases for semantic_similarity -> invalid. + self.assertTrue(list(validator.iter_errors(base))) + def test_locked_structural_invariants(self): """Cross-field invariants the JSON Schema cannot express: default_model and every rule.route_to must be a candidate; classifier condition refs From 4c35610c88eccd401b97cef6166cd959b7fdbf54 Mon Sep 17 00:00:00 2001 From: ramkrishna2910 Date: Thu, 25 Jun 2026 14:57:20 -0700 Subject: [PATCH 6/7] docs: clarify schema-lock release flag vs sign-off Sign-off approves the design and unblocks development; it does not freeze the schema. `released` stays false through implementation (schema still refinable with a reviewed lock refresh) and flips to true at product release, when real policies exist and immutability protects users. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/cpp/resources/schemas/README.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/cpp/resources/schemas/README.md b/src/cpp/resources/schemas/README.md index f040b76d5..5fa2046db 100644 --- a/src/cpp/resources/schemas/README.md +++ b/src/cpp/resources/schemas/README.md @@ -56,8 +56,11 @@ These are enforced mechanically, not just by convention: schema. A **released** major is immutable — any change fails CI (ship a new major instead). An **unreleased** major may change, but only with a refreshed lock in the same diff (`python test/test_schema_lock.py --update`), so every - schema edit is a visible, reviewed change rather than silent drift. 
(v1 is - `released: false` until schema sign-off; flip it to `true` at release.) + schema edit is a visible, reviewed change rather than silent drift. (v1 stays + `released: false` through implementation — the design is signed off, but the + schema can still be refined with a reviewed lock refresh until the engine + ships. Flip to `true` at product release, when real policies exist in the wild + and immutability actually protects users.) - `test/test_routing_fixtures.py` keeps the schemas self-valid and the example fixtures conformant. - A frozen conformance corpus (golden policy → expected `Decision`) will enforce From 8fed705e7382ae05057d2fe5f5fe96ca1a746521 Mon Sep 17 00:00:00 2001 From: ramkrishna2910 Date: Thu, 25 Jun 2026 16:30:16 -0700 Subject: [PATCH 7/7] foundation: expose labels() + default_label() on the Classifier contract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From @eddierichter-amd scoping #2379: the registry/leaf-factory needs a classifier's declared labels to reject dangling condition `label` refs and to resolve `default_label` when a condition omits `label`. These are intrinsic to the declaration, so they belong on the Classifier (no sidecar metadata table). semantic_similarity declares no labels (scores under the empty-string key), so its labels() is empty and default_label() is nullopt — which correctly makes any `label` ref on a similarity condition invalid. Contract test covers both a labeled classifier and the empty-labels similarity case. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/cpp/include/lemon/routing_policy.h | 19 +++++++++-- test/cpp/test_routing_policy_contract.cpp | 40 +++++++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/src/cpp/include/lemon/routing_policy.h b/src/cpp/include/lemon/routing_policy.h index 86aec285d..37c49856f 100644 --- a/src/cpp/include/lemon/routing_policy.h +++ b/src/cpp/include/lemon/routing_policy.h @@ -174,13 +174,28 @@ class Classifier { const std::string& type() const { return type_; } OnError on_error() const { return on_error_; } + // Declared output labels and the optional default. Intrinsic to the + // declaration, so the registry resolves condition `label` refs against + // labels() and falls back to default_label() when a condition omits `label` + // — no sidecar metadata table. semantic_similarity declares no labels + // (it scores under the empty-string key, read via Score::primary), so its + // labels() is empty and default_label() is nullopt — which correctly makes + // any `label` ref on a similarity condition invalid. 
+ const std::vector<std::string>& labels() const { return labels_; } + const std::optional<std::string>& default_label() const { return default_label_; } + protected: - Classifier(std::string id, std::string type, OnError on_error) - : id_(std::move(id)), type_(std::move(type)), on_error_(on_error) {} + Classifier(std::string id, std::string type, OnError on_error, + std::vector<std::string> labels = {}, + std::optional<std::string> default_label = std::nullopt) + : id_(std::move(id)), type_(std::move(type)), on_error_(on_error), + labels_(std::move(labels)), default_label_(std::move(default_label)) {} std::string id_; std::string type_; OnError on_error_ = OnError::MatchFalse; + std::vector<std::string> labels_; + std::optional<std::string> default_label_; }; using ClassifierPtr = std::shared_ptr; diff --git a/test/cpp/test_routing_policy_contract.cpp b/test/cpp/test_routing_policy_contract.cpp index f0a1f79c3..d39cad9c5 100644 --- a/test/cpp/test_routing_policy_contract.cpp +++ b/test/cpp/test_routing_policy_contract.cpp @@ -34,6 +34,8 @@ #define ROUTING_FIXTURE_DIR "test/cpp/fixtures/routing" #endif +using lemon::Classifier; +using lemon::ClassifierContext; using lemon::ClassifierServices; using lemon::Condition; using lemon::ConditionPtr; @@ -150,8 +152,45 @@ struct ConstCondition : Condition { return value; } }; + +// A model-backed classifier: declares labels + a default, as the registry reads +// them to resolve condition `label` refs. +struct LabeledClassifier : Classifier { + LabeledClassifier() + : Classifier("pii", "classifier", OnError::MatchTrue, + {"PII", "NO_PII"}, std::string("PII")) {} + Score evaluate(const ClassifierContext&) const override { + Score s; + s.labels["PII"] = 1.0; + return s; + } +}; + +// A semantic_similarity classifier: no declared labels (scores under "").
+struct SimClassifier : Classifier { + SimClassifier() : Classifier("sim", "semantic_similarity", OnError::MatchFalse) {} + Score evaluate(const ClassifierContext&) const override { + Score s; + s.labels[""] = 0.5; + return s; + } +}; } // namespace +static void test_classifier_contract() { + LabeledClassifier lc; + check(lc.id() == "pii" && lc.type() == "classifier", "Classifier carries id/type"); + check(lc.on_error() == OnError::MatchTrue, "Classifier carries on_error"); + check(lc.labels().size() == 2 && lc.labels()[0] == "PII", + "Classifier exposes declared labels for ref resolution"); + check(lc.default_label().has_value() && *lc.default_label() == "PII", + "Classifier exposes default_label"); + + SimClassifier sc; + check(sc.labels().empty() && !sc.default_label().has_value(), + "semantic_similarity declares no labels (any label ref on it is invalid)"); +} + static void test_condition_seam() { RouteContext ctx; ctx.input = "hi"; @@ -304,6 +343,7 @@ static void test_fixtures() { int main() { test_types_construct(); test_condition_seam(); + test_classifier_contract(); test_fake_services(); test_engine_constructs(); test_fixtures();