diff --git a/.github/workflows/robot-validate.yml b/.github/workflows/robot-validate.yml index c7ddf65..c4309eb 100644 --- a/.github/workflows/robot-validate.yml +++ b/.github/workflows/robot-validate.yml @@ -202,6 +202,7 @@ jobs: - ARCO_instances_creditscoring.ttl - ARCO_instances_verification.ttl - ARCO_instances_adversarial_decoy.ttl + - ARCO_instances_adversarial_decoy_5b.ttl - ARCO_instances_flag_tests.ttl steps: diff --git a/.gitignore b/.gitignore index 30926bc..a33dec5 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,8 @@ # Obsidian knowledge base (local only, not versioned) KB/ +# Defensive: even if KB/ rule changes, never commit vendor node_modules trees +KB/**/node_modules/ # Git metadata (huge) .git/ diff --git a/03_TECHNICAL_CORE/ontology/ARCO_instances_adversarial_decoy_5b.ttl b/03_TECHNICAL_CORE/ontology/ARCO_instances_adversarial_decoy_5b.ttl new file mode 100644 index 0000000..3009460 --- /dev/null +++ b/03_TECHNICAL_CORE/ontology/ARCO_instances_adversarial_decoy_5b.ttl @@ -0,0 +1,77 @@ +@prefix : . +@prefix bfo: . +@prefix cco: . +@prefix ro: . +@prefix iao: . +@prefix rdf: . +@prefix rdfs: . +@prefix owl: . +@prefix skos: . + + rdf:type owl:Ontology ; + rdfs:label "ARCO Adversarial Test: Equivalency Decoy (5(b) parity)" ; + rdfs:comment """Tests that OWL-RL performs real equivalence reasoning on the Annex III 5(b) branch, mirroring the 1(a) decoy at ARCO_instances_adversarial_decoy.ttl. + + :WeirdCalculator is declared owl:equivalentClass to :CreditworthinessEvaluationCapability. + The disposition is typed ONLY as :WeirdCalculator — no Credit/Evaluation/Score/Assessment + vocabulary appears in the alias class, disposition, module, or system IRIs. + + If the reasoner is real, it infers CreditworthinessEvaluationCapability membership + via owl:equivalentClass propagation; the three-gate intersection then entails + :AnnexIII5bApplicableSystem. + + Cross-category isolation is also under test here: post-reasoning the system + must NOT entail :AnnexIII1aApplicableSystem (no biometric identification + capability is bound to any component). + + Adversarial-purity discipline (parallel to the 1(a) decoy): + - no provider organisation, no provider role + - no assessment documentation process, no assessment documentation artifact + - no compliance obligation + - no determination ICE asserted + Only the reality-side path the three-gate axiom needs to fire is asserted. + Closes OPEN_PROBLEMS L3.7 (5(b) adversarial equivalentClass parity).""" ; + owl:imports . + +################################################################# +# ADVERSARIAL SETUP: equivalence decoy class +################################################################# + +:WeirdCalculator rdf:type owl:Class ; + rdfs:label "Weird Calculator (decoy name)" ; + skos:prefLabel "Weird Calculator (decoy name)"@en ; + skos:definition "An adversarial test class declared owl:equivalentClass to :CreditworthinessEvaluationCapability. Used by the 5(b) equivalence-decoy fixture to verify that the OWL reasoner performs real semantic inference via equivalentClass propagation on the credit branch, rather than IRI pattern matching or class-name string matching." ; + rdfs:comment "Equivalent to CreditworthinessEvaluationCapability but with an unrelated name. Tests that the reasoner uses equivalence, not string matching, on the 5(b) branch." ; + owl:equivalentClass :CreditworthinessEvaluationCapability . + +################################################################# +# SYSTEM — typed using ONLY the decoy class name +################################################################# + +:WeirdCalc_Disposition rdf:type :WeirdCalculator ; + rdfs:label "Weird Calculator Disposition (typed as WeirdCalculator only)" . + +:WeirdCalc_Module rdf:type :HardwareComponent ; + rdfs:label "Weird Calculator Hardware Module" ; + ro:0000091 :WeirdCalc_Disposition . + +:WeirdCalcSystem_001 rdf:type :System ; + rdfs:label "Weird Calculator System 001" ; + bfo:0000051 :WeirdCalc_Module . + +################################################################# +# DOCUMENTARY MINIMUM (gates 2 + 3) +################################################################# + +:WeirdCalc_Process rdf:type :CreditworthinessEvaluationProcess ; + rdfs:label "Weird Calculator Process Token" . + +:WeirdCalc_IntendedUse_001 rdf:type :IntendedUseSpecification ; + rdfs:label "Weird Calculator Intended Use" ; + cco:prescribes :WeirdCalc_Process ; + iao:0000136 :WeirdCalcSystem_001 . + +:WeirdCalc_UseScenario_001 rdf:type :UseScenarioSpecification ; + rdfs:label "Weird Calculator Use Scenario" ; + iao:0000136 :WeirdCalcSystem_001 ; + cco:designates :NaturalPersonRole . diff --git a/03_TECHNICAL_CORE/ontology/ARCO_instances_verification.ttl b/03_TECHNICAL_CORE/ontology/ARCO_instances_verification.ttl index e507751..2a87cd3 100644 --- a/03_TECHNICAL_CORE/ontology/ARCO_instances_verification.ttl +++ b/03_TECHNICAL_CORE/ontology/ARCO_instances_verification.ttl @@ -15,7 +15,7 @@ Non-entailment claim: AnnexIII1aApplicableSystem is not entailed for VerificationKiosk_001 under current assertions (OWA — no closed-world guarantee). - Legal basis: Recital 22 and Art. 3(41) EU AI Act — 1:1 biometric verification is excluded from Annex III 1(a); only 1:N remote biometric identification triggers classification.""" ; + Legal basis: Recital 15, Recital 17, and Annex III item 1(a) of EU AI Act 2024/1689 — the verification carve-out ("intended to be used for biometric verification, which includes authentication, whose sole purpose is to confirm that a specific natural person is the person he or she claims to be") excludes 1:1 biometric verification from biometric identification (Recital 15) and from remote biometric identification systems specifically (Recital 17, rationale: minor impact on fundamental rights), and is reaffirmed in the Annex III 1(a) operative text. Article 3(36) defines biometric verification as the automated 1:1 modality; Article 3(41) defines the RBI system. Only 1:N remote biometric identification triggers Annex III 1(a) classification.""" ; owl:imports . ################################################################# @@ -24,7 +24,7 @@ :AnnexIII_Condition_1a_Exclusion rdf:type :RegulatoryContent ; rdfs:label "Annex III 1(a) — Verification Exclusion Note" ; - rdfs:comment "Documents the 1:1 verification exclusion from Annex III 1(a). Recital 22 and Art. 3(41): only 1:N remote biometric identification triggers the Annex III classification." ; + rdfs:comment "Documents the 1:1 verification exclusion from Annex III 1(a). Recital 15, Recital 17, and Annex III 1(a) operative carve-out: only 1:N remote biometric identification triggers the Annex III classification. Article 3(36) defines 1:1 verification; Article 3(41) defines the RBI system." ; iao:0000136 :BiometricVerificationCapability ; iao:0000136 :BiometricIdentificationCapability . diff --git a/03_TECHNICAL_CORE/reasoning/select_system_comment.sparql b/03_TECHNICAL_CORE/reasoning/select_system_comment.sparql index ddf5a60..4b5c75c 100644 --- a/03_TECHNICAL_CORE/reasoning/select_system_comment.sparql +++ b/03_TECHNICAL_CORE/reasoning/select_system_comment.sparql @@ -3,8 +3,9 @@ PREFIX rdfs: # EMISSION LAYER — binds rdfs:comment of the system under evaluation. # # Used by the negative-case output to surface fixture-supplied regulatory -# reasoning (e.g., the kiosk fixture's comment naming Recital 22 / Article -# 3(41) and the verification-vs-identification distinction). The comment +# reasoning (e.g., the kiosk fixture's comment naming Recital 15 + Recital +# 17 + Annex III 1(a) carve-out and the verification-vs-identification +# distinction). The comment # is fixture-authored ground truth and ships with the TTL; this query # avoids embedding fixture-specific legal text as Python literals in the # emitter (per CLAUDE.md Output discipline). diff --git a/03_TECHNICAL_CORE/scripts/hermit_cross_check.py b/03_TECHNICAL_CORE/scripts/hermit_cross_check.py index 5803f05..a3a8920 100644 --- a/03_TECHNICAL_CORE/scripts/hermit_cross_check.py +++ b/03_TECHNICAL_CORE/scripts/hermit_cross_check.py @@ -69,6 +69,7 @@ ("ARCO_instances_creditscoring.ttl", ["CreditScorer_001"]), ("ARCO_instances_verification.ttl", ["VerificationKiosk_001"]), ("ARCO_instances_adversarial_decoy.ttl", ["DecoySystem_001"]), + ("ARCO_instances_adversarial_decoy_5b.ttl", ["WeirdCalcSystem_001"]), ("ARCO_instances_flag_tests.ttl", ["FlagTest_BiometricSystem_WithDerogationClaim", "FlagTest_CreditSystem_WithFraudProcess"]), ] diff --git a/03_TECHNICAL_CORE/scripts/run_pipeline.py b/03_TECHNICAL_CORE/scripts/run_pipeline.py index 37afbe4..c3246fd 100644 --- a/03_TECHNICAL_CORE/scripts/run_pipeline.py +++ b/03_TECHNICAL_CORE/scripts/run_pipeline.py @@ -442,9 +442,10 @@ def get_system_comment(g: Graph, system_local: str) -> str: """Return rdfs:comment of the system instance, or empty string. Used by the negative-case Provider Obligations panel to surface fixture- - authored regulatory reasoning (e.g., the kiosk fixture's note on Recital - 22 / Article 3(41)) as a documentary scope text rather than a Python - literal in the emitter. Returns "" on query failure or empty result; + authored regulatory reasoning (e.g., the kiosk fixture's note on + Recital 15 + Recital 17 + Annex III 1(a) carve-out) as a documentary + scope text rather than a Python literal in the emitter. Returns "" on + query failure or empty result; emitter is responsible for skipping the panel when empty. """ try: @@ -563,6 +564,20 @@ def select_gate_evidence(g: Graph, system_local: str) -> dict: return packet + +def gate3_designates_expected_role(gate_evidence: dict) -> bool: + """Return whether Gate 3 satisfies ARCO's current role-designation axiom. + + Current modeled Annex III branches both require the USS to designate the + NaturalPersonRole universal. OPEN_PROBLEMS L3.2 tracks future + category-parameterized role expectations. + """ + return ( + bool(gate_evidence["gate3"]["uss_uri"]) + and gate_evidence["gate3"]["role_uri"] == f"{ARCO_NS}NaturalPersonRole" + ) + + def verify_high_risk_inference(reasoned: Graph, source: Graph) -> tuple[bool, bool, bool, list[tuple[str, str]]]: """Returns (inference_ok, asserted_pre, entailed_post, bindings).""" hr("ARCO RESULT (ENTAILMENT + PROOF SKETCH)") @@ -859,9 +874,13 @@ def _annex(val): if val is None: return 'N/A' if val: - if not derogation_flagged: - return ('VERIFIED (ENTAILED, ' - 'Article 6(3) derogation not evaluated)') + # Pure graph-backed entailment value. The Article 6(3) derogation + # scope qualifier (when no DerogationClaim is asserted) is + # surfaced as the separate `derogation_scope_badge` rendered + # alongside the conclusion banner, per + # output_manifest_v2.yaml field `derogation_evaluation_scope` + # (forbidden_pattern: embedding the qualifier into a + # graph_backed value). return 'VERIFIED (ENTAILED)' return 'NOT APPLICABLE' @@ -1004,8 +1023,14 @@ def edge_el(rel, iri_label=""): # typed-content satisfaction by itself — its own header at # reasoning/check_intended_use.sparql:5-12 declares it documentary. gate2_ok = bool(gate_evidence["gate2"]["process_type_uri"]) - # Gate 3: USS designation is asserted (already evidence-coupled). - gate3_ok = bool(gate_evidence["gate3"]["uss_uri"]) + # Gate 3: USS designation is asserted AND designates the expected role + # universal. The OWL Gate 3 axiom uses + # `cco:designates owl:hasValue :NaturalPersonRole`; a fixture asserting + # a USS that designates a different role would otherwise display as + # Gate 3 OK while OWL correctly does not entail Gate 3 satisfaction. + # (L3.4 truth-surface fix; per-category role parameterization tracked + # at L3.2.) + gate3_ok = gate3_designates_expected_role(gate_evidence) # Pre-compute gate display labels from the determination packet. # These are used in axiom pattern text, gate answers, and counterfactuals. @@ -1240,8 +1265,9 @@ def edge_el(rel, iri_label=""): # ARCO does not model Title IV obligations; see LIMITATIONS §2). It # is dropped here. # When the loaded fixture provides a system-level rdfs:comment with - # regulatory framing (e.g., the kiosk fixture's note on Recital 22 - # / Art. 3(41)), surface it via the documentary scope text path + # regulatory framing (e.g., the kiosk fixture's note on Recital 15 + + # Recital 17 + Annex III 1(a) carve-out), surface it via the + # documentary scope text path # declared in output_manifest_v2.yaml. Otherwise the panel reports # the OWA-bounded non-entailment note alone. if system_comment: @@ -2053,11 +2079,20 @@ def main() -> None: print(" [classification layer — OWL-RL entailment]") print(f"SHACL: {_pf(shacl_ok)}") print(f"Entailment: {_pf(inference_ok)}") - _summary_qualifier = ", Article 6(3) derogation not evaluated" if not derogation_flagged else "" + # Pure graph-backed entailment values. Article 6(3) derogation scope + # is reported as a separate run-metadata line below, not embedded in + # the graph_backed VERIFIED literal (per output_manifest_v2.yaml + # field `derogation_evaluation_scope` forbidden_pattern). if annex_iii_1a_ok is not None: - print(f"Annex III 1a: {'VERIFIED (ENTAILED' + _summary_qualifier + ')' if annex_iii_1a_ok else 'NOT APPLICABLE'} (OWL-entailed)") + print(f"Annex III 1a: {'VERIFIED (ENTAILED)' if annex_iii_1a_ok else 'NOT APPLICABLE'} (OWL-entailed)") if annex_iii_5b_ok is not None: - print(f"Annex III 5b: {'VERIFIED (ENTAILED' + _summary_qualifier + ')' if annex_iii_5b_ok else 'NOT APPLICABLE'} (OWL-entailed)") + print(f"Annex III 5b: {'VERIFIED (ENTAILED)' if annex_iii_5b_ok else 'NOT APPLICABLE'} (OWL-entailed)") + # Separate run-scope disclosure for derogation evaluation. Surfaces + # only when a category is entailed and no DerogationClaim is asserted + # (when a claim IS asserted, the existing FLAG row signals the + # unevaluated derogation; do not double-disclose). + if (annex_iii_1a_ok or annex_iii_5b_ok) and not derogation_flagged: + print(" [run scope] Article 6(3) derogation: NOT EVALUATED") print() print(" [audit documentation layer — SPARQL ASK on reasoned graph]") print(f"Traceability: {_pf(traceability_ok)}") @@ -2266,26 +2301,29 @@ def main() -> None: # audit record. if intended_use_ok is not None: print(f" INTENDED USE: {_pf(intended_use_ok)}") - # Article 6(3) derogation scope qualifier: ARCO does not evaluate the + # Article 6(3) derogation scope: ARCO does not evaluate the # Article 6(3) carve-out conditions; it only detects whether a provider- - # supplied :DerogationClaim artifact is asserted. When such a claim is - # asserted, the FLAG line below signals the unevaluated derogation. When - # no claim is asserted, an Annex III ENTAILED conclusion otherwise reads - # as "high-risk classification confirmed" — append a same-line scope - # qualifier so the disclosure rides with the conclusion. - _annex_iii_entailed_qualifier = "VERIFIED (ENTAILED, Article 6(3) derogation not evaluated)" + # supplied :DerogationClaim artifact is asserted. The Annex III ENTAILED + # conclusion below is a pure graph_backed value; the derogation scope + # disclosure rides on its own line (see "ARTICLE 6(3) DEROGATION" below) + # per output_manifest_v2.yaml field `derogation_evaluation_scope` + # forbidden_pattern (Python concatenation that embeds the qualifier + # into a graph_backed value). if annex_iii_1a_ok is not None: - if annex_iii_1a_ok: - _line_1a = _annex_iii_entailed_qualifier if not derogation_flagged else "VERIFIED (ENTAILED)" - else: - _line_1a = "NOT APPLICABLE" + _line_1a = "VERIFIED (ENTAILED)" if annex_iii_1a_ok else "NOT APPLICABLE" print(f" ANNEX III 1(a): {_line_1a}") if annex_iii_5b_ok is not None: if annex_iii_5b_ok: - _line_5b = _annex_iii_entailed_qualifier if not derogation_flagged else "VERIFIED (ENTAILED)" + _line_5b = "VERIFIED (ENTAILED)" else: _line_5b = "NOT APPLICABLE" print(f" ANNEX III 5(b): {_line_5b}") + # Separate run-scope disclosure for derogation evaluation. Surfaces + # only when a category is entailed and no DerogationClaim is asserted + # (when a claim IS asserted, the existing FLAG line below already + # signals the unevaluated derogation; do not double-disclose). + if (annex_iii_1a_ok or annex_iii_5b_ok) and not derogation_flagged: + print(" ARTICLE 6(3) DEROGATION: NOT EVALUATED (run scope)") if obligation_ok is not None: print(f" OBLIGATION: {_cert_obligation_label}") if reg_alignment_ok is not None: @@ -2333,18 +2371,20 @@ def main() -> None: # summary.json["latent_risk"] for audit consumers. if intended_use_ok is not None: cert_lines.append(f" INTENDED USE: {_pf(intended_use_ok)}") + # Pure graph-backed entailment values. The Article 6(3) derogation + # scope rides on the separate "ARTICLE 6(3) DEROGATION" line below + # (per output_manifest_v2.yaml field `derogation_evaluation_scope` + # forbidden_pattern). if annex_iii_1a_ok is not None: - if annex_iii_1a_ok: - _cert_line_1a = _annex_iii_entailed_qualifier if not derogation_flagged else "VERIFIED (ENTAILED)" - else: - _cert_line_1a = "NOT APPLICABLE" + _cert_line_1a = "VERIFIED (ENTAILED)" if annex_iii_1a_ok else "NOT APPLICABLE" cert_lines.append(f" ANNEX III 1(a): {_cert_line_1a}") if annex_iii_5b_ok is not None: - if annex_iii_5b_ok: - _cert_line_5b = _annex_iii_entailed_qualifier if not derogation_flagged else "VERIFIED (ENTAILED)" - else: - _cert_line_5b = "NOT APPLICABLE" + _cert_line_5b = "VERIFIED (ENTAILED)" if annex_iii_5b_ok else "NOT APPLICABLE" cert_lines.append(f" ANNEX III 5(b): {_cert_line_5b}") + # Separate run-scope disclosure for derogation evaluation. Surfaces + # only when a category is entailed and no DerogationClaim is asserted. + if (annex_iii_1a_ok or annex_iii_5b_ok) and not derogation_flagged: + cert_lines.append(" ARTICLE 6(3) DEROGATION: NOT EVALUATED (run scope)") if obligation_ok is not None: cert_lines.append(f" OBLIGATION: {_cert_obligation_label}") if reg_alignment_ok is not None: @@ -2416,8 +2456,17 @@ def main() -> None: latent_ok, "DETECTED", "NOT_DETECTED", ), "intended_use": (_pf(intended_use_ok) if intended_use_ok is not None else "NOT_RUN"), - "annex_iii_1a": (("VERIFIED (ENTAILED, Article 6(3) derogation not evaluated)" if not derogation_flagged else "VERIFIED (ENTAILED)") if annex_iii_1a_ok else ("NOT APPLICABLE" if annex_iii_1a_ok is not None else "N/A")), - "annex_iii_5b": (("VERIFIED (ENTAILED, Article 6(3) derogation not evaluated)" if not derogation_flagged else "VERIFIED (ENTAILED)") if annex_iii_5b_ok else ("NOT APPLICABLE" if annex_iii_5b_ok is not None else "N/A")), + # Pure graph-backed entailment values. Article 6(3) derogation + # scope rides on the separate `derogation_evaluation_scope` + # field below, not embedded in these graph_backed literals + # (per output_manifest_v2.yaml field `derogation_evaluation_scope` + # forbidden_pattern). + "annex_iii_1a": ("VERIFIED (ENTAILED)" if annex_iii_1a_ok else ("NOT APPLICABLE" if annex_iii_1a_ok is not None else "N/A")), + "annex_iii_5b": ("VERIFIED (ENTAILED)" if annex_iii_5b_ok else ("NOT APPLICABLE" if annex_iii_5b_ok is not None else "N/A")), + "derogation_evaluation_scope": { + "evaluated": False, + "reason": "Article 6(3) derogation evaluation not modeled in current ARCO release; see LIMITATIONS.md §3.7", + }, "obligation": _status_label( obligation_ok, "PASS", "FAIL", not_applicable_label="NOT_APPLICABLE" if non_applicable_run else None, @@ -2536,7 +2585,6 @@ def main() -> None: # (run_pipeline.py:806): typed-evidence presence, not the # documentary ASK `intended_use_ok`. Closes the schema-incoherent # SATISFIED-with-empty-evidence state on non-applicable runs. - # Parallel to gate_3 below which is already evidence-coupled. "status": "SATISFIED" if bool(gate_evidence["gate2"]["process_type_uri"]) else "NOT_SATISFIED", "evidence": { "ius_uri": gate_evidence["gate2"]["ius_uri"], @@ -2548,7 +2596,10 @@ def main() -> None: { "id": "gate_3", "label": "Affected Role Category", - "status": "SATISFIED" if bool(gate_evidence["gate3"]["uss_uri"]) else "NOT_SATISFIED", + # Packet-side Gate 3 status mirrors HTML-side `gate3_ok`: + # USS existence is not enough; the designated role must match + # the current OWL Gate 3 target. + "status": "SATISFIED" if gate3_designates_expected_role(gate_evidence) else "NOT_SATISFIED", "evidence": { "uss_uri": gate_evidence["gate3"]["uss_uri"], "role_uri": gate_evidence["gate3"]["role_uri"], diff --git a/03_TECHNICAL_CORE/scripts/test_adversarial_mechanism.py b/03_TECHNICAL_CORE/scripts/test_adversarial_mechanism.py index 4ce05c6..5ff163b 100644 --- a/03_TECHNICAL_CORE/scripts/test_adversarial_mechanism.py +++ b/03_TECHNICAL_CORE/scripts/test_adversarial_mechanism.py @@ -7,7 +7,7 @@ would let a pattern-matching pipeline pass test_scenarios.py with the wrong mechanism in place. - Decoy fixture (ARCO_instances_adversarial_decoy.ttl): + Decoy fixture (ARCO_instances_adversarial_decoy.ttl) — Annex III 1(a): The disposition is typed only as :WeirdScanner. :WeirdScanner is declared owl:equivalentClass :BiometricIdentificationCapability in the same fixture. Pre-reasoning, no asserted triple types the disposition as @@ -18,6 +18,16 @@ :BiometricIdentificationCapability triple in the input and miss the classification. The OWL reasoner does not. + Decoy 5(b) fixture (ARCO_instances_adversarial_decoy_5b.ttl) — Annex III 5(b): + Same shape as the 1(a) decoy, branch-swapped. :WeirdCalculator is declared + owl:equivalentClass :CreditworthinessEvaluationCapability. Disposition is + typed only as :WeirdCalculator. Post-reasoning the disposition entails as + :CreditworthinessEvaluationCapability and the three-gate intersection + entails :AnnexIII5bApplicableSystem. Cross-category isolation is also + under test: post-reasoning the system MUST NOT entail + :AnnexIII1aApplicableSystem (no biometric capability bound). Closes + OPEN_PROBLEMS L3.7. + Ghost fixture (ARCO_instances_adversarial_blanknode.ttl): The disposition is an anonymous individual (blank node) typed as :BiometricIdentificationCapability. owl:someValuesFrom requires @@ -137,6 +147,80 @@ def test_decoy_classifies_via_equivalent_class() -> bool: return ok +def test_credit_decoy_classifies_via_equivalent_class() -> bool: + """ + Verify that WeirdCalcSystem_001's Gate 1 entailment fires via + owl:equivalentClass propagation on the Annex III 5(b) branch, not via + direct IRI assertion. Mirrors test_decoy_classifies_via_equivalent_class + for the credit branch. Closes OPEN_PROBLEMS L3.7. + + Five assertions: + 1. Pre-reasoning: disposition rdf:type :WeirdCalculator (alias only). + 2. Pre-reasoning: disposition rdf:type :CreditworthinessEvaluationCapability + is ABSENT (proves alias-only routing). + 3. Post-reasoning: disposition rdf:type :CreditworthinessEvaluationCapability + is present (equivalentClass propagation fired). + 4. Post-reasoning: system rdf:type :AnnexIII5bApplicableSystem + (three-gate intersection entails). + 5. Post-reasoning: system rdf:type :AnnexIII1aApplicableSystem + is ABSENT (cross-category isolation; no biometric capability bound). + """ + print("\n--- DECOY 5(b): classification via owl:equivalentClass (credit branch) ---") + fixture = ONTOLOGY_DIR / "ARCO_instances_adversarial_decoy_5b.ttl" + g_pre = parse_fixture(fixture) + + calc_disp = ARCO["WeirdCalc_Disposition"] + weird_calculator = ARCO["WeirdCalculator"] + credit_cap = ARCO["CreditworthinessEvaluationCapability"] + calc_system = ARCO["WeirdCalcSystem_001"] + annex_5b = ARCO["AnnexIII5bApplicableSystem"] + annex_1a = ARCO["AnnexIII1aApplicableSystem"] + + # Precondition 1: disposition is typed as :WeirdCalculator (the decoy class) + has_alias_pre = (calc_disp, RDF["type"], weird_calculator) in g_pre + # Precondition 2: disposition is NOT directly typed as + # :CreditworthinessEvaluationCapability before reasoning + has_credit_pre = (calc_disp, RDF["type"], credit_cap) in g_pre + # Precondition 3: System is NOT yet classified as 5(b) applicable + is_5b_pre = (calc_system, RDF["type"], annex_5b) in g_pre + + ok = True + print(f" pre-reasoning :WeirdCalc_Disposition rdf:type :WeirdCalculator: {has_alias_pre} (expected True)") + if not has_alias_pre: + ok = False + print(f" pre-reasoning :WeirdCalc_Disposition rdf:type :CreditworthinessEvaluationCapability: {has_credit_pre} (expected False)") + if has_credit_pre: + print(" FAIL: decoy disposition asserted directly as CreditworthinessEvaluationCapability; " + "test premise (equivalence-only routing) is invalid.") + ok = False + print(f" pre-reasoning :WeirdCalcSystem_001 rdf:type :AnnexIII5bApplicableSystem: {is_5b_pre} (expected False)") + if is_5b_pre: + ok = False + + # Reason and re-check + g_post = reason(g_pre) + has_credit_post = (calc_disp, RDF["type"], credit_cap) in g_post + is_5b_post = (calc_system, RDF["type"], annex_5b) in g_post + is_1a_post = (calc_system, RDF["type"], annex_1a) in g_post + + print(f" post-reasoning :WeirdCalc_Disposition rdf:type :CreditworthinessEvaluationCapability: {has_credit_post} (expected True)") + if not has_credit_post: + ok = False + print(f" post-reasoning :WeirdCalcSystem_001 rdf:type :AnnexIII5bApplicableSystem: {is_5b_post} (expected True)") + if not is_5b_post: + ok = False + # Cross-category isolation check. + print(f" post-reasoning :WeirdCalcSystem_001 rdf:type :AnnexIII1aApplicableSystem: {is_1a_post} (expected False)") + if is_1a_post: + print(" FAIL: cross-category isolation broken; 5(b) decoy entailed into 1(a) class.") + ok = False + + if ok: + print(" RESULT: 5(b) classification routed via owl:equivalentClass, not via direct IRI.") + print(" Cross-category isolation preserved (no 1(a) entailment).") + return ok + + def test_ghost_classifies_via_blank_node_disposition() -> bool: """ Verify that GhostSystem_001's Gate 1 entailment fires via a blank-node @@ -215,6 +299,8 @@ def main() -> int: all_pass = True if not test_decoy_classifies_via_equivalent_class(): all_pass = False + if not test_credit_decoy_classifies_via_equivalent_class(): + all_pass = False if not test_ghost_classifies_via_blank_node_disposition(): all_pass = False @@ -222,7 +308,8 @@ def main() -> int: print("=" * 72) if all_pass: print("ALL ADVERSARIAL-MECHANISM TESTS PASSED") - print("Decoy classification routes via owl:equivalentClass.") + print("Decoy 1(a) classification routes via owl:equivalentClass.") + print("Decoy 5(b) classification routes via owl:equivalentClass; cross-category isolated.") print("Ghost classification routes via blank-node owl:someValuesFrom witness.") print("Neither relies on a direct IRI assertion or a named individual.") else: diff --git a/03_TECHNICAL_CORE/scripts/test_output_provenance.py b/03_TECHNICAL_CORE/scripts/test_output_provenance.py index 14a2ea5..666492b 100644 --- a/03_TECHNICAL_CORE/scripts/test_output_provenance.py +++ b/03_TECHNICAL_CORE/scripts/test_output_provenance.py @@ -282,8 +282,8 @@ def main() -> int: print(f"Pipeline: {PIPELINE_PATH.relative_to(REPO_ROOT)}") print(f"Manifest version: {manifest.get('manifest_version')}") print() - print("This test fails today by design. Each failure names a v1.2 emitter") - print("violation. PR2-PR6 each move at least one failure to passing.") + print("This test names output-provenance emitter violations when present.") + print("Current acceptance target: zero failures.") print() hr("Check 1 — forbidden source patterns") diff --git a/LIMITATIONS.md b/LIMITATIONS.md index af3e666..ce3ea66 100644 --- a/LIMITATIONS.md +++ b/LIMITATIONS.md @@ -111,7 +111,7 @@ Gate 1 locates the capability disposition on a `SystemComponent`, not on the `Sy A second, narrower simplification sits inside this gate: for a model-driven biometric module, the in-repo Sentinel fixture types the bearer as `:HardwareComponent` only, but a strict realist reading would locate the disposition on the *amalgam* of hardware plus the concretized model artifact running on it. The 2024 Capabilities paper's hardware-software-amalgam discussion treats software qua pattern as a generically dependent continuant, not itself a capable continuant; capabilities require a material bearer that concretizes the pattern. ARCO's classification result does not depend on which of these two bearer choices is taken — Gate 1 is satisfied as long as some component bearer instantiates a triggering-capability disposition — so the simplification is documented here rather than rebuilt as a structural change. -A third, related point: for software-configurable AI systems where the same hardware can be configured for different modes (e.g., 1:1 verification vs 1:N identification on the same biometric kiosk hardware), the disposition assertion describes what THIS specific deployment is intended to do under its current commitments, not what the hardware-in-isolation could theoretically do. ARCO does not make closed-world hardware-incapability claims; per-fixture disposition assertions reflect the configured-system commitments under OWA. A different deployment of the same hardware (different configuration, software, or database) would be modeled as a separate `:System` instance with its own asserted disposition. This matches the EU AI Act's classification on intended use (Article 3(36), Recital 15), not on raw hardware capability. Validated 2026-05-10 against vendor documentation for Suprema, ZKTeco, Matrix, HID, and IDEMIA biometric kiosks, all of which advertise the same hardware as configurable for both 1:1 and 1:N modes. +A third, related point: for software-configurable AI systems where the same hardware can be configured for different modes (e.g., 1:1 verification vs 1:N identification on the same biometric kiosk hardware), the disposition assertion describes what THIS specific deployment is intended to do under its current commitments, not what the hardware-in-isolation could theoretically do. ARCO does not make closed-world hardware-incapability claims; per-fixture disposition assertions reflect the configured-system commitments under OWA. A different deployment of the same hardware (different configuration, software, or database) would be modeled as a separate `:System` instance with its own asserted disposition. This matches the EU AI Act's classification on intended use (Recital 15, Recital 17, and the Annex III 1(a) operative carve-out, which together carry the "intended to be used for biometric verification" framing), not on raw hardware capability. Article 3(36) supplies the underlying 1:1 verification definition. Validated 2026-05-10 against vendor documentation for Suprema, ZKTeco, Matrix, HID, and IDEMIA biometric kiosks, all of which advertise the same hardware as configurable for both 1:1 and 1:N modes. ### 3.6 Reality/representation split @@ -138,7 +138,7 @@ The IUS subkind family is membership-fixed by Annex III categorial criterion, no #### 3.7.c Real-time vs. post RBI subclass declaration; Article 5 routing scoped future -ARCO declares `:PostRemoteBiometricIdentificationProcess` (Article 3(43): a remote biometric identification process other than real-time) and `:RealTimeRemoteBiometricIdentificationProcess` (Article 3(42): capturing, comparison and identification all occurring without a significant delay, comprising not only instant identification but also limited short delays in order to avoid circumvention) as disjoint subclasses of `:RemoteBiometricIdentificationProcess`. The disjointness is correct: the regulation defines the two as exhaustive subtypes within the parent term. +ARCO declares `:PostRemoteBiometricIdentificationProcess` and `:RealTimeRemoteBiometricIdentificationProcess` as disjoint subclasses of `:RemoteBiometricIdentificationProcess`. The regulatory verbatim text uses "system" rather than "process": Article 3(43) defines a post-RBI system as "a remote biometric identification system other than a real-time remote biometric identification system," and Article 3(42) defines a real-time RBI system as one whereby "the capturing of biometric data, the comparison and the identification all occur without a significant delay, comprising not only instant identification, but also limited short delays in order to avoid circumvention." ARCO models these as Process subclasses (BFO Bucket 4 / Occurrent) since classification reasons over the deployment-and-operation occurrence rather than the system artifact alone; the class-name "Process" is ARCO's modeling translation of the regulation's "system" term. The disjointness is correct: the regulation defines the two as exhaustive subtypes within the parent term. The Annex III 1(a) Gate 2 axiom continues to reference the parent class via `someValuesFrom`, so subclass propagation means an IUS prescribing either a real-time or a post RBI process particular satisfies Gate 2 and entails `:AnnexIII1aApplicableSystem`. No fixture is currently typed into either subclass; both are forward-declared. @@ -251,7 +251,7 @@ A SHACL fail and an OWL-RL classification result are independent. Diagnose each ### 7.4 Cross-reasoner agreement and known profile divergence -**Does:** Run a HermiT (full OWL 2 DL) cross-check via ROBOT in CI on every push and PR. The check merges ontology + imports + core + governance + each certificate-grade fixture, runs HermiT, and compares classification SPARQL results against the production OWL-RL pipeline for every modeled system across the certificate-grade fixture set (Sentinel, CreditScorer, verification kiosk, DecoySystem, both flag tests). Disagreement on any (fixture, system, query) triple fails the build. Logic and exclusion rules: [03_TECHNICAL_CORE/scripts/hermit_cross_check.py](03_TECHNICAL_CORE/scripts/hermit_cross_check.py). +**Does:** Run a HermiT (full OWL 2 DL) cross-check via ROBOT in CI on every push and PR. The check merges ontology + imports + core + governance + each certificate-grade fixture, runs HermiT, and compares classification SPARQL results against the production OWL-RL pipeline for every modeled system across the certificate-grade fixture set (Sentinel, CreditScorer, verification kiosk, DecoySystem, WeirdCalcSystem, both flag tests). Disagreement on any (fixture, system, query) triple fails the build. Logic and exclusion rules: [03_TECHNICAL_CORE/scripts/hermit_cross_check.py](03_TECHNICAL_CORE/scripts/hermit_cross_check.py). **Does not:** Cover the `ARCO_instances_adversarial_blanknode.ttl` fixture (`GhostSystem_001`). GhostSystem's disposition is an anonymous individual (blank node), and HermiT does not emit `ClassAssertion` axioms for anonymous individuals in its serialized output — this is correct DL profile behavior, since anonymous individuals are existential witnesses for satisfiability, not first-class ABox individuals. The `detect_latent_risk` audit traversal therefore returns `False` under HermiT and `True` under OWL-RL on GhostSystem. The classification entailment itself (`HighRiskSystem`, `AnnexIII1aApplicableSystem`) fires correctly under both reasoners; the divergence is confined to the audit-side traversal that walks `?system → ?component → ?disposition a :AnnexIIITriggeringCapability`. @@ -273,13 +273,13 @@ The pipeline's output layer (`run_pipeline.py` from line 1699 onward, plus `writ **Sentinel-shaped hardcoding** (the same affected-role pattern is hardcoded in three independent places, making the Gate 3 surface category-specific rather than parameterized): -- Python: `gate_evidence["gate3"]["role_uri"]` initializes to `:NaturalPersonRole` (`run_pipeline.py:371`); display labels fall back to biometric-identification strings when SPARQL returns zero rows (`run_pipeline.py:740-743`); `gate3_ok` checks USS existence only, not which role is designated (`run_pipeline.py:735`). **LIVE** (OPEN_PROBLEMS L3.2 and L3.4): the Python `role_uri` initialization and fallback display labels are still category-specific to biometric identification; `gate3_ok` is still a `bool(uss_uri)` Python predicate weaker than the OWL Gate 3 axiom. +- Python: Gate 3's display and determination-packet status now require the USS to designate `:NaturalPersonRole`; USS existence alone is not treated as Gate 3 satisfaction. **CLOSED 2026-05-20 (OPEN_PROBLEMS L3.4)** for the current modeled categories. **LIVE** (OPEN_PROBLEMS L3.2): the role target remains hardcoded to `:NaturalPersonRole` rather than parameterized by Annex III category. - SPARQL: `check_intended_use.sparql:31` hardcodes `cco:designates :NaturalPersonRole`, bundling Gates 2 and 3 into one ASK with the affected role baked in. **LIVE** (OPEN_PROBLEMS L3.2). - SHACL: `assessment_documentation_shape.ttl:96-100` hardcodes `sh:hasValue :NaturalPersonRole` at the shape level. **LIVE** (OPEN_PROBLEMS L3.2). The 5(b) HTML-emission counterpart was a separate locus and is **CLOSED 2026-05-10 (PR #34, OPEN_PROBLEMS L3.3)**: `write_html_view`'s 5(b) branch now reads `gate_evidence["gate2"]["process_type_label"]` and the parameterized gate-evidence helpers rather than Python literals. **Synthesized narrative**: -- The certificate's Annex III line carries a Python string literal "VERIFIED (ENTAILED, Article 6(3) derogation not evaluated)" (`run_pipeline.py:1725-1738`). The graph does not commit to "derogation not evaluated" as a state. **HEADLINE COMPOSITION CLOSED 2026-05-10 (PR #36, OPEN_PROBLEMS L4.3).** New `reasoning/select_primary_classification.sparql` returns the entailed class IRI list; the headline value is now the pure class IRI. Mode/scope text moved to separate fields. Composite qualifiers like `(ENTAILED, all three ARCO gates)` and `(Annex III Capability-Precondition Flag; ...; not the EU AI Act legal high-risk classification)` are dropped from the headline format functions. **REMAINING**: the polarity of the `(Article 6(3) derogation not evaluated)` qualifier when a `:DerogationClaim` is flagged is a queued semantic-correctness item (regulator-defensibility concern surfaced 2026-05-11; the honest qualifier should *strengthen*, not drop, when more uncertainty exists). Tracked separately. +- The certificate's Annex III line now emits pure graph-backed values such as `VERIFIED (ENTAILED)` or `NOT APPLICABLE`. Article 6(3) derogation scope is surfaced separately as run metadata (`ARTICLE 6(3) DEROGATION: NOT EVALUATED (run scope)` in the certificate and `derogation_evaluation_scope` in `summary.json`). **CLOSED 2026-05-20** for the mixed-provenance string caught by `test_output_provenance.py`. **REMAINING**: Article 6(3) validity is still not evaluated; `:DerogationClaim` is surfaced for human legal review only. **Operational**: @@ -303,7 +303,7 @@ The pipeline's output layer (`run_pipeline.py` from line 1699 onward, plus `writ ## 9. Engineering gaps -- **Negative test infrastructure is incomplete.** Gate-removal regression tests ([test_gate_removal.py](03_TECHNICAL_CORE/scripts/test_gate_removal.py)) verify each gate is independently necessary for both modeled Annex III categories (1(a) Sentinel and 5(b) CreditScorer) by mutating axioms and confirming entailment breaks. Symmetric coverage across the two categories was added 2026-05-14. Adversarial-mechanism tests ([test_adversarial_mechanism.py](03_TECHNICAL_CORE/scripts/test_adversarial_mechanism.py)) verify that DecoySystem_001 classifies via `owl:equivalentClass` (not direct IRI assertion) and that GhostSystem_001 classifies via blank-node `owl:someValuesFrom` (not a named individual). What does **not** exist is a parameterized test harness that loads an isolated deliberately-miscategorized instance file and runs the full pipeline against it, because the pipeline currently loads all TTL files into a single graph before reasoning — a negative-case file in the graph would contaminate positive cases. Building this harness is Step 2 of the ADR-001 work plan. Noted in [docs/agent/bfo_cco_alignment_audit.md](docs/agent/bfo_cco_alignment_audit.md) §"Unresolved Engineering Problem." +- **Negative test infrastructure is incomplete.** Gate-removal regression tests ([test_gate_removal.py](03_TECHNICAL_CORE/scripts/test_gate_removal.py)) verify each gate is independently necessary for both modeled Annex III categories (1(a) Sentinel and 5(b) CreditScorer) by mutating axioms and confirming entailment breaks. Symmetric coverage across the two categories was added 2026-05-14. Adversarial-mechanism tests ([test_adversarial_mechanism.py](03_TECHNICAL_CORE/scripts/test_adversarial_mechanism.py)) verify three cases: DecoySystem_001 classifies via `owl:equivalentClass` on the Annex III 1(a) branch (not direct IRI assertion); WeirdCalcSystem_001 classifies via `owl:equivalentClass` on the 5(b) branch with cross-category isolation preserved (added 2026-05-20); and GhostSystem_001 classifies via blank-node `owl:someValuesFrom` (not a named individual). What does **not** exist is a parameterized test harness that loads an isolated deliberately-miscategorized instance file and runs the full pipeline against it, because the pipeline currently loads all TTL files into a single graph before reasoning — a negative-case file in the graph would contaminate positive cases. Building this harness is Step 2 of the ADR-001 work plan. Noted in [docs/agent/bfo_cco_alignment_audit.md](docs/agent/bfo_cco_alignment_audit.md) §"Unresolved Engineering Problem." - **Dependency pinning.** The pipeline is verified only on: `rdflib==7.6.0`, `pyshacl==0.31.0`, `owlrl==7.1.4`. Upgrading any of these requires re-running the full regression suite. The `owlrl` pin is especially load-bearing: Gate 2 and Gate 3 use anonymous inverse property restrictions whose entailment behavior could change across reasoner versions. - **`AnnexIII_Condition_1a cco:prescribes :RemoteBiometricIdentificationProcess`** remains in [ARCO_governance_extension.ttl](03_TECHNICAL_CORE/ontology/ARCO_governance_extension.ttl) (moved 2026-05-14 from the Sentinel instance file to the governance extension as universal regulatory content) as a class-as-individual triple retained for regulatory traceability. The companion 5(b) triple `:AnnexIII_Condition_5b cco:prescribes :CreditworthinessEvaluationProcess` is in the same file. Neither affects current classification. Both are known blockers for a future CCO import (see §4). - **Single-reasoner portability.** The anonymous inverse property expressions in Gate 2 and Gate 3 equivalentClass axioms have been empirically verified on `owlrl==7.1.4` only. Other OWL-RL reasoners may or may not materialize these the same way. diff --git a/README.md b/README.md index 0ae4ebc..8cca2ed 100644 --- a/README.md +++ b/README.md @@ -124,13 +124,13 @@ The architecture grounds in BFO 2020 (ISO/IEC 21838-2:2021) and uses the seven-b **The three layers do different jobs and are not interchangeable.** OWL-RL classifies (entails membership in the Annex III applicability classes). SHACL validates that the documentary record supporting a determination is structurally complete. SPARQL audits the post-reasoning graph and surfaces conditions for human review. A SHACL pass does not mean the system is high-risk. A SPARQL false does not overturn an OWL classification. The certificate's auditability turns on keeping the layers distinct. -**Actual OWL inference fires, not string matching.** One adversarial fixture types its capability disposition only as `:WeirdScanner`; the regulated class IRI never appears in the input data and the connection runs through an `owl:equivalentClass` declaration. Another fixture's disposition has no IRI at all (anonymous blank node, `ARCO_instances_adversarial_blanknode.ttl:28`). Both classify correctly because the reasoner performs actual OWL inference. An approach that did string matching on class names, or required named individuals at every position, would miss both. +**Actual OWL inference fires, not string matching.** Two adversarial fixtures, one per modeled Annex III category, type their capability dispositions only as a decoy class (`:WeirdScanner` for 1(a), `:WeirdCalculator` for 5(b)); the regulated class is not asserted as the disposition's type, and the connection runs through an `owl:equivalentClass` declaration in each. A third fixture's disposition has no IRI at all (anonymous blank node, `ARCO_instances_adversarial_blanknode.ttl:28`). All three classify correctly because the reasoner performs actual OWL inference: the decoys via `owl:equivalentClass` propagation, the blank-node fixture via `owl:someValuesFrom` satisfaction. An approach that did string matching on class names, or required named individuals at every position, would miss them. **Layer separation is verified by fixtures.** Two flag-test fixtures present cases where all three Annex III gates are satisfied AND an audit-layer flag (a provider-asserted `:DerogationClaim`, or a `:FraudDetectionProcess` token) is also present. The OWL classification fires regardless of the audit flag; the flag fires alongside the classification. Classification and audit do not bleed into each other. **Gate independence is empirically verified.** A regression test removes the supporting triples for each Annex III 1(a) and 5(b) gate in turn and confirms the classification fails. Each gate is independently necessary in both categories; removing any one breaks the entailment. Content-mutation variants (wrong process type, wrong designation target) verify that the gates check content, not just existence. -**The certificate's classification binds to graph queries.** Classification field and evidence path are bound to SPARQL queries against the reasoned graph; the contract lives in `03_TECHNICAL_CORE/scripts/output_manifest_v2.yaml`, enforced by `test_output_provenance.py` (failing-by-design). Tightening provenance labels across surrounding fields is active work ([`OPEN_PROBLEMS.md L4.4-L4.6`](OPEN_PROBLEMS.md), [`LIMITATIONS.md §7.5`](LIMITATIONS.md)). +**The certificate's classification binds to graph queries.** Classification field and evidence path are bound to SPARQL queries against the reasoned graph; the contract lives in `03_TECHNICAL_CORE/scripts/output_manifest_v2.yaml`, enforced by `test_output_provenance.py`. The former Article 6(3) mixed-provenance failure is closed; broader provenance-label and schema hardening remains active work ([`OPEN_PROBLEMS.md L4.4-L4.6`](OPEN_PROBLEMS.md), [`LIMITATIONS.md §7.5`](LIMITATIONS.md)). **The graph stays honest about what it doesn't know.** ARCO does not mint participant facts, temporal regions, role-bearer particulars, or other instance-level content that source evidence does not warrant. Under the Open World Assumption, absent triples mean "not asserted by the reviewed commitments," not "denied." Keeping the graph sparse where evidence is sparse is a project discipline, enforced in code review. diff --git a/docs/MODELING_ROADMAP.md b/docs/MODELING_ROADMAP.md index a70f203..299f743 100644 --- a/docs/MODELING_ROADMAP.md +++ b/docs/MODELING_ROADMAP.md @@ -12,7 +12,7 @@ ARCO is the EU AI Act Annex III classifier I've been building. You hand it a str Mechanically: the classification axioms are written as `owl:equivalentClass` `owl:intersectionOf` definitions with three gates (capability disposition, intended use, affected role) at `03_TECHNICAL_CORE/ontology/ARCO_governance_extension.ttl`. The reasoning runs against BFO 2020, RO 2025-12-17, IAO 2026-03-30, and CCO v1.7-2024-11-03 commitments under OWL-RL. SHACL runs after reasoning to catch documentation inconsistencies. HermiT (under OWL 2 DL) runs in CI as an independent second reasoner so I'm not trusting a single inference profile. Outputs are gated through a failing-by-design provenance contract (`output_manifest_v2.yaml`, `test_output_provenance.py`) that refuses to ship a value unless its source is either graph-backed (named SPARQL query), run-metadata (declared sources), or a labeled documentary block. -Seven fixtures drive this today: two production positives (Sentinel ID for 1(a), Credit Scoring for 5(b)), one negative control (Verification Kiosk, a 1:1 biometric verification system that should not classify under 1(a), since verification is not identification per Recital 22 and Art. 3(41)), two adversarial fixtures (one decoy that injects an `owl:equivalentClass` to test whether the gates collapse on hostile input, one with an anonymous blank-node disposition to test whether the reasoner needs named individuals), and two flag-test fixtures (where all three Annex III gates are satisfied alongside an audit-layer flag like a provider-asserted derogation claim or a fraud-detection process token, to verify classification and audit don't bleed into each other). +Seven fixtures drive this today: two production positives (Sentinel ID for 1(a), Credit Scoring for 5(b)), one negative control (Verification Kiosk, a 1:1 biometric verification system that should not classify under 1(a), since verification is not identification per Recital 15, Recital 17, and the Annex III 1(a) operative carve-out (Article 3(36) defines biometric verification as 1:1; Article 3(41) defines the RBI system)), two adversarial fixtures (one decoy that injects an `owl:equivalentClass` to test whether the gates collapse on hostile input, one with an anonymous blank-node disposition to test whether the reasoner needs named individuals), and two flag-test fixtures (where all three Annex III gates are satisfied alongside an audit-layer flag like a provider-asserted derogation claim or a fraud-detection process token, to verify classification and audit don't bleed into each other). The EU AI Act lists 8 high-risk Annex III categories. I've modeled 2. The other 6 are deferred and I'm honest about that in `LIMITATIONS.md` §1. @@ -65,7 +65,7 @@ These are deliberate choices I've made. Each one is also disclosed in `LIMITATIO - BFO Bucket 5 (Site) and Bucket 6 (TemporalRegion) not modeled. ARCO is a design-time classifier, so runtime spatial/temporal regions don't carry weight at the entailment layer. If I extend ARCO to monitor jurisdiction-specific deployment or substantial modification, these activate (LIMITATIONS §3.7). - Provider-organization-to-ICE structural link not modeled (per PR #62). - Source documents do not auto-promote to reality-side commitments. Promotion is rare, conditional, and human-adjudicated per `docs/EVIDENCE_TO_COMMITMENT_POLICY.md`. I've been deliberate about not letting extraction wire directly to instance TTL. -- Closed-world hardware-incapability claims for software-configurable systems are forbidden. EU AI Act Annex III 1(a) keys on Article 3(36) "intended to be used" language, not on what the hardware in isolation can do (LIMITATIONS §3.5). +- Closed-world hardware-incapability claims for software-configurable systems are forbidden. EU AI Act Annex III 1(a) keys on the "intended to be used for biometric verification" language present in Recital 15, Recital 17, and the Annex III 1(a) operative carve-out, not on what the hardware in isolation can do. Article 3(36) supplies the technical 1:1 verification definition (LIMITATIONS §3.5). --- diff --git a/docs/kiosk_demo_v1/kiosk_demo.md b/docs/kiosk_demo_v1/kiosk_demo.md index 315601b..288c8f7 100644 --- a/docs/kiosk_demo_v1/kiosk_demo.md +++ b/docs/kiosk_demo_v1/kiosk_demo.md @@ -62,7 +62,7 @@ The source explicitly refuses two closed-world claims: - It does NOT claim the underlying hardware is structurally incapable of identification (the hardware is software-configurable). - It does NOT claim what the hardware could be configured to in some other deployment. -Article 3(36) of EU Regulation 2024/1689 keys Annex III 1(a) on intended use, not on raw hardware capability. The source packet describes THIS deployment under THIS configuration. +Annex III item 1(a) of EU Regulation 2024/1689 keys high-risk RBI classification on intended use ("intended to be used for biometric verification" carve-out per Recital 15, Recital 17, and the Annex III 1(a) operative clause), not on raw hardware capability. Article 3(36) supplies the technical 1:1 verification definition. The source packet describes THIS deployment under THIS configuration. (\*) **Real vendor document substitution is `OPEN_PROBLEMS.md L1.1`.** The next concrete move replaces this hypothetical with a real vendor packet (datasheet, conformity declaration, or equivalent), with each claim cited. @@ -169,7 +169,7 @@ The vision is the full input-mile-to-honest-certificate chain wired programmatic - (\*) **Configuration-level aboutness for regulatory ICEs** — `:AnnexIII_Condition_*` ICEs need explicit `iao:is_about` targets; canonical options surfaced (universal-only / particular continuant / multiple constituents). `M-Aboutness-Config-1 / L2.7`. - (\*) **`:CapabilityDisposition` rename to `:Capability`** — Smith-Against-Idiosyncrasy Principle 8 compositional naming fix; mechanical PR. `M-NameDiscipline-1 / L2.8`. - (\*) **Gate 3 role-relationship tightening** — current axiom designates the natural-person role universal but does not pin down whether natural persons are subjects of identification, operators, or another role-in-context. `L2.9`. -- (\*) **Gate 2 use-purpose proxy tightening** — current axiom maps Article 3(36) "intended to be used for" to `cco:prescribes someValuesFrom :Process`; defensible structural proxy but loose. `L2.10`. +- (\*) **Gate 2 use-purpose proxy tightening** — current axiom maps the "intended to be used for" framing (from Recital 15, Recital 17, and Annex III 1(a)) to `cco:prescribes someValuesFrom :Process`; Article 3(36) supplies the technical 1:1 verification definition the axiom presupposes. Defensible structural proxy but loose. `L2.10`. - (\*) **Reasoned-graph and HermiT classification artifact export** — surface the full entailment chain inspectably. `L4.8 parts 1-2`. - (\*) **Prose determination narrative emission** — generated from existing SPARQL gate evidence so a non-ARCO-glossary reader can read the conclusion + the reason in one paragraph. `L4.8 part 3`. - (\*) **Output provenance tightening** — G/M/D field labels, per-field source-query manifest, name/source mismatches across surrounding cert fields. `L4.4-L4.6`.