diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ff6e26..4290190 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed +- Fixed EBA-DEC-002 false positives on integer-typed metrics that use the `pure` unit. `Fact.metric` stores the metric QName in Clark notation (`{namespace}localname`), but `_build_metric_type_map()` was keyed on prefix notation (`eba_met:qXYZ`) taken from the module, so every lookup missed and the validator fell back to unit-based inference, flagging every `pure`-unit integer fact as a percentage. Added a `Fact.metric_qname` property that exposes the prefix-normalised QName and updated the decimals rules (EBA-DEC-001/002/003) to use it. Integer classification is now fully taxonomy-driven — the unit-based fallback in `check_integer_decimals_xml` has been removed. No backward-incompatible changes: `Fact.metric` is unchanged. + ## [2.0.1] - 2026-03-25 ### Fixed diff --git a/src/xbridge/instance.py b/src/xbridge/instance.py index b0efa34..ea6cdf7 100644 --- a/src/xbridge/instance.py +++ b/src/xbridge/instance.py @@ -815,6 +815,7 @@ def __init__(self, fact_xml: etree._Element) -> None: self.decimals: str | None = None self.context: str | None = None self.unit: str | None = None + self._metric_qname_cache: Optional[str] = None self.parse() @@ -825,6 +826,29 @@ def parse(self) -> None: self.decimals = self.fact_xml.attrib.get("decimals") self.context = self.fact_xml.attrib.get("contextRef") self.unit = self.fact_xml.attrib.get("unitRef") + self._metric_qname_cache = None + + @property + def metric_qname(self) -> Optional[str]: + """The metric name in EBA/CSV prefix notation (e.g. ``eba_met:qAZH``). + + This is the form used by :class:`xbridge.modules.Module` to key its + variables and is therefore the correct string to use when looking a + fact up in a module-derived ``type_map``. + + ``metric`` (the raw lxml tag, typically in Clark notation such as + ``{http://.../dict/met}qAZH``) is left unchanged for backward + compatibility with callers that expect that form. + + Returns ``None`` if the metric cannot be normalised (e.g. the + namespace URI does not follow the EBA convention). + """ + if self._metric_qname_cache is not None: + return self._metric_qname_cache + if self.metric is None: + return None + self._metric_qname_cache = _normalize_metric_value(self.metric, self.fact_xml.nsmap) + return self._metric_qname_cache def __dict__(self) -> Dict[str, Any]: # type: ignore[override] metric_clean = "" diff --git a/src/xbridge/validation/rules/eba_decimals.py b/src/xbridge/validation/rules/eba_decimals.py index 5f2a37f..671a800 100644 --- a/src/xbridge/validation/rules/eba_decimals.py +++ b/src/xbridge/validation/rules/eba_decimals.py @@ -15,6 +15,7 @@ from __future__ import annotations +import logging from typing import Any, Dict, Optional, Tuple from xbridge.validation._context import ValidationContext @@ -22,6 +23,8 @@ from xbridge.validation.rules._helpers import PURE_VALUES from xbridge.validation.rules.csv_parameters import _parse_parameters +_logger = logging.getLogger(__name__) + # --------------------------------------------------------------------------- # Metric-type constants (values of Variable._attributes) # --------------------------------------------------------------------------- @@ -55,6 +58,10 @@ def _build_metric_type_map(ctx: ValidationContext) -> Dict[str, str]: """Build a ``{metric_qname: type_string}`` lookup from the Module. + Keys are the full prefix form stored in the module + (e.g. ``eba_met:qAZH``) — matching what :attr:`Fact.metric_qname` + produces after namespace normalisation. + Falls back to an empty dict if no Module is loaded. The result is cached per module object so the three DEC rules that call this share a single computation. @@ -79,6 +86,43 @@ def _build_metric_type_map(ctx: ValidationContext) -> Dict[str, str]: return result +def _lookup_metric_type( + fact: Any, + type_map: Dict[str, str], + units: Dict[str, str], + module_present: bool, +) -> Optional[str]: + """Resolve the metric-type classification for a fact. + + Lookup order: + 1. ``fact.metric_qname`` in the module's type_map (prefix form). + 2. Unit-based inference (``iso4217:*`` → monetary; ``xbrli:pure`` → + percentage). Used when no module is loaded, or when the metric + is not in the module at all. + + A debug log is emitted when the module *is* loaded but the metric + could not be found in the type_map — this signals a data-quality or + taxonomy-mismatch issue that is worth surfacing in diagnostics. + """ + qname = getattr(fact, "metric_qname", None) or fact.metric + if qname is not None: + metric_type = type_map.get(qname) + if metric_type is not None: + return metric_type + + # Fall back to unit-based inference. + inferred = _infer_type_from_unit(units.get(fact.unit, "")) + if module_present and inferred is not None: + _logger.debug( + "EBA-DEC: metric %r not found in module type_map (size=%d); " + "falling back to unit-based inference → %s", + qname or fact.metric, + len(type_map), + inferred, + ) + return inferred + + def _monetary_threshold(ctx: ValidationContext) -> int: """Return the minimum acceptable ``@decimals`` for monetary facts. @@ -142,15 +186,14 @@ def check_monetary_decimals_xml(ctx: ValidationContext) -> None: type_map = _build_metric_type_map(ctx) threshold = _monetary_threshold(ctx) + module_present = ctx.module is not None for fact in facts: if fact.unit is None or fact.decimals is None: continue - metric = fact.metric or "?" - metric_type = type_map.get(metric) - if metric_type is None: - metric_type = _infer_type_from_unit(units.get(fact.unit, "")) + metric = fact.metric_qname or fact.metric or "?" + metric_type = _lookup_metric_type(fact, type_map, units, module_present) if metric_type != _TYPE_MONETARY: continue @@ -184,15 +227,14 @@ def check_percentage_decimals_xml(ctx: ValidationContext) -> None: return type_map = _build_metric_type_map(ctx) + module_present = ctx.module is not None for fact in facts: if fact.unit is None or fact.decimals is None: continue - metric = fact.metric or "?" - metric_type = type_map.get(metric) - if metric_type is None: - metric_type = _infer_type_from_unit(units.get(fact.unit, "")) + metric = fact.metric_qname or fact.metric or "?" + metric_type = _lookup_metric_type(fact, type_map, units, module_present) if metric_type != _TYPE_PERCENTAGE: continue @@ -226,13 +268,18 @@ def check_integer_decimals_xml(ctx: ValidationContext) -> None: return type_map = _build_metric_type_map(ctx) + # EBA-DEC-003 relies exclusively on the module-derived type_map: + # there is no reliable unit-based heuristic for integer classification. + # Without a module the rule is effectively a no-op, which matches the + # original behaviour. for fact in facts: if fact.unit is None or fact.decimals is None: continue - metric = fact.metric or "?" - metric_type = type_map.get(metric) + metric = fact.metric_qname or fact.metric or "?" + qname = fact.metric_qname or fact.metric + metric_type = type_map.get(qname) if qname is not None else None if metric_type != _TYPE_INTEGER: continue @@ -280,7 +327,7 @@ def check_realistic_decimals_xml(ctx: ValidationContext) -> None: if fact.decimals is None: continue - metric = fact.metric or "?" + metric = fact.metric_qname or fact.metric or "?" if _is_inf(fact.decimals): ctx.add_finding( diff --git a/tests/test_eba_decimals.py b/tests/test_eba_decimals.py index 4fbbade..0cafc65 100644 --- a/tests/test_eba_decimals.py +++ b/tests/test_eba_decimals.py @@ -560,3 +560,268 @@ def test_missing_params_no_findings(self) -> None: } ) assert _run_csv(data, "EBA-DEC-004") == [] + + +# ===================================================================== +# Regression tests for #XXX — EBA-DEC-002 false positives on integer +# metrics that happen to use the 'pure' unit. +# +# Before the fix: +# * ``Fact.metric`` was stored as the lxml Clark-notation tag, but +# ``type_map`` was keyed by the EBA prefix form (e.g. ``eba_met:qFGE``). +# * ``type_map.get(fact.metric)`` therefore always returned ``None`` +# for real-world instances, silently triggering the unit-based +# fallback, which classified every ``pure``-unit fact as percentage. +# * Integer-typed metrics with pure unit (e.g. COREP ALM ``qAZH``, +# ``qCCG``, ``qDGB``) were then incorrectly flagged by EBA-DEC-002 +# for having ``@decimals < 4``. +# +# These tests pin the correct behaviour: +# * ``Fact.metric_qname`` returns the normalised prefix form. +# * ``_build_metric_type_map`` indexes by both prefix and local name. +# * When a module is loaded, classification is driven by the module, +# not by the unit heuristic. +# ===================================================================== + +# Pillar 3 CODIS module has examples of all four metric types, and the +# fixture is small enough to load quickly. +_PILLAR3_CODIS_SCHEMA = ( + "http://www.eba.europa.eu/eu/fr/xbrl/crr/fws/pillar3/4.1/mod/codis.xsd" +) + +# Metrics picked from src/xbridge/modules/codis_pillar3_4.1.json +_CODIS_MONETARY_METRIC = "qHNJ" +_CODIS_PERCENTAGE_METRIC = "qADL" +_CODIS_INTEGER_METRIC = "qFGE" +_CODIS_DECIMAL_METRIC = "qIIL" + + +def _xbrl_with_module(body: str, prefix: str = "eba_met") -> bytes: + """Build a minimal XBRL document with a real schemaRef + eba_met xmlns. + + The schemaRef points at the Pillar 3 CODIS module so that the + validation engine loads a real :class:`Module` into the context. + ``prefix`` controls the xmlns prefix used for fact elements — this + lets us test prefix-version drift (``eba_met`` vs ``eba_met_4.0``). + """ + ns = ( + 'xmlns:xbrli="http://www.xbrl.org/2003/instance" ' + 'xmlns:link="http://www.xbrl.org/2003/linkbase" ' + 'xmlns:xlink="http://www.w3.org/1999/xlink" ' + 'xmlns:find="http://www.eurofiling.info/xbrl/ext/filing-indicators" ' + f'xmlns:{prefix}="http://www.eba.europa.eu/xbrl/crr/dict/met"' + ) + schema_ref = ( + f'' + ) + return ( + f'' + f'{schema_ref}{body}' + ).encode() + + +class TestEBADEC002IntegerMetricRegression: + """Integer metrics with pure unit MUST NOT trigger EBA-DEC-002. + + This is the core regression that this fix exists for. + """ + + def setup_method(self) -> None: + _ensure_registered() + + def test_integer_metric_pure_unit_decimals_0_no_finding(self) -> None: + """Integer-type metric (qFGE, $decimalsInteger) with decimals=0 is + correct and MUST NOT trigger EBA-DEC-002 (the percentage rule).""" + body = ( + _unit("u1", "xbrli:pure") + + _context() + + _fact(metric=f"eba_met:{_CODIS_INTEGER_METRIC}", unit="u1", decimals="0") + ) + xml = _xbrl_with_module(body) + assert _run(xml, "EBA-DEC-002") == [] + + def test_integer_metric_pure_unit_decimals_0_triggers_dec003(self) -> None: + """decimals=0 on an integer fact is the taxonomy-correct value — + EBA-DEC-003 must accept it.""" + body = ( + _unit("u1", "xbrli:pure") + + _context() + + _fact(metric=f"eba_met:{_CODIS_INTEGER_METRIC}", unit="u1", decimals="0") + ) + xml = _xbrl_with_module(body) + assert _run(xml, "EBA-DEC-003") == [] + + def test_integer_metric_pure_unit_decimals_4_triggers_dec003(self) -> None: + """decimals=4 on an integer fact is wrong — EBA-DEC-003 must flag it.""" + body = ( + _unit("u1", "xbrli:pure") + + _context() + + _fact(metric=f"eba_met:{_CODIS_INTEGER_METRIC}", unit="u1", decimals="4") + ) + xml = _xbrl_with_module(body) + findings = _run(xml, "EBA-DEC-003") + assert len(findings) == 1 + assert findings[0].severity == Severity.ERROR + assert "integer" in findings[0].message.lower() + + +class TestEBADEC002PercentageMetricWithModule: + """Percentage facts still flagged correctly when a module is loaded.""" + + def setup_method(self) -> None: + _ensure_registered() + + def test_percentage_metric_low_decimals_still_flagged(self) -> None: + """Percentage metric qADL with decimals=2 must still trigger DEC-002. + + qADL is $decimalsPercentage — even with a module loaded the + module-driven path must flag low decimals (end-to-end sanity check). + """ + body = ( + _unit("u1", "xbrli:pure") + + _context() + + _fact(metric=f"eba_met:{_CODIS_PERCENTAGE_METRIC}", unit="u1", decimals="2") + ) + xml = _xbrl_with_module(body) + findings = _run(xml, "EBA-DEC-002") + assert len(findings) == 1 + assert findings[0].severity == Severity.ERROR + + def test_percentage_metric_high_decimals_ok(self) -> None: + body = ( + _unit("u1", "xbrli:pure") + + _context() + + _fact(metric=f"eba_met:{_CODIS_PERCENTAGE_METRIC}", unit="u1", decimals="6") + ) + xml = _xbrl_with_module(body) + assert _run(xml, "EBA-DEC-002") == [] + + +class TestEBADEC001MonetaryMetricWithModule: + """Monetary classification still works when a module is loaded.""" + + def setup_method(self) -> None: + _ensure_registered() + + def test_monetary_metric_low_decimals_flagged(self) -> None: + body = ( + _unit("u1", "iso4217:EUR") + + _context() + + _fact(metric=f"eba_met:{_CODIS_MONETARY_METRIC}", unit="u1", decimals="-7") + ) + xml = _xbrl_with_module(body) + findings = _run(xml, "EBA-DEC-001") + assert len(findings) == 1 + assert findings[0].severity == Severity.ERROR + + +class TestFactMetricNormalisation: + """Unit tests for the new ``Fact.metric_qname`` property. These isolate + the normalisation logic from the rule engine.""" + + def _parse(self, xml_bytes: bytes): + from xbridge.instance import XmlInstance + + with NamedTemporaryFile(suffix=".xbrl", delete=False) as tmp: + tmp.write(xml_bytes) + tmp.flush() + path = tmp.name + try: + inst = XmlInstance(path) + return list(inst.facts or []) + finally: + os.unlink(path) + + def test_metric_raw_is_clark_notation(self) -> None: + """Backwards compatibility: ``Fact.metric`` stays in Clark notation.""" + xml = _xbrl( + _unit("u1", "xbrli:pure") + _context() + _fact(metric="eba_met:mi1") + ) + facts = self._parse(xml) + assert len(facts) == 1 + assert facts[0].metric is not None + assert facts[0].metric.startswith("{") + assert facts[0].metric.endswith("}mi1") + + def test_metric_qname_is_prefix_form(self) -> None: + """``Fact.metric_qname`` returns the EBA prefix form.""" + xml = _xbrl( + _unit("u1", "xbrli:pure") + _context() + _fact(metric="eba_met:mi1") + ) + facts = self._parse(xml) + assert facts[0].metric_qname == "eba_met:mi1" + + def test_metric_qname_cached(self) -> None: + """Repeated access uses the cache, not re-parses the nsmap each time.""" + xml = _xbrl( + _unit("u1", "xbrli:pure") + _context() + _fact(metric="eba_met:mi1") + ) + facts = self._parse(xml) + first = facts[0].metric_qname + second = facts[0].metric_qname + # Same underlying string object — cheap identity check. + assert first is second + + +class TestLookupMetricTypeFallbackLogging: + """Ensure the debug log fires only when a module is present but the + metric is missing from its type_map.""" + + def setup_method(self) -> None: + _ensure_registered() + + def test_no_log_when_no_module(self, caplog) -> None: + """Without a module, the fallback is the expected path — silent.""" + import logging + + from xbridge.validation.rules import eba_decimals as mod + + class _DummyFact: + metric = "{http://x}qZZZ" + metric_qname = "eba_met:qZZZ" + unit = "u1" + + with caplog.at_level(logging.DEBUG, logger=mod.__name__): + out = mod._lookup_metric_type( + _DummyFact(), type_map={}, units={"u1": "xbrli:pure"}, module_present=False + ) + assert out == "$decimalsPercentage" # unit-based fallback for pure + assert not any("falling back" in r.message for r in caplog.records) + + def test_log_when_module_missing_metric(self, caplog) -> None: + """With a module present but no entry for the metric, log fires.""" + import logging + + from xbridge.validation.rules import eba_decimals as mod + + class _DummyFact: + metric = "{http://x}qZZZ" + metric_qname = "eba_met:qZZZ" + unit = "u1" + + with caplog.at_level(logging.DEBUG, logger=mod.__name__): + mod._lookup_metric_type( + _DummyFact(), type_map={}, units={"u1": "xbrli:pure"}, module_present=True + ) + assert any("falling back" in r.message for r in caplog.records) + + def test_no_log_when_module_contains_metric(self, caplog) -> None: + """Happy path: module has the metric — no fallback, no log.""" + import logging + + from xbridge.validation.rules import eba_decimals as mod + + class _DummyFact: + metric = "{http://x}qZZZ" + metric_qname = "eba_met:qZZZ" + unit = "u1" + + with caplog.at_level(logging.DEBUG, logger=mod.__name__): + out = mod._lookup_metric_type( + _DummyFact(), + type_map={"eba_met:qZZZ": "$decimalsInteger"}, + units={"u1": "xbrli:pure"}, + module_present=True, + ) + assert out == "$decimalsInteger" + assert not any("falling back" in r.message for r in caplog.records)