Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Fixed
- Fixed EBA-DEC-002 false positives on integer-typed metrics that use the `pure` unit. `Fact.metric` stores the metric QName in Clark notation (`{namespace}localname`), but `_build_metric_type_map()` was keyed on prefix notation (`eba_met:qXYZ`) taken from the module, so every lookup missed and the validator fell back to unit-based inference, flagging every `pure`-unit integer fact as a percentage. Added a `Fact.metric_qname` property that exposes the prefix-normalised QName and updated the decimals rules (EBA-DEC-001/002/003) to use it. Integer classification in `check_integer_decimals_xml` (EBA-DEC-003) remains fully taxonomy-driven: it relies only on the module-derived type map and never falls back to unit-based inference. No backward-incompatible changes: `Fact.metric` is unchanged.

## [2.0.1] - 2026-03-25

### Fixed
Expand Down
24 changes: 24 additions & 0 deletions src/xbridge/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,7 @@ def __init__(self, fact_xml: etree._Element) -> None:
self.decimals: str | None = None
self.context: str | None = None
self.unit: str | None = None
self._metric_qname_cache: Optional[str] = None

self.parse()

Expand All @@ -825,6 +826,29 @@ def parse(self) -> None:
self.decimals = self.fact_xml.attrib.get("decimals")
self.context = self.fact_xml.attrib.get("contextRef")
self.unit = self.fact_xml.attrib.get("unitRef")
self._metric_qname_cache = None

@property
def metric_qname(self) -> Optional[str]:
    """Return the metric name in EBA/CSV prefix notation (e.g. ``eba_met:qAZH``).

    :class:`xbridge.modules.Module` keys its variables by this form, so it
    is the string to use when looking a fact up in a module-derived
    ``type_map``.

    ``metric`` itself (the raw lxml tag, typically in Clark notation such
    as ``{http://.../dict/met}qAZH``) is not modified, which keeps callers
    that expect that form working.

    Returns ``None`` when there is no metric, or when the metric cannot be
    normalised (e.g. the namespace URI does not follow the EBA convention).
    """
    # ``None`` doubles as the "not computed yet" marker, so a metric whose
    # normalisation yields ``None`` is normalised again on every access.
    resolved = self._metric_qname_cache
    if resolved is None and self.metric is not None:
        resolved = _normalize_metric_value(self.metric, self.fact_xml.nsmap)
        self._metric_qname_cache = resolved
    return resolved

def __dict__(self) -> Dict[str, Any]: # type: ignore[override]
metric_clean = ""
Expand Down
69 changes: 58 additions & 11 deletions src/xbridge/validation/rules/eba_decimals.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,16 @@

from __future__ import annotations

import logging
from typing import Any, Dict, Optional, Tuple

from xbridge.validation._context import ValidationContext
from xbridge.validation._registry import rule_impl
from xbridge.validation.rules._helpers import PURE_VALUES
from xbridge.validation.rules.csv_parameters import _parse_parameters

_logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Metric-type constants (values of Variable._attributes)
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -55,6 +58,10 @@
def _build_metric_type_map(ctx: ValidationContext) -> Dict[str, str]:
"""Build a ``{metric_qname: type_string}`` lookup from the Module.

Keys are the full prefix form stored in the module
(e.g. ``eba_met:qAZH``) — matching what :attr:`Fact.metric_qname`
produces after namespace normalisation.

Falls back to an empty dict if no Module is loaded.
The result is cached per module object so the three DEC rules
that call this share a single computation.
Expand All @@ -79,6 +86,43 @@ def _build_metric_type_map(ctx: ValidationContext) -> Dict[str, str]:
return result


def _lookup_metric_type(
    fact: Any,
    type_map: Dict[str, str],
    units: Dict[str, str],
    module_present: bool,
) -> Optional[str]:
    """Return the metric-type classification to use for *fact*.

    Resolution order:

    1. Look the fact up in *type_map* using ``fact.metric_qname`` (prefix
       form); if that attribute is missing or falsy, ``fact.metric`` is
       used as the key instead.
    2. Otherwise infer the type from the fact's unit via
       ``_infer_type_from_unit`` (``iso4217:*`` → monetary;
       ``xbrli:pure`` → percentage).

    Args:
        fact: Object exposing ``metric`` and ``unit`` and, optionally,
            ``metric_qname``.
        type_map: Module-derived ``{metric_qname: type_string}`` lookup.
        units: Mapping indexed by ``fact.unit``; the value found (or ``""``
            when absent) is handed to ``_infer_type_from_unit``.
        module_present: Whether a module is loaded. It only controls
            logging and has no effect on the returned value.

    Returns:
        The type string from *type_map* when the metric is found there,
        otherwise the result of the unit-based inference (which may be
        ``None``).

    A debug record is written only when *module_present* is true, the
    metric was missing from *type_map* and the unit-based inference
    produced a type — a hint that the data and the taxonomy disagree.
    """
    key = getattr(fact, "metric_qname", None) or fact.metric
    from_module = None if key is None else type_map.get(key)
    if from_module is not None:
        return from_module

    # The module gave no answer: classify by the unit instead.
    from_unit = _infer_type_from_unit(units.get(fact.unit, ""))
    if from_unit is None or not module_present:
        return from_unit

    # ``key`` already falls back to ``fact.metric``, so it is what gets logged.
    _logger.debug(
        "EBA-DEC: metric %r not found in module type_map (size=%d); "
        "falling back to unit-based inference → %s",
        key,
        len(type_map),
        from_unit,
    )
    return from_unit


def _monetary_threshold(ctx: ValidationContext) -> int:
"""Return the minimum acceptable ``@decimals`` for monetary facts.

Expand Down Expand Up @@ -142,15 +186,14 @@ def check_monetary_decimals_xml(ctx: ValidationContext) -> None:

type_map = _build_metric_type_map(ctx)
threshold = _monetary_threshold(ctx)
module_present = ctx.module is not None

for fact in facts:
if fact.unit is None or fact.decimals is None:
continue

metric = fact.metric or "?"
metric_type = type_map.get(metric)
if metric_type is None:
metric_type = _infer_type_from_unit(units.get(fact.unit, ""))
metric = fact.metric_qname or fact.metric or "?"
metric_type = _lookup_metric_type(fact, type_map, units, module_present)
if metric_type != _TYPE_MONETARY:
continue

Expand Down Expand Up @@ -184,15 +227,14 @@ def check_percentage_decimals_xml(ctx: ValidationContext) -> None:
return

type_map = _build_metric_type_map(ctx)
module_present = ctx.module is not None

for fact in facts:
if fact.unit is None or fact.decimals is None:
continue

metric = fact.metric or "?"
metric_type = type_map.get(metric)
if metric_type is None:
metric_type = _infer_type_from_unit(units.get(fact.unit, ""))
metric = fact.metric_qname or fact.metric or "?"
metric_type = _lookup_metric_type(fact, type_map, units, module_present)
if metric_type != _TYPE_PERCENTAGE:
continue

Expand Down Expand Up @@ -226,13 +268,18 @@ def check_integer_decimals_xml(ctx: ValidationContext) -> None:
return

type_map = _build_metric_type_map(ctx)
# EBA-DEC-003 relies exclusively on the module-derived type_map:
# there is no reliable unit-based heuristic for integer classification.
# Without a module the rule is effectively a no-op, which matches the
# original behaviour.

for fact in facts:
if fact.unit is None or fact.decimals is None:
continue

metric = fact.metric or "?"
metric_type = type_map.get(metric)
metric = fact.metric_qname or fact.metric or "?"
qname = fact.metric_qname or fact.metric
metric_type = type_map.get(qname) if qname is not None else None
if metric_type != _TYPE_INTEGER:
continue

Expand Down Expand Up @@ -280,7 +327,7 @@ def check_realistic_decimals_xml(ctx: ValidationContext) -> None:
if fact.decimals is None:
continue

metric = fact.metric or "?"
metric = fact.metric_qname or fact.metric or "?"

if _is_inf(fact.decimals):
ctx.add_finding(
Expand Down
Loading
Loading