diff --git a/.ai/specs/focus-consolidation-dynamic-view/design.md b/.ai/specs/focus-consolidation-dynamic-view/design.md index 3ecfe229..4c6e2237 100644 --- a/.ai/specs/focus-consolidation-dynamic-view/design.md +++ b/.ai/specs/focus-consolidation-dynamic-view/design.md @@ -178,7 +178,28 @@ For each target column in FOCUS 1.2, and for each source table: 1. **Column exists with matching type**: Use column directly → `billedcost` 2. **Column exists with different type**: Cast to target type → `CAST(billingperiodstart AS TIMESTAMP)` -3. **Column missing**: Use typed NULL → `CAST(NULL AS DOUBLE) AS billedcost` +3. **Column missing by canonical name but alias found**: Use alias with SQL rename → `provider providername` +4. **Column missing by canonical name, alias found with different type**: Cast alias → `CAST(provider AS VARCHAR) providername` +5. **Column truly missing**: Use typed NULL → `CAST(NULL AS DOUBLE) AS billedcost` + +### Column Name Aliases + +Some providers use non-canonical column names in their FOCUS exports. The `COLUMN_ALIASES` dict maps canonical FOCUS column names to known alternative names: + +```python +COLUMN_ALIASES = { + 'providername': ['provider'], + 'publishername': ['publisher'], + 'invoiceissuername': ['invoiceissuer'], + 'regionid': ['region'], + 'regionname': ['region'], +} +``` + +The alias lookup occurs after the canonical name check fails, and before the NULL fallback. This means: +- If a provider ships a conformant column name (e.g. `providername`), it is used directly (step 1/2) +- If only the alias exists (e.g. 
`provider`), it is mapped via SQL alias (step 3/4) +- If neither exists, a typed NULL is emitted (step 5) ### Special case: `billing_period` @@ -231,9 +252,17 @@ Each SELECT block has exactly the same columns in the same order, ensuring UNION *For any* source table column set and *for any* target FOCUS 1.2 column, `generate_select_for_table` should produce: - The column name directly if it exists in the source with matching type - A `CAST({col} AS {target_type})` expression if the column exists but with a different type -- A `CAST(NULL AS {target_type})` expression if the column is missing from the source +- An `{alias} {col}` expression if the column is missing but a known alias exists with matching type +- A `CAST({alias} AS {target_type}) {col}` expression if the column is missing but a known alias exists with a different type +- A `CAST(NULL AS {target_type})` expression if neither the column nor any alias exists in the source + +**Validates: Requirements 2.2, 2.3, 2.6** + +### Property 2a: Alias precedence + +*For any* source table that contains BOTH a canonical column name AND an alias for the same target column, the canonical name takes precedence and the alias is never used. -**Validates: Requirements 2.2, 2.3** +**Validates: Requirement 2.6** ### Property 3: billing_period special handling @@ -293,5 +322,7 @@ Unit tests cover: - Edge case: table with all FOCUS 1.2 columns (no NULLs needed) - Edge case: table with only FOCUS 1.0 minimum columns (many NULLs) - Edge case: billing_period as partition key vs regular column vs absent +- Edge case: column alias resolution (e.g.
OCI `provider` → `providername`) +- Edge case: canonical name takes precedence over alias when both exist - Integration: `create_or_update_view` in `common.py` delegates to `FocusConsolidationView` when type is `dynamic_focus_consolidation` - YAML update: `focus.yaml` has correct type and `columns:` dict (no placeholder SQL) diff --git a/.ai/specs/focus-consolidation-dynamic-view/requirements.md b/.ai/specs/focus-consolidation-dynamic-view/requirements.md index c8180105..f6a8c9f1 100644 --- a/.ai/specs/focus-consolidation-dynamic-view/requirements.md +++ b/.ai/specs/focus-consolidation-dynamic-view/requirements.md @@ -36,6 +36,7 @@ This creates maintenance burden for customers who need to manually update the vi 2.3. Column types are cast to match FOCUS 1.2 specification when needed, using a type compatibility system 2.4. Special handling for `billing_period` column (partition vs computed) 2.5. Array-type source columns that map to scalar targets produce NULL instead of invalid CAST +2.6. Known column name aliases are resolved for providers using non-canonical names (e.g. OCI `provider` → `providername`) ### 3. As a customer running `--recursive` updates **I want** `focus_consolidation_view` to be updated automatically @@ -160,6 +161,19 @@ The YAML uses Athena-friendly type names: - Array-type source columns mapping to scalar targets produce `CAST(NULL AS {target_type})` instead of invalid CAST - Unknown types default to `VARCHAR` +#### Column Name Aliases +Some providers (e.g. OCI) use non-canonical column names in their FOCUS exports that differ from the FOCUS specification.
The view generator supports a `COLUMN_ALIASES` mapping that maps canonical FOCUS column names to known alternative names: + +| Canonical FOCUS Name | Known Alias (OCI) | +|---|---| +| `providername` | `provider` | +| `publishername` | `publisher` | +| `invoiceissuername` | `invoiceissuer` | +| `regionid` | `region` | +| `regionname` | `region` | + +**Logic**: The canonical column name is always checked first. If not found in the source table, aliases are checked as a fallback. If an alias is found, it is used with a SQL alias to map it to the canonical name (e.g. `provider providername`). Once a provider updates their export to use the canonical FOCUS names, the alias is no longer needed and the direct match takes precedence. + #### NULL Placeholders - Use typed NULLs for missing columns via `_resolve_athena_type()` - Example: `CAST(NULL AS VARCHAR)` for missing string columns diff --git a/cid/helpers/focus_consolidation.py b/cid/helpers/focus_consolidation.py index b4774e07..4c818cef 100644 --- a/cid/helpers/focus_consolidation.py +++ b/cid/helpers/focus_consolidation.py @@ -13,6 +13,18 @@ logger = logging.getLogger(__name__) +# Known column name aliases for providers whose FOCUS exports use non-canonical names. +# Key: canonical FOCUS column name (lowercased). +# Value: list of known alternative column names (lowercased) used by providers (e.g. OCI). +# The canonical name is always checked first; aliases are only used as a fallback. +COLUMN_ALIASES = { + 'providername': ['provider'], + 'publishername': ['publisher'], + 'invoiceissuername': ['invoiceissuer'], + 'regionid': ['region'], + 'regionname': ['region'], +} + # Minimum columns that identify a table as FOCUS-compliant (FOCUS 1.0 core). # A table must have ALL of these columns (case-insensitive) to be considered a FOCUS table. # NOTE: Only columns common across ALL providers (AWS, Azure, OCI, GCP) are listed here. 
@@ -237,7 +249,7 @@ def _column_expression(self, col_name, target_type, source_columns, partition_ke if col_name == 'billing_period': return f'{self._billing_period_expr(source_columns, partition_keys)} {col_name}' - # Column exists in source + # Column exists in source by canonical name if col_name in source_columns: source_type = source_columns[col_name] if _types_compatible(source_type, target_type): @@ -248,7 +260,18 @@ def _column_expression(self, col_name, target_type, source_columns, partition_ke # Types differ — cast return f'CAST({col_name} AS {_resolve_athena_type(target_type)}) {col_name}' - # Column missing — typed NULL placeholder + # Column missing by canonical name — check known aliases (e.g. OCI uses + # 'provider' instead of 'providername') + for alias in COLUMN_ALIASES.get(col_name, []): + if alias in source_columns: + source_type = source_columns[alias] + if _types_compatible(source_type, target_type): + return f'{alias} {col_name}' + if _normalize_type(source_type).startswith('array'): + return f'{_null_as(target_type)} {col_name}' + return f'CAST({alias} AS {_resolve_athena_type(target_type)}) {col_name}' + + # Column truly missing — typed NULL placeholder return f'{_null_as(target_type)} {col_name}' @staticmethod