ericsocrat · ericsocrat · Jun 2, 2026 · Jun 2, 2026 · Jun 2, 2026 · Jun 2, 2026
@@ -187,7 +187,7 @@ echo "SELECT * FROM v_master LIMIT 5;" | docker exec -i supabase_db_tryvit psql
 ```
 ┌─────────────────┐     ┌──────────────────┐     ┌─────────────────────────┐
 │  Open Food Facts │────▶│  Python Pipeline │────▶│  PostgreSQL (Supabase)  │
-│  API v2          │     │  sql_generator   │     │  230 migrations         │
+│  API v2          │     │  sql_generator   │     │  231 migrations         │
 │  (category tags, │     │  validator       │     │  58 pipeline folders    │
 │   countries=PL,DE│     │  off_client      │     │  products + nutrition   │
 └─────────────────┘     └──────────────────┘     │  + ingredients + scores │
@@ -319,7 +319,7 @@ tryvit/
 │   └── views/                       # Reference view definitions
 │
 ├── supabase/
-│   ├── migrations/                  # 230 append-only schema migrations
+│   ├── migrations/                  # 231 append-only schema migrations
 │   ├── seed/                        # Reference data seeds
 │   ├── tests/                       # pgTAP integration tests
 │   └── functions/                   # Edge Functions (API gateway, push notifications, CAPTCHA)

@@ -246,6 +246,48 @@ if ($Enrich) {
     # Step 2: Apply the generated migration
     $enrichMigrations = Get-ChildItem -Path (Join-Path $PSScriptRoot "supabase" "migrations") -Filter "*populate_ingredients_allergens*" | Sort-Object Name | Select-Object -Last 1
     if ($enrichMigrations) {
+        Write-Host "Step 2a: Validating percent ranges in generated migration..." -ForegroundColor Yellow
+        $validatePercentScript = @"
+import re
+import sys
+from pathlib import Path
+
+path = Path(sys.argv[1])
+line_re = re.compile(
+    r"\(\s*'(?P<country>[^']+)'\s*,\s*'(?P<ean>[^']+)'\s*,\s*'(?P<name>(?:[^']|'')*)'\s*,\s*(?P<position>\d+)\s*,\s*(?P<pct>NULL|-?\d+(?:\.\d+)?)::numeric\s*,\s*(?P<pct_est>NULL|-?\d+(?:\.\d+)?)::numeric"
+)
+
+bad = []
+for lineno, line in enumerate(path.read_text(encoding="utf-8").splitlines(), start=1):
+    m = line_re.search(line)
+    if not m:
+        continue
+    pct = m.group("pct")
+    if pct != "NULL":
+        val = float(pct)
+        if val < 0 or val > 100:
+            bad.append((lineno, m.group("country"), m.group("ean"), m.group("name"), val))
+
+if bad:
+    print(f"Found {len(bad)} out-of-range product_ingredient.percent values in {path.name}", file=sys.stderr)
+    for row in bad[:5]:
+        print(
+            f"  line {row[0]} country={row[1]} ean={row[2]} ingredient='{row[3]}' percent={row[4]}",
+            file=sys.stderr,
+        )
+    sys.exit(1)
+
+print("Percent range validation passed.")
+"@
+        & $pythonExe -c $validatePercentScript $enrichMigrations.FullName
+        if ($LASTEXITCODE -eq 0) {
+            Write-Host "  ✓ Percent range validation passed" -ForegroundColor Green
+        }
+        else {
+            Write-Host "  ✗ Percent range validation FAILED" -ForegroundColor Red
+            exit 1
+        }
+
         Write-Host "Step 2: Applying enrichment migration: $($enrichMigrations.Name)..." -ForegroundColor Yellow
         $output = Get-Content $enrichMigrations.FullName -Raw | docker exec -i $CONTAINER psql -U $DB_USER -d $DB_NAME -v ON_ERROR_STOP=1 2>&1
         if ($LASTEXITCODE -eq 0) {

@@ -189,7 +189,7 @@ tryvit/
 │   │   ├── api-gateway/             # Write-path gateway (rate limiting, validation) (#478)
 │   │   └── send-push-notification/  # Push notification handler
 │   ├── dr-drill/                    # Disaster recovery drill artifacts
-│   └── migrations/                  # 230 append-only schema migrations
+│   └── migrations/                  # 231 append-only schema migrations
 │       ├── 20260207000100_create_schema.sql
 │       ├── 20260207000200_baseline.sql
 │       ├── 20260207000300_add_chip_metadata.sql
@@ -698,7 +698,7 @@ a mix of `'baked'`, `'fried'`, and `'none'`.
 
 ## 7. Migrations
 
-**Location:** `supabase/migrations/` — managed by Supabase CLI. Currently **230 migrations**.
+**Location:** `supabase/migrations/` — managed by Supabase CLI. Currently **231 migrations**.
 
 **Rules:**
 

@@ -45,7 +45,7 @@
 
 ### 1.2 Migration Inventory
 
-**Location:** `supabase/migrations/` — **230 migration files**, append-only.
+**Location:** `supabase/migrations/` — **231 migration files**, append-only.
 
 **Naming convention:** `YYYYMMDDHHMMSS_description.sql` (Supabase CLI timestamps). Files are applied in lexicographic sort order.
 
@@ -329,7 +329,7 @@ There is no standalone `init_db_structure.py` script. Database initialization fo
 
 ```
 supabase db reset
-  → Applies all 230 migrations in order (supabase/migrations/*.sql)
+  → Applies all 231 migrations in order (supabase/migrations/*.sql)
   → Runs seed.sql (empty — no-op)
   → Schema is ready
 
@@ -511,7 +511,7 @@ Backup = supabase/migrations/*.sql + db/pipelines/*.sql
 ```
 
 Since the database can be fully reconstructed from:
-1. 230 migration files (schema + functions + views)
+1. 231 migration files (schema + functions + views)
 2. 25 × 4 pipeline SQL files (all product data)
 3. `ci_post_pipeline.sql` (data fixups)
 

@@ -285,12 +285,81 @@ def _clamp_percent_estimate(pct_est: float | None) -> float | None:
     return round(max(pct_est, 0), 2)
 
 
+def _normalize_percent_value(
+    value: object,
+    *,
+    field_name: str,
+    ean: str,
+    ingredient_name: str,
+    anomalies: list[dict],
+) -> float | None:
+    """Normalize percent-like fields.
+
+    - Preserve NULL/empty as NULL
+    - Preserve valid numeric range [0, 100]
+    - If outside range or unparsable, set NULL and record anomaly
+    """
+    if value is None:
+        return None
+
+    if isinstance(value, str):
+        raw = value.strip()
+        if raw == "":
+            return None
+        # Some OFF entries may use comma decimal separators.
+        raw = raw.replace(",", ".")
+    else:
+        raw = str(value)
+
+    try:
+        parsed = float(raw)
+    except (TypeError, ValueError):
+        anomalies.append(
+            {
+                "ean": ean,
+                "ingredient_name": ingredient_name,
+                "field": field_name,
+                "value": value,
+                "reason": "unparsable",
+            }
+        )
+        return None
+
+    if parsed < 0 or parsed > 100:
+        anomalies.append(
+            {
+                "ean": ean,
+                "ingredient_name": ingredient_name,
+                "field": field_name,
+                "value": value,
+                "reason": "out_of_range",
+            }
+        )
+        return None
+
+    return round(parsed, 2)
+
+
+def _find_invalid_percent_rows(rows: list[dict]) -> list[dict]:
+    """Return rows that still contain out-of-range percent values."""
+    bad: list[dict] = []
+    for r in rows:
+        pct = r.get("percent")
+        if pct is not None and (pct < 0 or pct > 100):
+            bad.append(r)
+        pct_est = r.get("percent_estimate")
+        if pct_est is not None and (pct_est < 0 or pct_est > 100):
+            bad.append(r)
+    return bad
+
+
 def process_ingredients(
     off_product: dict,
     country: str,
     ean: str,
     ingredient_lookup: dict[str, int],
     new_ingredients: dict[str, dict],
+    percent_anomalies: list[dict] | None = None,
 ) -> list[dict]:
     """Extract ingredient rows for a product.
 
@@ -301,6 +370,9 @@ def process_ingredients(
     if not ingredients:
         return []
 
+    if percent_anomalies is None:
+        percent_anomalies = []
+
     rows: list[dict] = []
 
     def process_item(item: dict, pos: int, is_sub: bool, parent_name: str | None) -> int:
@@ -330,8 +402,20 @@ def process_item(item: dict, pos: int, is_sub: bool, parent_name: str | None) ->
                 "ean": ean,
                 "ingredient_name": display_name,
                 "position": pos,
-                "percent": item.get("percent"),
-                "percent_estimate": _clamp_percent_estimate(item.get("percent_estimate")),
+                "percent": _normalize_percent_value(
+                    item.get("percent"),
+                    field_name="percent",
+                    ean=ean,
+                    ingredient_name=display_name,
+                    anomalies=percent_anomalies,
+                ),
+                "percent_estimate": _normalize_percent_value(
+                    item.get("percent_estimate"),
+                    field_name="percent_estimate",
+                    ean=ean,
+                    ingredient_name=display_name,
+                    anomalies=percent_anomalies,
+                ),
                 "is_sub_ingredient": is_sub,
                 "parent_ingredient_name": parent_name if is_sub else None,
             }
@@ -765,6 +849,7 @@ def main():
     all_ingredient_rows = []
     all_allergen_rows = []
     new_ingredients: dict[str, dict] = {}
+    percent_anomalies: list[dict] = []
 
     stats = {
         "processed": 0,
@@ -800,6 +885,7 @@ def main():
                     product["ean"],
                     ingredient_lookup,
                     new_ingredients,
+                    percent_anomalies,
                 )
                 if ing_rows:
                     stats["with_ingredients"] += 1
@@ -825,6 +911,26 @@ def main():
     print(f"  Total ingredient rows: {len(all_ingredient_rows)}")
     print(f"  Total allergen rows: {len(all_allergen_rows)}")
 
+    if percent_anomalies:
+        print(f"  Percent anomalies sanitized to NULL: {len(percent_anomalies)}")
+        sample = percent_anomalies[:5]
+        for a in sample:
+            print(
+                f"    - EAN {a['ean']}, ingredient '{a['ingredient_name']}', {a['field']}={a['value']} ({a['reason']})"
+            )
+
+    invalid_rows = _find_invalid_percent_rows(all_ingredient_rows)
+    if invalid_rows:
+        print("\nERROR: Invalid percent values remain after normalization.", file=sys.stderr)
+        for row in invalid_rows[:5]:
+            print(
+                "  "
+                f"EAN {row.get('ean')} ingredient '{row.get('ingredient_name')}' "
+                f"percent={row.get('percent')} percent_estimate={row.get('percent_estimate')}",
+                file=sys.stderr,
+            )
+        sys.exit(1)
+
     sql = generate_migration(all_ingredient_rows, all_allergen_rows, new_ingredients, stats)
 
     MIGRATION_FILE.write_text(sql, encoding="utf-8")