diff --git a/README.md b/README.md
index f7016af5..bae3ae03 100644
--- a/README.md
+++ b/README.md
@@ -187,7 +187,7 @@ echo "SELECT * FROM v_master LIMIT 5;" | docker exec -i supabase_db_tryvit psql
```
┌─────────────────┐ ┌──────────────────┐ ┌─────────────────────────┐
│ Open Food Facts │────▶│ Python Pipeline │────▶│ PostgreSQL (Supabase) │
-│ API v2 │ │ sql_generator │ │ 231 migrations │
+│ API v2 │ │ sql_generator │ │ 232 migrations │
│ (category tags, │ │ validator │ │ 58 pipeline folders │
│ countries=PL,DE│ │ off_client │ │ products + nutrition │
└─────────────────┘ └──────────────────┘ │ + ingredients + scores │
@@ -319,7 +319,7 @@ tryvit/
│ └── views/ # Reference view definitions
│
├── supabase/
-│ ├── migrations/ # 231 append-only schema migrations
+│ ├── migrations/ # 232 append-only schema migrations
│ ├── seed/ # Reference data seeds
│ ├── tests/ # pgTAP integration tests
│ └── functions/ # Edge Functions (API gateway, push notifications, CAPTCHA)
@@ -531,3 +531,4 @@ This project is licensed under the terms in the [LICENSE](LICENSE) file.
Built with science and care.
+
diff --git a/copilot-instructions.md b/copilot-instructions.md
index e68f689a..0a87ee04 100644
--- a/copilot-instructions.md
+++ b/copilot-instructions.md
@@ -189,7 +189,7 @@ tryvit/
│ │ ├── api-gateway/ # Write-path gateway (rate limiting, validation) (#478)
│ │ └── send-push-notification/ # Push notification handler
│ ├── dr-drill/ # Disaster recovery drill artifacts
-│ └── migrations/ # 231 append-only schema migrations
+│ └── migrations/ # 232 append-only schema migrations
│ ├── 20260207000100_create_schema.sql
│ ├── 20260207000200_baseline.sql
│ ├── 20260207000300_add_chip_metadata.sql
@@ -698,7 +698,7 @@ a mix of `'baked'`, `'fried'`, and `'none'`.
## 7. Migrations
-**Location:** `supabase/migrations/` — managed by Supabase CLI. Currently **231 migrations**.
+**Location:** `supabase/migrations/` — managed by Supabase CLI. Currently **232 migrations**.
**Rules:**
@@ -2705,3 +2705,4 @@ gh issue comment XXX --body "Session complete — "
- Sections are numbered sequentially so any agent can say "per §X.Y" unambiguously.
- When this file is updated, the commit message should include the sections changed (e.g., `chore(docs): copilot-instructions §4 — add ingredient_translations table`).
- When the instructions conflict with a specific issue's acceptance criteria, the issue wins for that specific change — but document the divergence in the PR description.
+
diff --git a/db/ci_post_enrichment.sql b/db/ci_post_enrichment.sql
index 12252256..cd78d3e7 100644
--- a/db/ci_post_enrichment.sql
+++ b/db/ci_post_enrichment.sql
@@ -197,32 +197,150 @@ AND NOT EXISTS (
)
AND EXISTS (SELECT 1 FROM product_ingredient LIMIT 1);
+-- ═══════════════════════════════════════════════════════════════
+-- Step 0e: Classify additive/E-code concern tiers
+-- ═══════════════════════════════════════════════════════════════
+-- An E-code is only recognised at the very start of name_en: optional
+-- leading whitespace, "e", an optional space or hyphen, 3-4 digits and an
+-- optional letter suffix, followed by end-of-string or a non-alphanumeric
+-- character. Anchoring at the start avoids false positives such as
+-- "200 G", "Type 500", or nutrition text.
+-- Existing higher tiers and non-empty reasons are never overwritten;
+-- is_additive is forced to true for every classified row.
+
+WITH lowered AS (
+    -- Lower-case the name once; all later matching runs on this value.
+    SELECT
+        src.ingredient_id,
+        lower(src.name_en) AS n
+    FROM ingredient_ref AS src
+    WHERE src.name_en IS NOT NULL
+),
+normalized AS (
+    -- e_code is the compacted leading code ("e 150d" -> "e150d"),
+    -- or '' when the name does not start with an E-code.
+    SELECT
+        l.ingredient_id,
+        l.n,
+        CASE
+            WHEN l.n ~ '^\s*e[ -]?[0-9]{3,4}[a-z]?($|[^a-z0-9])'
+                THEN regexp_replace(
+                    substring(l.n from '^\s*e[ -]?[0-9]{3,4}[a-z]?'),
+                    '[^a-z0-9]', '', 'g'
+                )
+            ELSE ''
+        END AS e_code
+    FROM lowered AS l
+),
+tiered AS (
+    -- Branches are evaluated top-down, so the highest matching tier wins.
+    SELECT
+        nz.ingredient_id,
+        CASE
+            WHEN nz.e_code IN ('e250','e252')
+                 OR nz.n LIKE ANY (ARRAY[
+                        '%sodium nitrite%',
+                        '%potassium nitrate%'
+                    ])
+                THEN 3
+
+            WHEN nz.e_code IN ('e133','e150d','e211','e220','e223','e319','e385','e407','e407a','e621','e950','e951','e955')
+                 OR nz.n LIKE ANY (ARRAY[
+                        '%sodium benzoate%',
+                        '%benzoate%',
+                        '%sucralose%',
+                        '%aspartame%',
+                        '%carrageenan%',
+                        '%sodium metabisulphite%',
+                        '%contains sulphites%'
+                    ])
+                 OR nz.n IN ('sulfite','sulfiten','sulphite','sulphites')
+                THEN 2
+
+            WHEN nz.e_code IN (
+                    'e150','e150a','e150b','e150c','e172','e200','e202',
+                    'e281','e282','e338','e339','e340','e341',
+                    'e420','e421','e422','e440','e450','e451','e452',
+                    'e460','e466','e471','e472','e500','e501','e503',
+                    'e960','e960a','e960c'
+                 )
+                 OR nz.n LIKE ANY (ARRAY[
+                        '%potassium sorbate%',
+                        '%sorbate%'
+                    ])
+                THEN 1
+
+            ELSE 0
+        END AS target_concern_tier
+    FROM normalized AS nz
+),
+classified AS (
+    -- The reason text depends only on the tier; unclassified rows (tier 0)
+    -- are dropped here so the UPDATE never sees them.
+    SELECT
+        t.ingredient_id,
+        t.target_concern_tier,
+        CASE t.target_concern_tier
+            WHEN 3 THEN 'Strict additive/E-code classifier: nitrite/nitrate preservative concern'
+            WHEN 2 THEN 'Strict additive/E-code classifier: preservative, sweetener, sulphite, or carrageenan concern'
+            WHEN 1 THEN 'Strict additive/E-code classifier: lower concern additive or sorbate'
+        END AS target_concern_reason
+    FROM tiered AS t
+    WHERE t.target_concern_tier > 0
+)
+UPDATE ingredient_ref ir
+SET
+    -- Never downgrade a tier that is already higher than the classifier's.
+    concern_tier = GREATEST(COALESCE(ir.concern_tier, 0), c.target_concern_tier),
+    is_additive = true,
+    -- Fill the reason only when it is NULL or whitespace-only.
+    concern_reason = CASE
+        WHEN NULLIF(trim(ir.concern_reason), '') IS NULL
+            THEN c.target_concern_reason
+        ELSE ir.concern_reason
+    END
+FROM classified c
+WHERE c.ingredient_id = ir.ingredient_id
+  -- Touch a row only if at least one of the three columns would change.
+  AND (
+        c.target_concern_tier > COALESCE(ir.concern_tier, 0)
+        OR ir.is_additive IS NOT TRUE
+        OR NULLIF(trim(ir.concern_reason), '') IS NULL
+      );
+
-- ═══════════════════════════════════════════════════════════════
-- Step 1: Populate ingredient_concern_score from actual ingredient data
-- ═══════════════════════════════════════════════════════════════
-- Based on EFSA concern tiers: tier 1 = 15pts, tier 2 = 40pts, tier 3 = 100pts
-- Capped at LEAST(100, SUM(...)) per SCORING_METHODOLOGY.md v3.2
+WITH product_scores AS (  -- summed tier points per product; only products with at least one tiered ingredient appear
+ SELECT
+ pi.product_id,
+ LEAST(100, SUM(  -- cap the summed points at 100
+ CASE ir.concern_tier
+ WHEN 1 THEN 15
+ WHEN 2 THEN 40
+ WHEN 3 THEN 100
+ ELSE 0  -- any other positive tier value contributes nothing
+ END
+ ))::int AS score
+ FROM product_ingredient pi
+ JOIN ingredient_ref ir ON ir.ingredient_id = pi.ingredient_id
+ WHERE COALESCE(ir.concern_tier, 0) > 0  -- a NULL tier is treated as 0 and skipped
+ GROUP BY pi.product_id
+),
+computed_scores AS (  -- every non-deprecated products row; the LEFT JOIN keeps products that have no tiered ingredient
+ SELECT
+ p.product_id,
+ COALESCE(ps.score, 0)::int AS score  -- no tiered ingredient => score 0, so such products are reset to 0
+ FROM products p
+ LEFT JOIN product_scores ps ON ps.product_id = p.product_id
+ WHERE p.is_deprecated IS NOT TRUE
+)
UPDATE products p
-SET ingredient_concern_score = COALESCE(concern.score, 0)
-FROM (
- SELECT pi.product_id,
- LEAST(100, SUM(
- CASE ir.concern_tier
- WHEN 1 THEN 15
- WHEN 2 THEN 40
- WHEN 3 THEN 100
- ELSE 0
- END
- ))::int AS score
- FROM product_ingredient pi
- JOIN ingredient_ref ir ON ir.ingredient_id = pi.ingredient_id
- WHERE ir.concern_tier > 0
- GROUP BY pi.product_id
-) concern
-WHERE concern.product_id = p.product_id
+SET ingredient_concern_score = cs.score
+FROM computed_scores cs
+WHERE p.product_id = cs.product_id
AND p.is_deprecated IS NOT TRUE
- AND p.ingredient_concern_score IS DISTINCT FROM COALESCE(concern.score, 0);
+ AND p.ingredient_concern_score IS DISTINCT FROM cs.score;  -- NULL-safe comparison: only rows whose stored score differs are rewritten
-- ═══════════════════════════════════════════════════════════════
-- Step 2: Flag palm oil controversy from actual ingredient data
diff --git a/db/qa/QA__ingredient_quality.sql b/db/qa/QA__ingredient_quality.sql
index ba4a5b35..aab243a2 100644
--- a/db/qa/QA__ingredient_quality.sql
+++ b/db/qa/QA__ingredient_quality.sql
@@ -198,3 +198,43 @@ SELECT '17. ingredient_translations source valid' AS check_name,
FROM ingredient_translations
WHERE source NOT IN ('curated', 'off_api', 'auto_translated', 'user_submitted');
+
+-- ═══════════════════════════════════════════════════════════════════════════
+-- 18. concern_tier should not remain all-zero once product_ingredient exists
+-- ═══════════════════════════════════════════════════════════════════════════
+-- Reports 1 violation only when product_ingredient has rows but none of them
+-- links to an ingredient with a positive concern_tier; otherwise reports 0.
+SELECT
+    '18. concern_tier populated when product ingredients exist' AS check_name,
+    CASE
+        WHEN EXISTS (SELECT 1 FROM product_ingredient)
+             AND NOT EXISTS (
+                 SELECT 1
+                 FROM product_ingredient pi
+                 JOIN ingredient_ref ir ON ir.ingredient_id = pi.ingredient_id
+                 WHERE ir.concern_tier > 0
+             )
+            THEN 1
+        ELSE 0
+    END AS violations;
+
+-- ═══════════════════════════════════════════════════════════════════════════
+-- 19. products linked to concern-tier ingredients should have concern score
+-- ═══════════════════════════════════════════════════════════════════════════
+-- Counts non-deprecated products that reference at least one ingredient with
+-- a positive concern_tier yet still carry a NULL or zero
+-- ingredient_concern_score. With no tiered ingredient linked to any product
+-- the EXISTS never matches and the count is 0.
+SELECT
+    '19. concern-tier linked products have concern score' AS check_name,
+    COUNT(DISTINCT p.product_id) AS violations
+FROM products p
+WHERE p.is_deprecated IS NOT TRUE
+  AND COALESCE(p.ingredient_concern_score, 0) = 0
+  AND EXISTS (
+      SELECT 1
+      FROM product_ingredient pi
+      JOIN ingredient_ref ir ON ir.ingredient_id = pi.ingredient_id
+      WHERE pi.product_id = p.product_id
+        AND ir.concern_tier > 0
+  );
diff --git a/db/qa/QA__scoring_formula_tests.sql b/db/qa/QA__scoring_formula_tests.sql
index a815341a..22f5f37e 100644
--- a/db/qa/QA__scoring_formula_tests.sql
+++ b/db/qa/QA__scoring_formula_tests.sql
@@ -643,3 +643,4 @@ FROM (
WHERE
NOT ((api_score_explanation(p.product_id))->'summary') ? 'conflicts'
OR NOT ((api_score_explanation(p.product_id))->'summary') ? 'qualified_headline';
+
diff --git a/docs/PRODUCTION_DATA.md b/docs/PRODUCTION_DATA.md
index 8c6281a3..6610173f 100644
--- a/docs/PRODUCTION_DATA.md
+++ b/docs/PRODUCTION_DATA.md
@@ -45,7 +45,7 @@
### 1.2 Migration Inventory
-**Location:** `supabase/migrations/` — **231 migration files**, append-only.
+**Location:** `supabase/migrations/` — **232 migration files**, append-only.
**Naming convention:** `YYYYMMDDHHMMSS_description.sql` (Supabase CLI timestamps). Files are applied in lexicographic sort order.
@@ -329,7 +329,7 @@ There is no standalone `init_db_structure.py` script. Database initialization fo
```
supabase db reset
- → Applies all 231 migrations in order (supabase/migrations/*.sql)
+ → Applies all 232 migrations in order (supabase/migrations/*.sql)
→ Runs seed.sql (empty — no-op)
→ Schema is ready
@@ -511,7 +511,7 @@ Backup = supabase/migrations/*.sql + db/pipelines/*.sql
```
Since the database can be fully reconstructed from:
-1. 231 migration files (schema + functions + views)
+1. 232 migration files (schema + functions + views)
2. 25 × 4 pipeline SQL files (all product data)
3. `ci_post_pipeline.sql` (data fixups)
@@ -657,3 +657,4 @@ Supabase Auth URL configuration required:
7. **Consider EAN-based canonical keys** to eliminate `product_id` instability across environments
8. **Add rollback documentation** — steps to revert a bad deployment
9. **Create staging environment** — intermediate Supabase project for pre-production validation
+
diff --git a/supabase/migrations/20260605223000_backfill_ingredient_concern_tiers.sql b/supabase/migrations/20260605223000_backfill_ingredient_concern_tiers.sql
new file mode 100644
index 00000000..06a5e0ce
--- /dev/null
+++ b/supabase/migrations/20260605223000_backfill_ingredient_concern_tiers.sql
@@ -0,0 +1,142 @@
+-- Migration: Backfill ingredient concern tiers from strict additive/E-code classification.
+-- Rollback: Manual rollback only. Restore affected ingredient_ref.concern_tier/is_additive/concern_reason and products.ingredient_concern_score from backup if needed.
+-- Two statements in one transaction:
+--   1. classify ingredient_ref rows by leading E-code or known additive name;
+--   2. recompute products.ingredient_concern_score from the resulting tiers.
+-- Safe to re-run. Does not downgrade existing higher concern tiers.
+
+BEGIN;
+
+-- 1. Classification. An E-code is only recognised at the very start of
+--    name_en and must be followed by end-of-string or a non-alphanumeric
+--    character.
+WITH lowered AS (
+    -- Lower-case the name once; all later matching runs on this value.
+    SELECT
+        src.ingredient_id,
+        lower(src.name_en) AS n
+    FROM ingredient_ref AS src
+    WHERE src.name_en IS NOT NULL
+),
+normalized AS (
+    -- e_code is the compacted leading code ("e 150d" -> "e150d"),
+    -- or '' when the name does not start with an E-code.
+    SELECT
+        l.ingredient_id,
+        l.n,
+        CASE
+            WHEN l.n ~ '^\s*e[ -]?[0-9]{3,4}[a-z]?($|[^a-z0-9])'
+                THEN regexp_replace(
+                    substring(l.n from '^\s*e[ -]?[0-9]{3,4}[a-z]?'),
+                    '[^a-z0-9]', '', 'g'
+                )
+            ELSE ''
+        END AS e_code
+    FROM lowered AS l
+),
+tiered AS (
+    -- Branches are evaluated top-down, so the highest matching tier wins.
+    SELECT
+        nz.ingredient_id,
+        CASE
+            WHEN nz.e_code IN ('e250','e252')
+                 OR nz.n LIKE ANY (ARRAY[
+                        '%sodium nitrite%',
+                        '%potassium nitrate%'
+                    ])
+                THEN 3
+
+            WHEN nz.e_code IN ('e133','e150d','e211','e220','e223','e319','e385','e407','e407a','e621','e950','e951','e955')
+                 OR nz.n LIKE ANY (ARRAY[
+                        '%sodium benzoate%',
+                        '%benzoate%',
+                        '%sucralose%',
+                        '%aspartame%',
+                        '%carrageenan%',
+                        '%sodium metabisulphite%',
+                        '%contains sulphites%'
+                    ])
+                 OR nz.n IN ('sulfite','sulfiten','sulphite','sulphites')
+                THEN 2
+
+            WHEN nz.e_code IN (
+                    'e150','e150a','e150b','e150c','e172','e200','e202',
+                    'e281','e282','e338','e339','e340','e341',
+                    'e420','e421','e422','e440','e450','e451','e452',
+                    'e460','e466','e471','e472','e500','e501','e503',
+                    'e960','e960a','e960c'
+                 )
+                 OR nz.n LIKE ANY (ARRAY[
+                        '%potassium sorbate%',
+                        '%sorbate%'
+                    ])
+                THEN 1
+
+            ELSE 0
+        END AS target_concern_tier
+    FROM normalized AS nz
+),
+classified AS (
+    -- The reason text depends only on the tier; unclassified rows (tier 0)
+    -- are dropped here so the UPDATE never sees them.
+    SELECT
+        t.ingredient_id,
+        t.target_concern_tier,
+        CASE t.target_concern_tier
+            WHEN 3 THEN 'Strict additive/E-code classifier: nitrite/nitrate preservative concern'
+            WHEN 2 THEN 'Strict additive/E-code classifier: preservative, sweetener, sulphite, or carrageenan concern'
+            WHEN 1 THEN 'Strict additive/E-code classifier: lower concern additive or sorbate'
+        END AS target_concern_reason
+    FROM tiered AS t
+    WHERE t.target_concern_tier > 0
+)
+UPDATE ingredient_ref ir
+SET
+    -- Never downgrade a tier that is already higher than the classifier's.
+    concern_tier = GREATEST(COALESCE(ir.concern_tier, 0), c.target_concern_tier),
+    is_additive = true,
+    -- Fill the reason only when it is NULL or whitespace-only.
+    concern_reason = CASE
+        WHEN NULLIF(trim(ir.concern_reason), '') IS NULL
+            THEN c.target_concern_reason
+        ELSE ir.concern_reason
+    END
+FROM classified c
+WHERE c.ingredient_id = ir.ingredient_id
+  -- Touch a row only if at least one of the three columns would change.
+  AND (
+        c.target_concern_tier > COALESCE(ir.concern_tier, 0)
+        OR ir.is_additive IS NOT TRUE
+        OR NULLIF(trim(ir.concern_reason), '') IS NULL
+      );
+
+-- 2. Score recompute. Tier points: 1 -> 15, 2 -> 40, 3 -> 100; the
+--    per-product sum is capped at 100. Non-deprecated products without any
+--    tiered ingredient receive 0.
+WITH tier_points AS (
+    -- One row per product/ingredient link whose ingredient has a positive tier.
+    SELECT
+        pi.product_id,
+        CASE ir.concern_tier
+            WHEN 1 THEN 15
+            WHEN 2 THEN 40
+            WHEN 3 THEN 100
+            ELSE 0
+        END AS points
+    FROM product_ingredient pi
+    JOIN ingredient_ref ir ON ir.ingredient_id = pi.ingredient_id
+    WHERE ir.concern_tier > 0
+),
+product_scores AS (
+    SELECT
+        tp.product_id,
+        LEAST(100, SUM(tp.points))::int AS score
+    FROM tier_points AS tp
+    GROUP BY tp.product_id
+),
+computed_scores AS (
+    -- LEFT JOIN keeps products that have no tiered ingredient (score 0).
+    SELECT
+        p.product_id,
+        COALESCE(ps.score, 0)::int AS score
+    FROM products p
+    LEFT JOIN product_scores ps ON ps.product_id = p.product_id
+    WHERE p.is_deprecated IS NOT TRUE
+)
+UPDATE products p
+SET ingredient_concern_score = cs.score
+FROM computed_scores cs
+WHERE cs.product_id = p.product_id
+  AND p.is_deprecated IS NOT TRUE
+  -- NULL-safe comparison: rewrite only rows whose stored score differs.
+  AND p.ingredient_concern_score IS DISTINCT FROM cs.score;
+
+COMMIT;
+