From 8d8ca218e1ae4e1fb1bffedeafc85a589a1eb4ba Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Sat, 27 Jun 2026 18:09:45 +0200 Subject: [PATCH 1/8] docs(dev): write-latency roadmap (validated cost model + layered fix) Records the validated 6-LIST warm-write cost model, the two root causes (un-GC'd _versions/; re-resolving latest by listing), and the layered fix (GC + capture-once reuse), plus how commit-graph-table retirement feeds in. Linked from docs/dev/index.md next to the RFC-013 docs. --- docs/dev/index.md | 1 + docs/dev/write-latency-roadmap.md | 79 +++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 docs/dev/write-latency-roadmap.md diff --git a/docs/dev/index.md b/docs/dev/index.md index be986020..e48402dc 100644 --- a/docs/dev/index.md +++ b/docs/dev/index.md @@ -94,6 +94,7 @@ Working documents for in-flight feature work. Removed when the work lands. | Provider-independent embedding configuration — one resolved `EmbeddingConfig` + sealed provider enum (Gemini/OpenAI/Mock), identity recorded in the schema IR, query-time same-space validation, NFR floor | [rfc-012-embedding-provider-config.md](rfc-012-embedding-provider-config.md) | | Write-path latency — capture-once `WriteTxn`, version-pinned opens, one `GraphPublishAuthority` fed declarative `PublishPlan`s, manifest-authoritative lineage, epoch fence, bounded history (compaction + cleanup), and an IO-counted cost contract (`iss-write-s3-roundtrip-amplification`, `iss-991`) | [rfc-013-write-path-latency.md](rfc-013-write-path-latency.md) | | RFC-013 handoff — current-state map, latest validation, and concrete next actions for finishing write-path latency and correctness work | [handoff-rfc-013-write-path.md](handoff-rfc-013-write-path.md) | +| Write-latency roadmap — validated cost model (the 6-LIST warm-write trace), the two root causes (un-GC'd `_versions/`; re-resolving latest by listing), and the layered fix (GC + capture-once reuse); how commit-graph-table retirement feeds in | [write-latency-roadmap.md](write-latency-roadmap.md) | ## Boundary diff --git a/docs/dev/write-latency-roadmap.md b/docs/dev/write-latency-roadmap.md new file mode 100644 index 00000000..d6287d1a --- /dev/null +++ b/docs/dev/write-latency-roadmap.md @@ -0,0 +1,79 @@ +# Write-latency roadmap (validated cost model + layered fix) + +**Type:** planning roadmap (forward-looking; not yet implemented) +**Companion to:** [rfc-013-write-path-latency.md](rfc-013-write-path-latency.md) (design) and [handoff-rfc-013-write-path.md](handoff-rfc-013-write-path.md) (current state + next actions) +**Status:** validated against Lance 7.0.0 source + a measured S3/RustFS trace of the `a7d4cba5` (#307) binary; **re-validated unchanged against HEAD `0dcdcf5a` (#308)** — see "Currency" below + +This doc records the **validated** root-cause analysis of single-row write latency and the layered, correct-by-design fix, so the analysis is not lost between sessions. It also records how the separate commit-graph-table **retirement** work (see the strand-and-rebuild plan) feeds into it. + +## Currency (re-validated at HEAD `0dcdcf5a`, #308) + +#308 ("Stage the delete path; retire the inline-delete residual", MR-A) is a **pure delete-path refactor**: it moved `delete_where` off the inline `Dataset::delete` HEAD-advance onto Lance 7.0's two-phase `stage_delete` (`DeleteBuilder::execute_uncommitted`) + `commit_staged`. The **insert path is untouched** — every step of the cost model below re-traced identically (the `stage_merge_insert` + `commit_staged` open/commit count is unchanged; WriteTxn "collapse #1/#4", the probe-gated `occ_snapshot_for_branch`, and the publisher chain all survive). Two consequences worth recording: (1) deletes are now staged like inserts, so this cost model — and the layered fix — **now apply uniformly to deletes too** (before #308 a delete was an inline-commit residual with a different shape); (2) the sole remaining inline-commit residual is `create_vector_index`, and D2 (constructive XOR destructive per query) is now a **permanent by-design boundary, not a gap**. Lance stays pinned at 7.0.0, so Root-cause-A's `commit.rs` / `cleanup.rs` references are unchanged. + +## Problem + +A single keyed-node insert on `main` against the production graph (Cloudflare Container + R2, 343 MB, never cleaned) measured **~7.2s median**. The cost is not the writes — it is read/list amplification, dominated by repeated "resolve latest version" listings of `_versions/` prefixes. + +## Validated cost model of one warm write + +A warm server insert of one keyed node on `main` performs **6 `_versions/` LISTs + 1 full `__manifest` scan + 1 recovery LIST + ~5 writes**, every operation reconciled to code and matching the measured served trace exactly (3 `__manifest/_versions/` lists, 3 `node:SyncState/_versions/` lists, ~132 `__manifest` reads, 1 recovery list, 8 PUT + 1 POST): + +| # | Operation | Cost | Source | +|---|---|---|---| +| 1 | recovery sidecar list | 1 `__recovery/` LIST | `recovery.rs` heal | +| 2 | schema-contract validate | 3 text GETs (not manifest) | `schema_state.rs` | +| 3 | branch resolve (warm) | 0 I/O (in-memory coord) | `omnigraph.rs` `resolved_branch_target` | +| 4 | per-table mutation open | **0** — WriteTxn returns `None` (RFC-013 collapse #1) | `table_ops.rs` | +| 5 | staging open (`reopen_for_mutation`) | **1 `node:SyncState/_versions/` LIST** (opens at *latest*) | `table_store.rs` `open_dataset_head_for_write` | +| 6 | OCC re-capture probe | **1 `__manifest/_versions/` LIST** (`latest_version_id`) | `manifest.rs` `probe_latest_version` | +| 7 | data-table drift probe | **1 `node:SyncState/_versions/` LIST** | `staging.rs` | +| 8 | sidecar PUT | 1 PUT | `staging.rs` `write_sidecar` | +| 9 | `commit_staged` (data commit) | **1 `node:SyncState/_versions/` LIST** (Lance commit) + PUTs | `table_store.rs` | +| 10 | index-build reopen | **0** — reuses handle (RFC-013 collapse #4) | `table_ops.rs` | +| 11 | `load_publish_state` | **1 `__manifest/_versions/` LIST + 1 full scan (~132 reads)** | `publisher.rs` | +| 12 | publish merge-insert commit | **1 `__manifest/_versions/` LIST** (Lance commit) + 1 POST | `publisher.rs` `merge_rows` | +| 13 | sidecar DELETE | 1 DELETE | `mutation.rs` | + +The WriteTxn (RFC-013 step 3b) and #307 already eliminated several opens (steps 4, 10) and the redundant publish scans. What remains is structural. + +## Two independent root causes + +**Root cause A — per-LIST cost grows with history (un-GC'd `_versions/`).** On S3/R2, Lance resolves "latest version" via `resolve_version_from_listing` (`lance-table-7.0.0/src/io/commit.rs:552`). Even though V2 reverse-sorted naming puts the latest first, it reads **up to 1000 entries as a lexical-order sanity check** (`commit.rs:584`, `valid_manifests.take(999)`). So each LIST pulls a full ~1000-key page (~205 KB, ~500 ms on R2). `cleanup` covers only catalog data tables — **the internal tables are never version-GC'd** (`optimize.rs` `all_table_keys(&db.catalog())`), so `__manifest/_versions/` grows one entry per commit forever. The version hint that would skip the list is **unavailable on S3/R2** — Lance gates it to non-lexical stores (`commit.rs:327`, `uses_version_hint = enabled && !list_is_lexically_ordered`); `LANCE_USE_VERSION_HINT` can only *disable* it. NOTE: `optimize` already *compacts* the internal tables (RFC-013 step 2), which bounds the `__manifest` data **scan** (step 11), but compaction adds a version — only `cleanup` shrinks the `_versions/` **list**. + +**Root cause B — the write re-resolves "latest" 6× by listing instead of reusing a known version.** Each boundary (table open, OCC probe, table commit, manifest open, manifest commit) independently asks "what's latest?" via a list. Opening at a *known* version is list-free: `DatasetBuilder::from_uri(loc).with_version(N)` reconstructs the exact V2 filename and does one GET — the read path already uses this (the held-handle cache). The write path deliberately opens HEAD and never consults the pinned pointer, even though the `__manifest` `table_version` row already stores `manifest_path` + `version` + `e_tag` (`TableVersionMetadata`). + +## The layered fix (each layer correct on its own) + +Target end state: **a warm single-writer insert costs O(keep-N + writes), independent of total history, with the publish CAS as the sole correctness authority.** Ordered by impact-per-risk. + +### Layer 1 — version-GC the internal table(s) (attacks Root cause A; biggest win) +Bring `__manifest` into `cleanup` with a small keep-window (e.g. keep 20–50), shrinking every LIST from ~1000 keys (~205 KB, ~500 ms) to ~20–50 keys (~5 KB, ~40 ms) — a ~10× cut to the dominant term, reusing the `cleanup_all_tables` loop. Lance's `cleanup_old_versions` supports this directly (`CleanupPolicy { before_version, before_timestamp }`, always preserves latest, honors tags — `lance-7.0.0/src/dataset/cleanup.rs`). +- **Correctness:** a *manual, quiesced, sidecar-refusing* cleanup is correct **without** the Q8 watermark (the resurrection race only exists under concurrent live writers). Time-travel below the window is lost — the same tradeoff data-table cleanup already makes. +- **Immediate operational fix:** a one-shot quiesced `__manifest` cleanup on the production graph takes ~7s → ~1s today, zero code risk. +- **Connection to the retirement work:** after the commit-graph-table retirement, `_graph_commits`/`_graph_commit_actors` no longer exist, so Layer 1's GC scope is **just `__manifest`** (originally three internal tables). Retiring those tables also removes their 2 cold-open LISTs entirely — an orthogonal cold-start win. + +### Layer 2 — Q8 durable monotonic watermark (makes Layer 1 safe under live writers) +Lance creates versions with a bare `PutMode::Create` and no monotonic guard, so a stalled writer can resurrect a GC'd version = a silent lost write on R2/S3. To run GC automatically while serving writes, add a durable boundary watermark (a Lance boundary **tag**, which `cleanup_old_versions` protects): GC advances the floor before deleting; every writer/open rejects `version ≤ boundary`. Invariant-level, touches the open path; correctly deferred in RFC-013 (§ step 2b / Q8). See [rfc-013-write-path-latency.md](rfc-013-write-path-latency.md) and [handoff-rfc-013-write-path.md](handoff-rfc-013-write-path.md) for the full design. + +### Layer 3 — open data tables at the pinned manifest version (attacks Root cause B) +On the **non-strict** insert/merge path, route `reopen_for_mutation` through the version-pinned opener (`open_table_dataset(location, pinned_version, session)`) instead of `Dataset::open` at HEAD. The `__manifest` row already pins the exact version + e_tag, and the non-strict path already skips the strict version check — so opening at the pinned version is information-equivalent and **list-free** (removes step 5, folds the drift probe). The publisher CAS + `latest_version_id` drift guard remain the concurrency fences. + +### Layer 4 — optimistic warm publish (attacks Root cause B) +The publisher loop runs cold `load_publish_state` (open + full scan) on every attempt, then arbitrates via the merge-insert row-level CAS — and the CAS is the stated authority. Thread the warm coordinator's already-open `__manifest` handle + in-memory `known_state` (folded after every commit, RFC-013 PR2 #1b) into **attempt 0**; fall through to today's cold `load_publish_state` on attempts 1..N **only when the CAS actually conflicts**. A current warm view (single-writer server, the common case) commits with zero manifest open/scan. Scope to non-strict ops; strict ops (update/delete/schema) keep the cold fresh-read drift check. + +## Why this is the optimal shape (not a symptomatic patch) + +- It is the natural extension of the WriteTxn "capture-once" architecture (RFC-013 step 3b) that already eliminated the per-table mutation opens — Layers 3–4 finish the job for the publish + commit opens. +- It separates **correctness (the CAS) from optimization (every read/probe)**, matching invariant 15: the optimistic path is probe-free; the cold path runs only under genuine contention. Write cost becomes O(1) with no contention, O(contention) only under real concurrency. +- It introduces **no parallel copy of version state** (deny-listed) — Lance + the manifest stay the single source of truth; we stop *re-deriving* "latest" from a cold list on the hot path (the "cold re-derivation" anti-pattern, invariant 15). +- Explicitly **not** pursued: the version hint (unavailable on S3) or an external manifest store (a drift-prone parallel pointer). GC + capture-once reuse is the substrate-respecting path. + +## Sequencing + +0. **Prereq (separate plan): retire `_graph_commits.lance` + `_graph_commit_actors.lance`.** Simplifies Layer 1 to `__manifest`-only and removes 2 cold-open LISTs. (Strand-and-rebuild storage-versioning plan.) +1. **Operational now:** one-shot quiesced `__manifest` cleanup on the production graph (~7s → ~1s, no code). +2. **Layer 1:** wire `__manifest` into the `cleanup` command (manual/quiesced + sidecar-refuse) — the 80/20. +3. **Layers 3 + 4:** open-at-pinned-version + optimistic warm publish — remove the redundant round-trips; bring a warm write to ~2 cheap Lance-commit lists + writes. +4. **Layer 2:** Q8 watermark — only when GC must run continuously under live writers; heaviest, correctly deferred. + +Layers 1+3+4 are independently shippable, each correctness-preserving, composing to the scalable end state. Each lands with `write_cost_s3.rs` extensions asserting the per-write LIST count drops and stays flat across commit-history depth. From e98b7bb787ff5df446ce3922c57fa728c68100b3 Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Sun, 28 Jun 2026 12:45:22 +0200 Subject: [PATCH 2/8] =?UTF-8?q?feat(engine)!:=20strand=20storage=20version?= =?UTF-8?q?ing=20=E2=80=94=20one=20internal-schema=20version,=20no=20in-pl?= =?UTF-8?q?ace=20migration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set MIN_SUPPORTED == CURRENT == 4: this binary reads exactly one `__manifest` internal-schema version and refuses any older graph on open with a rebuild-via-export/import message, instead of migrating it in place. Storage format changes become a deliberate cutover, not a permanently-carried in-place migration — the pre-release "complexity must be earned" contract. Delete the entire in-place migration apparatus and everything that existed only to support it: the `migrate_vN` arms + dispatcher + stamp-bump helpers + the schema-version-floor tripwire; `migrate_on_open` (both open modes now refuse); the legacy `_graph_commits.lance` readers + the v3 test fixtures + migration tests + `migration.v3_to_v4.*` failpoints + the two surface guards that pinned Lance variants only the migration matched on; and `state::merge_lineage_rows`. Keep `read_stamp` / `stamp_current_version` / `set_stamp` / `refuse_if_stamp_unsupported` — the seam a future one-shot converter plugs into. `load_commit_cache_for_branch` now reads the `__manifest` projection unconditionally (sub-v4 graphs are refused at open). Adds `sub_current_graph_is_refused_on_open_with_rebuild_hint`. The commit-graph TABLES are still created/used as branch-ref ledgers — their retirement (CommitGraph -> pure `__manifest` projection) is the next commit. BREAKING CHANGE: a graph created by omnigraph <= 0.7.2 (internal schema v3) is refused on open. Rebuild it: `omnigraph export` with the old release, then `omnigraph init` + `omnigraph load` with this one. Data, vectors, and blobs are preserved; commit history and branches are not. --- crates/omnigraph/src/db/commit_graph.rs | 538 +------------- crates/omnigraph/src/db/manifest.rs | 108 +-- .../omnigraph/src/db/manifest/migrations.rs | 505 ++----------- crates/omnigraph/src/db/manifest/publisher.rs | 24 +- crates/omnigraph/src/db/manifest/state.rs | 65 -- crates/omnigraph/src/db/manifest/tests.rs | 685 +----------------- crates/omnigraph/src/db/omnigraph.rs | 25 +- crates/omnigraph/src/failpoints.rs | 49 +- crates/omnigraph/tests/failpoints.rs | 136 +--- .../omnigraph/tests/lance_surface_guards.rs | 57 -- 10 files changed, 121 insertions(+), 2071 deletions(-) diff --git a/crates/omnigraph/src/db/commit_graph.rs b/crates/omnigraph/src/db/commit_graph.rs index fb618741..d0f18620 100644 --- a/crates/omnigraph/src/db/commit_graph.rs +++ b/crates/omnigraph/src/db/commit_graph.rs @@ -2,10 +2,9 @@ use std::collections::{HashMap, VecDeque}; use std::sync::Arc; use arrow_array::{ - Array, RecordBatch, RecordBatchIterator, StringArray, TimestampMicrosecondArray, UInt64Array, + RecordBatch, RecordBatchIterator, StringArray, TimestampMicrosecondArray, UInt64Array, }; use arrow_schema::{DataType, Field, Schema, SchemaRef, TimeUnit}; -use futures::TryStreamExt; use lance::Dataset; use lance::dataset::{WriteMode, WriteParams}; use lance_file::version::LanceFileVersion; @@ -142,7 +141,7 @@ impl CommitGraph { // `cleanup` race) must not wedge the read, so a typed not-found yields a // `None` handle — a subsequent `create_branch` then surfaces the loud // error. Any OTHER open error (transient IO / corrupt) still propagates, - // matching the `force_delete_branch` / `read_legacy_commit_cache` idiom. + // matching the `force_delete_branch` not-found idiom. let dataset = match dataset.checkout_branch(branch).await { Ok(ds) => Some(ds), Err(lance::Error::RefNotFound { .. }) | Err(lance::Error::NotFound { .. }) => None, @@ -558,42 +557,14 @@ fn commits_to_batch(commits: &[GraphCommit]) -> Result { .map_err(|e| OmniError::Lance(e.to_string())) } -/// Build the in-memory commit cache for `branch`, choosing the source by the -/// branch manifest's internal-schema stamp (RFC-013 step 4 forward/back-compat): -/// -/// - stamp ≥ v4 (post-Phase-7, the normal case): the `__manifest` lineage -/// projection — `graph_commit`/`graph_head` rows folded into the publish CAS. -/// - stamp < v4 (a pre-Phase-7 graph not yet migrated): the legacy -/// `_graph_commits.lance` read. This is the **transitional v3 fallback** that -/// lets a READ-ONLY open of an un-migrated graph still see correct history — -/// a read-only open never runs the v3→v4 backfill (it must not write), so -/// without this gate it would read an empty DAG from `__manifest`. A -/// read-write open backfills `__manifest` on its first write and thereafter -/// takes the projection branch. -/// -/// Both sources pick the head with `should_replace_head`, so the cache is -/// identical regardless of which branch is taken. Remove the fallback once no -/// graph below internal-schema v4 remains. +/// Build the in-memory commit cache for `branch` from the `__manifest` +/// graph-lineage projection (RFC-013 Phase 7) — the single source of lineage on a +/// v4 graph. Sub-v4 graphs are refused at open (`refuse_if_stamp_unsupported`), +/// so there is no legacy `_graph_commits.lance` fallback. async fn load_commit_cache_for_branch( root_uri: &str, branch: Option<&str>, ) -> Result<(HashMap, Option)> { - let stamp = crate::db::manifest::internal_schema_stamp_at(root_uri, branch).await?; - // Defense-in-depth: refuse a branch whose stamp this binary cannot serve — - // newer than CURRENT, or below MIN_SUPPORTED — for the same reason the main - // read path does (`refuse_if_internal_schema_unsupported`). A `> CURRENT` stamp - // means a newer binary wrote a shape we can't read, so the projection below - // would misread it; a `< MIN` stamp predates the legacy readers this binary - // still carries. Not a live hole today: migrations run main-first - // (`migrate_on_open` migrates main; each branch migrates on its own first - // write), so main's stamp bounds every branch's and the main read path already - // refuses first. The guard closes the gap if that ordering is ever weakened. - crate::db::manifest::refuse_if_stamp_unsupported(stamp)?; - if stamp < crate::db::manifest::INTERNAL_MANIFEST_SCHEMA_VERSION { - // Transitional: un-migrated v3 graph — read lineage from the legacy - // `_graph_commits.lance` so reads (incl. read-only opens) see history. - return read_legacy_commit_cache(root_uri, branch).await; - } load_commit_cache_from_manifest(root_uri, branch).await } @@ -628,177 +599,6 @@ async fn load_commit_cache_from_manifest( Ok((commit_by_id, head_commit)) } -/// Read the legacy `_graph_commits.lance` (+ its actor sidecar) for `branch` -/// into an in-memory cache — the transitional source for graphs not yet -/// migrated to internal-schema v4 (RFC-013 step 4). Two callers, both -/// transitional: the v3→v4 migration backfill (which copies these rows into -/// `__manifest`) and the read-only v3 fallback in `CommitGraph::open*`. Returns -/// `(commit_by_id, head)`, with the head picked by `should_replace_head` — -/// identical to the manifest projection. A genuinely ABSENT (not-found) commit -/// dataset or actor sidecar yields an empty cache (no head); any OTHER open error -/// (transient IO / corrupt file) propagates loudly rather than being read as -/// "empty" — a swallow here would let the v3→v4 migration backfill nothing and -/// still stamp v4, orphaning the real lineage permanently. This keeps the legacy -/// readers alive while any v3 graph survives; once no graph is below v4 it can -/// retire. -pub(crate) async fn read_legacy_commit_cache( - root_uri: &str, - branch: Option<&str>, -) -> Result<(HashMap, Option)> { - let root = root_uri.trim_end_matches('/'); - let commits_uri = graph_commits_uri(root); - let commits_open = match crate::failpoints::maybe_fail_lance_open("migration.v3_to_v4.legacy_open") - { - Ok(()) => Dataset::open(&commits_uri).await, - Err(injected) => Err(injected), - }; - let mut dataset = match commits_open { - Ok(dataset) => dataset, - // An ABSENT commits dataset is the legitimate "no legacy data" signal — - // a graph with no `_graph_commits.lance` (or none on this branch) yields - // an empty cache. But ONLY a genuine not-found gets that treatment: a - // transient/corrupt open (IO / CorruptFile / …) must propagate, never be - // read as "empty". The v3→v4 migration calls this once before stamping - // v4; swallowing a non-not-found error here would backfill nothing and - // stamp v4 anyway, orphaning the real lineage permanently (the migration - // never re-runs, and the v3 fallback is then disabled). Lance maps an - // object-store NotFound to `DatasetNotFound`; the variant match (vs an - // existence probe) is exactly right and not over-strict — pinned by - // `lance_surface_guards.rs::dataset_open_missing_returns_not_found_variant`. - Err(lance::Error::DatasetNotFound { .. }) | Err(lance::Error::NotFound { .. }) => { - return Ok((HashMap::new(), None)); - } - Err(e) => return Err(OmniError::Lance(e.to_string())), - }; - if let Some(branch) = branch.filter(|b| *b != "main") { - dataset = dataset - .checkout_branch(branch) - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; - } - - // The actor sidecar may be absent (older graphs without authored commits); - // an empty actor map then leaves every commit's actor `None`. It is read - // FLAT (no branch checkout): the pre-Phase-7 commit graph never forked the - // actor dataset — actors are keyed by `graph_commit_id` globally — so a - // branch's commits resolve their actor from the same single actor table. - // This matches the live `CommitGraph::open_at_branch`, which also opens the - // actor dataset on main while checking out the branch only on the commits - // dataset. - let actors_open = - match crate::failpoints::maybe_fail_lance_open("migration.v3_to_v4.legacy_open") { - Ok(()) => Dataset::open(&graph_commit_actors_uri(root)).await, - Err(injected) => Err(injected), - }; - let actor_by_commit_id = match actors_open { - Ok(actor_dataset) => load_commit_actor_cache(&actor_dataset).await?, - // An ABSENT actor sidecar is benign (older graphs without authored - // commits) — every commit's actor stays `None`. A not-found is therefore - // the empty-map signal. But a CORRUPT/transient actor open must NOT be - // read as "no authors": silently wiping all authorship and then stamping - // v4 is the same permanent-loss hole as the commits arm, so anything - // other than not-found propagates. (Same variant contract, different - // rationale — absence is normal here, error is not.) - Err(lance::Error::DatasetNotFound { .. }) | Err(lance::Error::NotFound { .. }) => { - HashMap::new() - } - Err(e) => return Err(OmniError::Lance(e.to_string())), - }; - - load_commit_cache(&dataset, &actor_by_commit_id).await -} - -async fn load_commit_cache( - dataset: &Dataset, - actor_by_commit_id: &HashMap, -) -> Result<(HashMap, Option)> { - let batches: Vec = dataset - .scan() - .try_into_stream() - .await - .map_err(|e| OmniError::Lance(e.to_string()))? - .try_collect() - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; - - let mut commits = load_commits_from_batches(&batches)?; - for commit in &mut commits { - commit.actor_id = actor_by_commit_id - .get(commit.graph_commit_id.as_str()) - .cloned(); - } - let mut commit_by_id = HashMap::with_capacity(commits.len()); - let mut head_commit = None; - for commit in commits { - if should_replace_head(head_commit.as_ref(), &commit) { - head_commit = Some(commit.clone()); - } - commit_by_id.insert(commit.graph_commit_id.clone(), commit); - } - Ok((commit_by_id, head_commit)) -} - -async fn load_commit_actor_cache(dataset: &Dataset) -> Result> { - let batches: Vec = dataset - .scan() - .try_into_stream() - .await - .map_err(|e| OmniError::Lance(e.to_string()))? - .try_collect() - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; - - let mut actors = HashMap::new(); - for batch in batches { - let commit_ids = string_column(&batch, "graph_commit_id", "commit actor registry")?; - let actor_ids = string_column(&batch, "actor_id", "commit actor registry")?; - for row in 0..batch.num_rows() { - actors.insert( - commit_ids.value(row).to_string(), - actor_ids.value(row).to_string(), - ); - } - } - Ok(actors) -} - -fn load_commits_from_batches(batches: &[RecordBatch]) -> Result> { - let mut commits = Vec::new(); - for batch in batches { - let ids = string_column(batch, "graph_commit_id", "commit graph")?; - let branches = string_column(batch, "manifest_branch", "commit graph")?; - let versions = u64_column(batch, "manifest_version", "commit graph")?; - let parents = string_column(batch, "parent_commit_id", "commit graph")?; - let merged_parents = string_column(batch, "merged_parent_commit_id", "commit graph")?; - let created = timestamp_micros_column(batch, "created_at", "commit graph")?; - - for row in 0..batch.num_rows() { - commits.push(GraphCommit { - graph_commit_id: ids.value(row).to_string(), - manifest_branch: if branches.is_null(row) { - None - } else { - Some(branches.value(row).to_string()) - }, - manifest_version: versions.value(row), - parent_commit_id: if parents.is_null(row) { - None - } else { - Some(parents.value(row).to_string()) - }, - merged_parent_commit_id: if merged_parents.is_null(row) { - None - } else { - Some(merged_parents.value(row).to_string()) - }, - actor_id: None, - created_at: created.value(row), - }); - } - } - Ok(commits) -} - fn commit_actors_to_batch(records: &[CommitActorRecord]) -> Result { let commit_ids: Vec<&str> = records .iter() @@ -832,51 +632,6 @@ fn should_replace_head(current: Option<&GraphCommit>, candidate: &GraphCommit) - }) } -fn string_column<'a>(batch: &'a RecordBatch, name: &str, context: &str) -> Result<&'a StringArray> { - batch - .column_by_name(name) - .ok_or_else(|| { - OmniError::manifest_internal(format!("{context} batch missing '{name}' column")) - })? - .as_any() - .downcast_ref::() - .ok_or_else(|| { - OmniError::manifest_internal(format!("{context} column '{name}' is not Utf8")) - }) -} - -fn u64_column<'a>(batch: &'a RecordBatch, name: &str, context: &str) -> Result<&'a UInt64Array> { - batch - .column_by_name(name) - .ok_or_else(|| { - OmniError::manifest_internal(format!("{context} batch missing '{name}' column")) - })? - .as_any() - .downcast_ref::() - .ok_or_else(|| { - OmniError::manifest_internal(format!("{context} column '{name}' is not UInt64")) - }) -} - -fn timestamp_micros_column<'a>( - batch: &'a RecordBatch, - name: &str, - context: &str, -) -> Result<&'a TimestampMicrosecondArray> { - batch - .column_by_name(name) - .ok_or_else(|| { - OmniError::manifest_internal(format!("{context} batch missing '{name}' column")) - })? - .as_any() - .downcast_ref::() - .ok_or_else(|| { - OmniError::manifest_internal(format!( - "{context} column '{name}' is not Timestamp(Microsecond)" - )) - }) -} - fn ancestor_distances( start_id: &str, commits: &HashMap, @@ -912,284 +667,3 @@ async fn open_for_branch(root_uri: &str, branch: Option<&str>) -> Result, -} - -/// Build a synthetic pre-Phase-7 (internal-schema v3) graph at `root_uri`: graph -/// lineage lives ONLY in `_graph_commits.lance` (+ its actor sidecar), `__manifest` -/// carries NO `graph_commit`/`graph_head` rows, and the stamp is set to v3. This -/// reproduces exactly the on-disk shape a graph created by a pre-RFC-013-Phase-7 -/// binary would have, so the v3→v4 migration and the v3-read fallback can be -/// tested against it. -/// -/// The lineage is a realistic DAG with a branch + a real merge: genesis → A → -/// (feature commit, off to the side) → merge(A, feature) at the head of main, -/// with authored actors on the non-genesis commits. Reaches the dead-on-the- -/// write-path `append_commit_with_parents`/`append_actor` (still present for -/// exactly this transitional purpose) to write the legacy rows. -#[cfg(any(test, feature = "failpoints"))] -pub async fn seed_legacy_v3_lineage(root_uri: &str) -> Result { - let root = root_uri.trim_end_matches('/'); - - // 1. Create `__manifest` (Phase-7 folds genesis lineage into it) and the - // EMPTY legacy `_graph_commits.lance`. We then append the v3-style commit - // rows below — a real v3 graph carried its genesis in `_graph_commits`. - crate::db::manifest::seed_manifest_for_v3_fixture(root).await?; - let mut cg = CommitGraph::init(root).await?; - // Clear the cache that init seeded from the (genesis-bearing) manifest, so - // the appended rows below are the whole story and parents come out right. - cg.commit_by_id.clear(); - cg.head_commit = None; - - // 2. Append the legacy lineage to `_graph_commits.lance` on main. - let genesis = cg - .append_commit_with_parents(None, 1, None, None, None) - .await?; - let commit_a = cg - .append_commit_with_parents(None, 2, Some(&genesis), None, Some("act-a")) - .await?; - let feature_commit = cg - .append_commit_with_parents(Some("feature"), 3, Some(&commit_a), None, Some("act-feature")) - .await?; - let merge_commit = cg - .append_commit_with_parents( - None, - 4, - Some(&commit_a), - Some(&feature_commit), - Some("act-merger"), - ) - .await?; - - // 3. Strip the genesis lineage rows the Phase-7 init folded into `__manifest` - // and rewind the stamp to v3, so the manifest matches a true pre-Phase-7 - // graph (no lineage in `__manifest`, stamp v3). - crate::db::manifest::strip_lineage_and_set_v3_stamp_for_fixture(root).await?; - - Ok(V3LineageFixture { - genesis: genesis.clone(), - commit_a: commit_a.clone(), - feature_commit: feature_commit.clone(), - merge_commit: merge_commit.clone(), - all_ids: vec![genesis, commit_a, feature_commit, merge_commit], - }) -} - -/// Identities of a synthetic pre-Phase-7 (v3) graph that carries a REAL Lance -/// branch (built by [`seed_legacy_v3_lineage_with_branch`]). -#[cfg(test)] -#[derive(Debug, Clone)] -pub struct V3BranchedLineageFixture { - /// The genesis (parentless) commit on main. - pub genesis: String, - /// A direct authored commit on main (actor `act-a`). The head of main. - pub commit_a: String, - /// A commit on the real `feature` Lance branch (actor `act-branch`), - /// parented off `commit_a`. The head of `feature`. - pub branch_commit: String, - /// The branch name forked on both `_graph_commits.lance` and `__manifest`. - pub branch: String, -} - -/// Build a synthetic pre-Phase-7 (internal-schema v3) graph at `root_uri` that -/// carries a REAL Lance branch `feature` on BOTH `_graph_commits.lance` and -/// `__manifest`, reproducing exactly the on-disk shape of a branched graph -/// created by a pre-RFC-013-Phase-7 binary: -/// -/// - `_graph_commits.lance`: main has `genesis → A`; the `feature` Lance branch -/// adds `branch_commit` (parent `A`). Authored actors land in the FLAT actor -/// sidecar (the pre-Phase-7 commit graph never forked the actor table). -/// - `__manifest`: main is stamped v3 with NO lineage rows; the `feature` branch -/// is forked from main's v3 state, so it too is v3 with NO lineage of its own. -/// -/// This is the fixture the per-branch v3→v4 migration runs against: it lets a -/// test prove that migrating the `feature` branch reads the branch's legacy -/// lineage, writes it into the BRANCH's `__manifest`, and leaves main untouched — -/// the case the main-only [`seed_legacy_v3_lineage`] cannot exercise. -#[cfg(test)] -pub async fn seed_legacy_v3_lineage_with_branch(root_uri: &str) -> Result { - let root = root_uri.trim_end_matches('/'); - - // 1. `__manifest` (genesis folded by Phase-7 init) + an empty legacy - // `_graph_commits.lance`. Clear the init-seeded cache so the rows we - // append below are the whole story. - crate::db::manifest::seed_manifest_for_v3_fixture(root).await?; - let mut cg = CommitGraph::init(root).await?; - cg.commit_by_id.clear(); - cg.head_commit = None; - - // 2. Main lineage on `_graph_commits.lance`: genesis → A (authored). - let genesis = cg - .append_commit_with_parents(None, 1, None, None, None) - .await?; - let commit_a = cg - .append_commit_with_parents(None, 2, Some(&genesis), None, Some("act-a")) - .await?; - - // 3. Fork a real `feature` Lance branch on `_graph_commits.lance`, switch the - // handle to it, and append an authored branch commit (its actor lands in - // the flat main actor table — exactly the pre-Phase-7 shape). - cg.create_branch("feature").await?; - let commits_ds = cg - .dataset - .take() - .expect("commits dataset present after create_branch") - .checkout_branch("feature") - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; - cg.dataset = Some(commits_ds); - cg.active_branch = Some("feature".to_string()); - let branch_commit = cg - .append_commit_with_parents(Some("feature"), 3, Some(&commit_a), None, Some("act-branch")) - .await?; - - // 4. Rewind main's `__manifest` to the v3 shape (strip the folded genesis - // lineage, set stamp 3) BEFORE forking — so the `feature` manifest branch - // inherits the stripped v3 state (no lineage, stamp 3). - crate::db::manifest::strip_lineage_and_set_v3_stamp_for_fixture(root).await?; - crate::db::manifest::fork_manifest_branch_for_v3_fixture(root, "feature").await?; - - Ok(V3BranchedLineageFixture { - genesis, - commit_a, - branch_commit, - branch: "feature".to_string(), - }) -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use arrow_schema::{DataType, Field, Schema}; - - use super::*; - - // RFC-013 step 4: the v3-read fallback / migration source reads a NAMED - // branch's lineage from a real Lance branch on `_graph_commits.lance`, while - // resolving actors from the FLAT actor table (the pre-Phase-7 commit graph - // forked only the commits dataset, never the actor sidecar). This guards - // both that branch-checkout path and the flat-actor resolution — the case - // the main-branch fixture (commits on main only) does not exercise. - #[tokio::test] - async fn read_legacy_commit_cache_resolves_branch_commits_with_flat_actors() { - let dir = tempfile::tempdir().unwrap(); - let uri = dir.path().to_str().unwrap(); - - // A v3 graph needs `__manifest` to exist for `CommitGraph::init`'s - // genesis-cache seed; we clear that cache and write our own legacy rows. - crate::db::manifest::seed_manifest_for_v3_fixture(uri) - .await - .unwrap(); - let mut cg = CommitGraph::init(uri).await.unwrap(); - cg.commit_by_id.clear(); - cg.head_commit = None; - - // Main lineage: genesis → A (authored). The actor lands in the FLAT - // `_graph_commit_actors.lance` (never branched). - let genesis = cg - .append_commit_with_parents(None, 1, None, None, None) - .await - .unwrap(); - let commit_a = cg - .append_commit_with_parents(None, 2, Some(&genesis), None, Some("act-a")) - .await - .unwrap(); - - // Fork a real Lance branch on `_graph_commits.lance`, switch the handle - // to it, and append an authored branch commit (its actor also goes to - // the flat main actor table — exactly the pre-Phase-7 shape). - cg.create_branch("feature").await.unwrap(); - cg.dataset = Some( - cg.dataset - .take() - .unwrap() - .checkout_branch("feature") - .await - .unwrap(), - ); - cg.active_branch = Some("feature".to_string()); - let branch_commit = cg - .append_commit_with_parents( - Some("feature"), - 3, - Some(&commit_a), - None, - Some("act-branch"), - ) - .await - .unwrap(); - - // The legacy read at the branch sees the inherited main commits + the - // branch commit, the head is the branch commit, and the authored actors - // resolve from the flat table (no branch checkout on the actor dataset). - let (commits, head) = read_legacy_commit_cache(uri, Some("feature")).await.unwrap(); - assert_eq!(commits.len(), 3, "branch inherits genesis + A + its own commit"); - assert_eq!( - head.as_ref().unwrap().graph_commit_id, - branch_commit, - "the branch commit is the head" - ); - assert_eq!( - commits.get(&commit_a).unwrap().actor_id.as_deref(), - Some("act-a"), - "main commit's actor resolves from the flat actor table", - ); - assert_eq!( - commits.get(&branch_commit).unwrap().actor_id.as_deref(), - Some("act-branch"), - "branch commit's actor resolves from the flat actor table", - ); - } - - #[test] - fn load_commits_from_batches_returns_error_for_bad_schema() { - let batch = RecordBatch::try_new( - Arc::new(Schema::new(vec![ - Field::new("graph_commit_id", DataType::UInt64, false), - Field::new("manifest_branch", DataType::Utf8, true), - Field::new("manifest_version", DataType::UInt64, false), - Field::new("parent_commit_id", DataType::Utf8, true), - Field::new("merged_parent_commit_id", DataType::Utf8, true), - Field::new( - "created_at", - DataType::Timestamp(TimeUnit::Microsecond, None), - false, - ), - ])), - vec![ - Arc::new(UInt64Array::from(vec![1_u64])), - Arc::new(StringArray::from(vec![None::<&str>])), - Arc::new(UInt64Array::from(vec![1_u64])), - Arc::new(StringArray::from(vec![None::<&str>])), - Arc::new(StringArray::from(vec![None::<&str>])), - Arc::new(TimestampMicrosecondArray::from(vec![1_i64])), - ], - ) - .unwrap(); - - let err = load_commits_from_batches(&[batch]).unwrap_err(); - assert!(err.to_string().contains("graph_commit_id")); - } -} diff --git a/crates/omnigraph/src/db/manifest.rs b/crates/omnigraph/src/db/manifest.rs index 806c4beb..9d6abd1c 100644 --- a/crates/omnigraph/src/db/manifest.rs +++ b/crates/omnigraph/src/db/manifest.rs @@ -35,7 +35,6 @@ pub(crate) use metadata::TableVersionMetadata; use metadata::{OMNIGRAPH_ROW_COUNT_KEY, table_version_metadata_for_state}; #[cfg(test)] use namespace::{branch_manifest_namespace, staged_table_namespace}; -pub(crate) use migrations::refuse_if_stamp_unsupported; pub(crate) use publisher::LineageIntent; use publisher::{GraphNamespacePublisher, ManifestBatchPublisher, PublishOutcome}; pub(crate) use recovery::{ @@ -81,28 +80,9 @@ pub(crate) struct CommitOutcome { pub parent_commit_id: Option, } -/// Apply pending internal-schema migrations against `__manifest` on the -/// open-for-write path, independent of a publish. -/// -/// `Omnigraph::open(ReadWrite)` calls this before the coordinator reads branch -/// state, so branch-observing code (`branch_list`, the schema-apply -/// blocking-branch checks) sees the post-migration graph. In particular the -/// v2→v3 step sweeps legacy `__run__*` staging branches off `__manifest` -/// (MR-770); running it here closes the window where those branches would -/// otherwise block schema apply before the first publish runs the migration. -/// -/// Idempotent: a no-op stamp read when the on-disk version already matches. -pub(crate) async fn migrate_on_open(root_uri: &str) -> Result<()> { - let mut dataset = open_manifest_dataset(root_uri, None).await?; - // Main branch: the v3→v4 lineage backfill reads `_graph_commits.lance` at - // main. Named branches migrate on their own first write via the publisher. - migrations::migrate_internal_schema(&mut dataset, root_uri, None).await -} - /// The on-disk internal-schema stamp of `__manifest` at `branch` (main when -/// `None`). The transitional v3-read fallback in `CommitGraph` uses this to -/// decide whether to source lineage from `__manifest` (stamp ≥ v4, post-Phase-7) -/// or from the legacy `_graph_commits.lance` (stamp < v4, not yet migrated). +/// `None`). Used by the open-path refusal guard and to surface the storage +/// version to operators (`omnigraph snapshot`). pub(crate) async fn internal_schema_stamp_at(root_uri: &str, branch: Option<&str>) -> Result { let dataset = open_manifest_dataset(root_uri, branch).await?; Ok(migrations::read_stamp(&dataset)) @@ -110,91 +90,15 @@ pub(crate) async fn internal_schema_stamp_at(root_uri: &str, branch: Option<&str /// Refuse to open a graph whose `__manifest` is stamped outside this binary's /// supported internal-schema range (newer than CURRENT, or older than -/// MIN_SUPPORTED). The read-only open path calls this — it skips the write-path -/// migration where the refusal otherwise lives — so an old binary still refuses a -/// newer graph instead of silently misreading it, and a too-new binary refuses a -/// below-floor graph instead of opening an unmigrated one. +/// MIN_SUPPORTED). Both open paths (read-write and read-only) call this before +/// reading any data, so an old binary refuses a newer graph instead of silently +/// misreading it, and this binary refuses a below-floor graph with a +/// rebuild-via-export/import message instead of opening a format it can't read. pub(crate) async fn refuse_if_internal_schema_unsupported(root_uri: &str) -> Result<()> { let stamp = internal_schema_stamp_at(root_uri, None).await?; migrations::refuse_if_stamp_unsupported(stamp) } -/// The internal-schema version this binary writes. Exposed so the v3-read -/// fallback can compare a branch's on-disk stamp against it. -pub(crate) const INTERNAL_MANIFEST_SCHEMA_VERSION: u32 = - migrations::INTERNAL_MANIFEST_SCHEMA_VERSION; - -/// Test-only: create a `__manifest` for a minimal catalog, the first half of a -/// synthetic pre-Phase-7 (v3) graph (see `commit_graph::seed_legacy_v3_lineage`). -/// A small two-type schema is enough — the v3→v4 migration touches only the -/// lineage rows, never the table-version rows. -#[cfg(any(test, feature = "failpoints"))] -pub(crate) async fn seed_manifest_for_v3_fixture(root_uri: &str) -> Result<()> { - let schema = omnigraph_compiler::schema::parser::parse_schema( - "node Person { name: String }\nedge Knows: Person -> Person { }\n", - ) - .map_err(|e| OmniError::manifest(e.to_string()))?; - let catalog = - omnigraph_compiler::catalog::build_catalog(&schema).map_err(|e| OmniError::manifest(e.to_string()))?; - ManifestCoordinator::init(root_uri, &catalog).await?; - Ok(()) -} - -/// Test-only: strip the `graph_commit`/`graph_head` rows that Phase-7 init folds -/// into `__manifest`, then rewind the internal-schema stamp to v3 — completing a -/// synthetic pre-Phase-7 graph whose lineage lives only in `_graph_commits.lance`. -#[cfg(any(test, feature = "failpoints"))] -pub(crate) async fn strip_lineage_and_set_v3_stamp_for_fixture(root_uri: &str) -> Result<()> { - let mut dataset = open_manifest_dataset(root_uri, None).await?; - dataset - .delete("object_type = 'graph_commit' OR object_type = 'graph_head'") - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; - // Re-open so the stamp write lands on the post-delete HEAD. - let mut dataset = open_manifest_dataset(root_uri, None).await?; - migrations::set_stamp_for_test(&mut dataset, 3).await -} - -/// Test-only: fork a real Lance branch `name` on `__manifest` from main's CURRENT -/// state. Call AFTER `strip_lineage_and_set_v3_stamp_for_fixture` so the forked -/// branch inherits the v3 stamp with no lineage rows — i.e. a faithful -/// pre-Phase-7 branch whose `__manifest` carries no lineage of its own. The -/// branch's commits live only on the `_graph_commits.lance` branch until the -/// per-branch v3→v4 migration runs against this branch's `__manifest`. -#[cfg(test)] -pub(crate) async fn fork_manifest_branch_for_v3_fixture(root_uri: &str, name: &str) -> Result<()> { - let mut dataset = open_manifest_dataset(root_uri, None).await?; - let version = dataset.version().version; - dataset - .create_branch(name, version, None) - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; - Ok(()) -} - -/// Test-support re-export of the read-write migration entry point for the -/// `failpoints` integration binary (which can't reach `pub(crate)` items). Gated -/// on `test` OR `failpoints`; never in a release build. -#[cfg(any(test, feature = "failpoints"))] -pub async fn migrate_on_open_for_test(root_uri: &str) -> Result<()> { - migrate_on_open(root_uri).await -} - -/// Test-support: the number of `graph_commit` lineage rows in `__manifest` at -/// `branch` (main when `None`), plus the on-disk internal-schema stamp. Lets the -/// `failpoints` integration binary assert the migration neither stamped nor -/// backfilled when a legacy-open fault fired. Gated on `test` OR `failpoints`. -#[cfg(any(test, feature = "failpoints"))] -pub async fn lineage_row_count_and_stamp_for_test( - root_uri: &str, - branch: Option<&str>, -) -> Result<(usize, u32)> { - let dataset = open_manifest_dataset(root_uri, branch).await?; - let stamp = migrations::read_stamp(&dataset); - let (rows, _heads) = read_graph_lineage(&dataset).await?; - Ok((rows.len(), stamp)) -} - /// Immutable point-in-time view of the database. /// /// Cheap to create (no storage I/O). All reads within a query go through one diff --git a/crates/omnigraph/src/db/manifest/migrations.rs b/crates/omnigraph/src/db/manifest/migrations.rs index 207def4c..95d593da 100644 --- a/crates/omnigraph/src/db/manifest/migrations.rs +++ b/crates/omnigraph/src/db/manifest/migrations.rs @@ -1,113 +1,79 @@ -//! Internal schema migrations for the `__manifest` Lance dataset. +//! Internal schema versioning for the `__manifest` Lance dataset. //! //! ## Why this exists //! -//! The on-disk shape of `__manifest` evolves alongside the engine. We do not -//! want healing hooks scattered through every read/write path that ask -//! "is this an old shape? am I supposed to upgrade it?" — that pattern -//! accrues liability with every change. Instead this module is the *single* -//! place where on-disk shape is reconciled with what the binary expects: +//! The on-disk shape of `__manifest` evolves alongside the engine. This module +//! is the *single* place where on-disk shape is reconciled with what the binary +//! expects: //! //! - One constant `INTERNAL_MANIFEST_SCHEMA_VERSION` declares the shape this //! binary writes. //! - One stamp `omnigraph:internal_schema_version` in the manifest dataset's //! schema-level metadata records the on-disk shape. -//! - One dispatcher `migrate_internal_schema` walks the on-disk stamp forward -//! to the expected version via `match`-arm steps. Each future change adds -//! one arm + one test, never a new branch in unrelated code paths. +//! - One guard `refuse_if_stamp_unsupported` rejects any graph this binary +//! cannot serve — in either direction — with a clear, actionable error. //! -//! After the dispatcher runs, the rest of the engine assumes current shape. -//! No code outside this module should ever inspect the stamp. +//! ## Single-version contract (strand + export/import) //! -//! ## When it runs +//! This binary reads exactly ONE internal-schema version (`MIN_SUPPORTED == +//! CURRENT`). There is no in-place migration: a graph stamped below CURRENT is +//! refused on open with a "rebuild via `omnigraph export` + `init`/`load`" +//! message, not silently upgraded. This is the deliberate pre-release contract — +//! storage-format changes are a cutover, not a rolling in-place migration (see +//! `docs/user/operations/upgrade.md` and the versioning policy in `docs/dev`). +//! `stamp_current_version` stamps fresh graphs at CURRENT, so newly initialized +//! graphs always pass. //! -//! Only on open-for-write paths (the publisher's `load_publish_state`). -//! Reads are side-effect-free by contract; an old-shape `__manifest` reads -//! fine, it just lacks the protections introduced by later versions. -//! `init_manifest_graph` stamps the current version at creation, so newly -//! initialized graphs never need migration. +//! ## If an in-place migration is ever needed +//! +//! The stamp + `refuse_if_stamp_unsupported` are the seam a future migration +//! would plug into: re-introduce a dispatcher that walks the stamp forward and +//! lower `MIN_SUPPORTED` below CURRENT for exactly the versions it can upgrade. +//! Until a concrete graph demands it, that machinery is unearned complexity and +//! is deliberately absent. A future converter is best shaped as a standalone +//! one-shot tool, not a framework baked into the open path. //! //! ## Forward-version protection //! -//! A stamp *higher* than this binary's known version triggers a clear -//! "upgrade omnigraph first" error. An old binary cannot clobber a newer -//! schema by silently treating "unknown stamp" as "missing stamp". +//! A stamp *higher* than this binary's version triggers a clear "upgrade +//! omnigraph first" error. An old binary cannot clobber a newer schema by +//! silently treating "unknown stamp" as "missing stamp". use lance::Dataset; use crate::error::{OmniError, Result}; -use crate::db::commit_graph::GraphCommit; -use super::state::{GraphLineageRow, graph_lineage_row_parts, merge_lineage_rows, read_graph_lineage}; - /// Current internal schema version this binary expects to find on disk. /// /// History: /// - v1 — implicit (pre-stamp). `__manifest.object_id` carried no -/// `lance-schema:unenforced-primary-key` annotation; the publisher had -/// no row-level CAS protection (see `.context/merge-insert-cas-granularity.md`). +/// `lance-schema:unenforced-primary-key` annotation. /// - v2 — `__manifest.object_id` carries the unenforced-PK annotation, -/// engaging Lance's bloom-filter conflict resolver at commit time. Added -/// alongside `expected_table_versions` OCC on `ManifestBatchPublisher::publish`. +/// engaging Lance's bloom-filter conflict resolver at commit time. /// - v3 — one-time sweep of legacy `__run__` staging branches left on the -/// `__manifest` dataset by the pre-v0.4.0 Run state machine (removed in -/// MR-771). Once swept, the `is_internal_run_branch` defense-in-depth guard -/// is no longer needed (MR-770). +/// `__manifest` dataset by the pre-v0.4.0 Run state machine. /// - v4 — RFC-013 Phase 7 folds graph lineage into `__manifest` as -/// `graph_commit`/`graph_head` rows written in the publish CAS. A pre-Phase-7 -/// (v3) graph has its lineage only in `_graph_commits.lance`, so the new -/// binary would read an empty commit DAG. This one-time per-branch backfill -/// copies the lineage from `_graph_commits.lance` into `__manifest` -/// (`migrate_v3_to_v4`). `_graph_commits.lance` is left in place as the -/// branch-ref carrier; no commit rows are ever written to it again. +/// `graph_commit`/`graph_head` rows written in the publish CAS (no +/// `_graph_commits.lance`). +/// +/// v1–v3 graphs are not served by this binary (see `MIN_SUPPORTED`); the history +/// is kept for provenance and to document what each stamp value meant. pub(crate) const INTERNAL_MANIFEST_SCHEMA_VERSION: u32 = 4; -/// The oldest on-disk internal-schema stamp this binary will open. A graph below -/// this floor is refused (`refuse_if_stamp_unsupported`) with a "migrate it -/// forward with an older release first" error, instead of obliging this binary to -/// carry that version's `migrate_vN_…` arm and the legacy readers it needs -/// forever. Raising the floor is how the migration chain sheds old code. -/// -/// **Retirement runbook** — turning "accumulates forever" into a sliding window: -/// 1. *Shed version N* once no graph below `N+1` remains in the fleet: bump this -/// floor AND `LOWEST_REGISTERED_MIGRATION_SOURCE` to `N+1`, then delete the -/// `N =>` arm in `migrate_internal_schema`, `migrate_vN_to_vN+1`, and its -/// helpers + tests. The tripwire test keeps the two consts in lockstep, so a -/// half-done shed fails CI. -/// 2. *Retire the v3 legacy readers entirely* once MIN ≥ 4: `git rm` the -/// `commit_graph/commit_graph_legacy_v3.rs` seam file and flip the single -/// `stamp < CURRENT` gate in `load_commit_cache_for_branch` to read the -/// manifest projection unconditionally. +/// The oldest on-disk internal-schema stamp this binary will open. With no +/// in-place migration, this equals `INTERNAL_MANIFEST_SCHEMA_VERSION`: a graph +/// stamped below it is refused (`refuse_if_stamp_unsupported`) with a +/// rebuild-via-export/import message rather than silently upgraded. /// -/// MIN = 1 today is a pure no-op: `read_stamp` floors an absent stamp at 1 and no -/// real graph carries 0, so nothing is refused. -pub(crate) const MIN_SUPPORTED_INTERNAL_SCHEMA_VERSION: u32 = 1; - -/// The lowest `current` value the `migrate_internal_schema` dispatcher still has a -/// `match` arm for. Mirrors the lowest registered migration source so a floor bump -/// that forgets to delete the now-dead arm (or vice versa) is caught by the -/// compile-time tripwire below. Migration arms aren't an enumerable registry, so -/// this hand-mirrored const is the minimal enforced coupling — cheaper than -/// reshaping the dispatcher into a data-driven table. -const LOWEST_REGISTERED_MIGRATION_SOURCE: u32 = 1; - -/// Retirement tripwire (compile-time): the refusal floor and the lowest migration -/// arm must move together. Raising `MIN_SUPPORTED` without deleting the now-dead -/// below-floor arm — or vice versa — fails the build with this message, which is -/// stronger than a runtime test and impossible to skip. Migration arms can't be -/// enumerated, so this const-mirror is the check. -const _: () = assert!( - LOWEST_REGISTERED_MIGRATION_SOURCE == MIN_SUPPORTED_INTERNAL_SCHEMA_VERSION, - "internal-schema floor drifted from the lowest registered migration arm: when raising \ - MIN_SUPPORTED_INTERNAL_SCHEMA_VERSION, delete every below-floor `N =>` arm + migrate_vN_… \ - + its helpers/tests and bump LOWEST_REGISTERED_MIGRATION_SOURCE to match (or vice versa)", -); +/// Lowering this below CURRENT only makes sense alongside a re-introduced +/// migration dispatcher that can actually walk those versions forward (see the +/// module doc). +pub(crate) const MIN_SUPPORTED_INTERNAL_SCHEMA_VERSION: u32 = INTERNAL_MANIFEST_SCHEMA_VERSION; const INTERNAL_SCHEMA_VERSION_KEY: &str = "omnigraph:internal_schema_version"; -const OBJECT_ID_PK_KEY: &str = "lance-schema:unenforced-primary-key"; /// Read the on-disk stamp from `__manifest`'s schema-level metadata. -/// Absent ⇒ v1 (pre-stamp world). +/// Absent ⇒ v1 (pre-stamp world), which is below `MIN_SUPPORTED` and so refused. pub(crate) fn read_stamp(dataset: &Dataset) -> u32 { dataset .schema() @@ -124,15 +90,14 @@ pub(super) async fn stamp_current_version(dataset: &mut Dataset) -> Result<()> { } /// Refuse to open a manifest whose stamp this binary cannot serve — in either -/// direction — with a clear upgrade path. Shared by every place a stamp is read -/// and enforced: the write-path migration dispatcher, the read-only open guard, -/// and the branch lineage-read path. Checking both bounds in one function means a -/// new stamp-reading caller gets the floor and the ceiling together and cannot +/// direction — with a clear, actionable path. Shared by every open path (the +/// read-write open guard, the read-only open guard, and the publisher), so a new +/// stamp-reading caller gets the floor and the ceiling together and cannot /// half-enforce. /// /// - `stamp > CURRENT`: the graph was written by a newer binary — upgrade omnigraph. -/// - `stamp < MIN_SUPPORTED`: the graph predates the oldest migration this binary -/// still carries — migrate it forward with an older release first, then reopen. +/// - `stamp < MIN_SUPPORTED`: the graph was made by an older omnigraph whose +/// storage format this binary does not read — rebuild it via export/import. pub(crate) fn refuse_if_stamp_unsupported(stamp: u32) -> Result<()> { if stamp > INTERNAL_MANIFEST_SCHEMA_VERSION { return Err(OmniError::manifest(format!( @@ -143,364 +108,16 @@ pub(crate) fn refuse_if_stamp_unsupported(stamp: u32) -> Result<()> { } if stamp < MIN_SUPPORTED_INTERNAL_SCHEMA_VERSION { return Err(OmniError::manifest(format!( - "__manifest is stamped at internal schema v{} but this binary supports v{} or later \ - — open it with an older omnigraph release to migrate it forward first, then reopen", - stamp, MIN_SUPPORTED_INTERNAL_SCHEMA_VERSION, + "__manifest is stamped at internal schema v{stamp}, but this omnigraph reads only v{current}. \ + This graph was created by an older release; rebuild it: run `omnigraph export` with that \ + older release, then `omnigraph init` + `omnigraph load` with this one. \ + (Data, vectors, and blobs are preserved; commit history and branches are not.)", + current = INTERNAL_MANIFEST_SCHEMA_VERSION, ))); } Ok(()) } -/// Apply any pending internal-schema migrations to the manifest dataset. -/// -/// Idempotent: when the on-disk stamp matches the binary, this is a single -/// metadata read with no writes. -/// -/// `root_uri` + `branch` identify which graph + branch this `dataset` is a -/// manifest for. The v3→v4 lineage backfill needs them to read that branch's -/// `_graph_commits.lance`. `migrate_on_open` passes the main branch -/// (`branch = None`); the publisher's `load_publish_state` passes its own -/// branch, so each branch backfills on its first write. -pub(super) async fn migrate_internal_schema( - dataset: &mut Dataset, - root_uri: &str, - branch: Option<&str>, -) -> Result<()> { - let mut current = read_stamp(dataset); - - refuse_if_stamp_unsupported(current)?; - - while current < INTERNAL_MANIFEST_SCHEMA_VERSION { - match current { - 1 => { - migrate_v1_to_v2(dataset).await?; - current = 2; - } - 2 => { - migrate_v2_to_v3(dataset).await?; - current = 3; - } - 3 => { - migrate_v3_to_v4(dataset, root_uri, branch).await?; - current = 4; - } - other => { - return Err(OmniError::manifest_internal(format!( - "no internal-schema migration registered for v{} → v{}", - other, - other + 1, - ))); - } - } - } - Ok(()) -} - -/// v1 → v2: annotate `__manifest.object_id` as Lance's unenforced primary key -/// so the merge-insert conflict resolver enforces row-level CAS at commit -/// time, then bump the stamp. -/// -/// Idempotent under crash-retry by construction. Lance 7 makes the unenforced -/// primary key **immutable once set**: any write that touches the reserved -/// `lance-schema:unenforced-primary-key` field metadata after the PK is set -/// errors ("cannot be changed once set", `lance::dataset::transaction`), even -/// re-applying the same value. A crash between the field-set and the stamp -/// bump leaves the field set without a stamp, so the next open re-enters here -/// with the PK already present — we must therefore set it only when absent. -/// (Fresh graphs bake the PK into `manifest_schema()` at init and never run -/// this migration; only genuine pre-v0.4.0 graphs do.) -async fn migrate_v1_to_v2(dataset: &mut Dataset) -> Result<()> { - // The guard is over the *specific* field, not just "any PK is set": skipping - // when `object_id` is already the PK is the idempotent crash-recovery path, - // but a manifest whose PK is some *other* field has the wrong CAS key — and - // Lance 7 won't let us change it. Refuse loudly rather than silently leave - // merge-insert conflict detection keyed on the wrong column. - let pk_fields: Vec<&str> = dataset - .schema() - .unenforced_primary_key() - .iter() - .map(|field| field.name.as_str()) - .collect(); - match pk_fields.as_slice() { - ["object_id"] => {} // already migrated (or a crash re-entry) — idempotent no-op - [] => { - dataset - .update_field_metadata() - .update( - "object_id", - [(OBJECT_ID_PK_KEY.to_string(), "true".to_string())], - ) - .map_err(|e| OmniError::Lance(e.to_string()))? - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; - } - other => { - return Err(OmniError::manifest_internal(format!( - "__manifest unenforced primary key is {other:?}, expected [\"object_id\"]; \ - refusing to migrate a manifest with an unexpected CAS key" - ))); - } - } - set_stamp(dataset, 2).await -} - -/// v2 → v3: sweep legacy `__run__` staging branches off the `__manifest` -/// dataset, then bump the stamp. -/// -/// The pre-v0.4.0 Run state machine (removed in MR-771) created graph-level -/// staging branches named `__run__` on `__manifest`. MR-771 stopped -/// creating them but left any pre-existing ones in place; Lance's -/// `list_branches` still enumerates them, so they leak into `branch_list()` -/// and count as blocking branches at schema-apply time. This one-time sweep -/// removes them so the `is_internal_run_branch` guard can retire (MR-770). -/// -/// The `"__run__"` prefix is inlined here on purpose: this migration must keep -/// working after the `run_registry` module (the guard) is deleted, so it does -/// not depend on it. -/// -/// Idempotent under both sequential retry and concurrent runners: each run -/// re-enumerates `list_branches` fresh, and `force_delete_branch` tolerates a -/// branch that is already gone — so a crash before the stamp bump, or a second -/// process opening the same legacy graph at the same time, never errors out. -async fn migrate_v2_to_v3(dataset: &mut Dataset) -> Result<()> { - const LEGACY_RUN_BRANCH_PREFIX: &str = "__run__"; - let branches = dataset - .list_branches() - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; - let run_branches: Vec = branches - .into_keys() - .filter(|name| { - name.trim_start_matches('/') - .starts_with(LEGACY_RUN_BRANCH_PREFIX) - }) - .collect(); - for name in run_branches { - // `force_delete_branch` deletes even when the `BranchContents` is - // already gone. Plain `delete_branch` errors "BranchContents not - // found", which would fail a second concurrent open (or a retry that - // raced another runner) after the first one swept the branch. Force is - // exactly Lance's documented path for cleaning up zombie branches. - dataset - .force_delete_branch(&name) - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; - } - set_stamp(dataset, 3).await -} - -/// v3 → v4: backfill the graph lineage from `_graph_commits.lance` into -/// `__manifest`, then bump the stamp. -/// -/// RFC-013 Phase 7 made `__manifest` the single source of graph lineage -/// (`graph_commit` / `graph_head:` rows, written in the publish CAS). -/// A pre-Phase-7 (v3) graph has its lineage only in `_graph_commits.lance` and -/// none in `__manifest`, so the new binary would read an EMPTY commit DAG. This -/// one-time per-branch migration copies that branch's commits + the single head -/// into `__manifest` so reads see the real history. `_graph_commits.lance` -/// itself is left untouched as the branch-ref carrier (no commit row is ever -/// written to it again). -/// -/// `dataset` is the `__manifest` for `branch` (main when `branch` is `None`); -/// the migration runs per-branch on that branch's first write, so it reads -/// `_graph_commits.lance` at the SAME branch. -/// -/// Idempotency + crash recovery: the stamp bump is the LAST step, and the -/// lineage merge is keyed on `object_id` (re-inserting the same commit rows is a -/// no-op update). A crash after the merge but before the stamp bump re-enters -/// here at v3 and re-runs harmlessly. As a fast path, if `__manifest` already -/// carries `graph_commit` rows (a previous run completed the merge), we skip -/// straight to the stamp bump. -/// -/// Concurrent runners: two processes (or two open-for-write handles) can open the -/// same legacy graph at once and both reach the backfill merge. `merge_lineage_rows` -/// uses `conflict_retries(0)`, so the row-level CAS loser on `graph_head:` -/// must be re-driven here rather than failing the open — `migrate_v2_to_v3` is -/// concurrent-runner idempotent and this step must be too. The bounded loop -/// re-reads the fast path (a concurrent winner's merge is one atomic Lance commit, -/// so a re-read sees either zero or all of its rows, never partial), re-opens the -/// stale handle past the winner's commit, and retries. On budget exhaustion it -/// returns a `RowLevelCasContention`-typed error so the publisher's OUTER retry -/// loop (which only re-runs `is_retryable_publish_conflict` conflicts) completes -/// it on the next attempt — the same converge-on-next-attempt contract the -/// recovery sweep uses. -async fn migrate_v3_to_v4( - dataset: &mut Dataset, - root_uri: &str, - branch: Option<&str>, -) -> Result<()> { - // Mirror the publisher's budget (`publisher::PUBLISHER_RETRY_BUDGET = 5`); kept - // as a local const rather than re-exporting that private one — the two are the - // same shape (bounded row-level-CAS retries) but independent knobs. - const MIGRATION_MERGE_RETRY_BUDGET: u32 = 5; - - // Exclusive range + an unguarded retryable arm (see `commit_v4_stamp_idempotently` - // for the rationale): every retryable conflict re-opens and retries inside the - // loop, and the SINGLE reachable exhaustion path is the typed contention return - // below — so the retryable variant can never fall through to the `Err(err)` - // propagate arm on the last iteration. - for _ in 0..MIGRATION_MERGE_RETRY_BUDGET { - // Fast path / idempotency + concurrent-winner guard: if the backfill - // already landed (a previous run, OR a concurrent runner that won the CAS - // — its merge is atomic, so this is all-or-nothing), don't re-merge — just - // (re)stamp. `dataset` is re-opened past any winner's commit below, so this - // re-read sees the winner's rows on a retry. - let (existing_lineage, _heads) = read_graph_lineage(dataset).await?; - if !existing_lineage.is_empty() { - return commit_v4_stamp_idempotently(dataset, root_uri, branch).await; - } - - // Read this branch's legacy commit cache (commits + the head). An absent or - // empty `_graph_commits.lance` yields no commits — nothing to backfill. - let (commit_by_id, head) = - crate::db::commit_graph::read_legacy_commit_cache(root_uri, branch).await?; - if commit_by_id.is_empty() { - return commit_v4_stamp_idempotently(dataset, root_uri, branch).await; - } - - let parts = build_lineage_backfill_parts(&commit_by_id, head.as_ref(), branch)?; - - match merge_lineage_rows(dataset.clone(), &parts).await { - Ok(new_dataset) => { - *dataset = new_dataset; - // Stamp LAST. Crash window: a failure between the merge above and - // this stamp bump leaves stamp v3 + lineage present in `__manifest`. - // The next open re-enters at v3, the fast path at the top sees the - // lineage and skips straight to the stamp bump — completing the - // migration with no duplicate rows (the merge is keyed on - // `object_id`). Pinned by - // `crash_after_merge_before_stamp_completes_on_next_open`. - return commit_v4_stamp_idempotently(dataset, root_uri, branch).await; - } - // A concurrent runner won the `graph_head:` CAS. Our in-hand - // handle is stale at the pre-contention HEAD, so a re-open is required - // to see the winner's commit; then re-loop (the fast path will see the - // winner's lineage and stamp). Bounded by the budget. - Err(err) if super::publisher::is_retryable_publish_conflict(&err) => { - *dataset = super::layout::open_manifest_dataset(root_uri, branch).await?; - continue; - } - Err(err) => return Err(err), - } - } - - // Budget exhausted under sustained contention. Return a CAS-typed error (not a - // plain conflict) so the publisher's outer retry loop — which only re-runs - // `is_retryable_publish_conflict` — re-runs `load_publish_state` and completes - // the migration, rather than giving up. - Err(OmniError::manifest_row_level_cas_contention(format!( - "v3→v4 lineage backfill exhausted {} retries against concurrent runners", - MIGRATION_MERGE_RETRY_BUDGET - ))) -} - -/// Stamp the v3→v4 migration's terminal version idempotently under concurrent -/// runners. `set_stamp` issues an `UpdateConfig` Lance commit; once the merge CAS -/// loser is made to converge (above), BOTH runners reach this stamp bump and race -/// it — the loser gets `lance::Error::IncompatibleTransaction` (two `UpdateConfig` -/// commits touching the same metadata key), which is NOT a row-level CAS -/// contention and so is not caught by the merge loop. But both write the SAME -/// value, so the conflict is benign: re-open and, if the stamp already reached the -/// target (the concurrent runner finished it), succeed; otherwise re-apply. -/// Bounded; on exhaustion surface a CAS-typed error for the publisher's outer -/// retry, same as the merge loop. -async fn commit_v4_stamp_idempotently( - dataset: &mut Dataset, - root_uri: &str, - branch: Option<&str>, -) -> Result<()> { - const STAMP_RETRY_BUDGET: u32 = 5; - // Exclusive range + an UNGUARDED `IncompatibleTransaction` arm: the retryable - // variant is always handled inside the loop (re-open + same-value check + retry), - // so it can never fall through to the stringifying `Err(e)` catch-all, and the - // SINGLE reachable exhaustion path is the typed contention return below. (A - // `0..=BUDGET` range with an `attempt < BUDGET` guard let the last iteration's - // retryable conflict reach the catch-all and return a non-retryable - // `OmniError::Lance` — the publisher's outer retry would then give up.) - for _ in 0..STAMP_RETRY_BUDGET { - // Inline the `update_schema_metadata` write (rather than `set_stamp`) so the - // raw Lance error variant is in hand — `set_stamp` pre-stringifies it. - let stamp_result = stamp_internal_schema(dataset).await; - match stamp_result { - Ok(_) => return Ok(()), - Err(lance::Error::IncompatibleTransaction { .. }) => { - // A concurrent runner's `UpdateConfig` preempted ours — the - // retryable case. Re-open past its commit; if it already stamped to - // the target we're done (the value is identical), else fall through - // to retry on the advanced handle. - *dataset = super::layout::open_manifest_dataset(root_uri, branch).await?; - if read_stamp(dataset) >= INTERNAL_MANIFEST_SCHEMA_VERSION { - return Ok(()); - } - } - Err(e) => return Err(OmniError::Lance(e.to_string())), - } - } - - // Exhausted the budget against sustained concurrent stampers. Return a - // CAS-typed (retryable) error so the publisher's OUTER retry — which only - // re-runs `is_retryable_publish_conflict` — completes it, rather than the - // stringified `OmniError::Lance` it would treat as fatal. - Err(OmniError::manifest_row_level_cas_contention(format!( - "v3→v4 stamp bump exhausted {} retries against concurrent runners", - STAMP_RETRY_BUDGET - ))) -} - -/// The single `update_schema_metadata` write that bumps the on-disk internal-schema -/// stamp to the current version. Extracted from `commit_v4_stamp_idempotently`'s -/// retry loop so a `failpoints` test can inject a concurrent-stamper -/// `IncompatibleTransaction` deterministically (the loop's exhaustion path is -/// otherwise near-unreachable). Returns the RAW `lance::Error` so the loop can match -/// the `IncompatibleTransaction` variant — `set_stamp` pre-stringifies it. -async fn stamp_internal_schema(dataset: &mut Dataset) -> std::result::Result<(), lance::Error> { - crate::failpoints::maybe_fail_lance_incompatible("migration.v4_stamp.force_incompatible")?; - dataset - .update_schema_metadata([( - INTERNAL_SCHEMA_VERSION_KEY.to_string(), - INTERNAL_MANIFEST_SCHEMA_VERSION.to_string(), - )]) - .await - .map(|_| ()) -} - -/// Build the `__manifest` rows for the v3→v4 backfill: one immutable -/// `graph_commit` row per commit, plus EXACTLY ONE `graph_head:` row for -/// the actual head. Each commit encodes to a `[graph_commit, graph_head]` pair, -/// but only the head commit's head row is kept — the others would be redundant -/// updates of the same `graph_head:` object_id (the head is per-branch, -/// not per-commit). -fn build_lineage_backfill_parts( - commit_by_id: &std::collections::HashMap, - head: Option<&GraphCommit>, - branch: Option<&str>, -) -> Result> { - let head_id = head.map(|h| h.graph_commit_id.as_str()); - // Deterministic iteration order (the source is a HashMap): merge-insert is - // keyed on `object_id` so the final manifest content is order-independent, - // but a stable order keeps the produced batch reproducible regardless. - let mut commits: Vec<&GraphCommit> = commit_by_id.values().collect(); - commits.sort_by(|a, b| a.graph_commit_id.cmp(&b.graph_commit_id)); - let mut parts = Vec::with_capacity(commits.len() + 1); - for commit in commits { - let row = GraphLineageRow { - graph_commit_id: commit.graph_commit_id.clone(), - manifest_branch: commit.manifest_branch.clone(), - manifest_version: commit.manifest_version, - parent_commit_id: commit.parent_commit_id.clone(), - merged_parent_commit_id: commit.merged_parent_commit_id.clone(), - actor_id: commit.actor_id.clone(), - created_at: commit.created_at, - }; - let [commit_part, head_part] = graph_lineage_row_parts(&row, branch)?; - parts.push(commit_part); - if Some(commit.graph_commit_id.as_str()) == head_id { - parts.push(head_part); - } - } - Ok(parts) -} - async fn set_stamp(dataset: &mut Dataset, version: u32) -> Result<()> { dataset .update_schema_metadata([(INTERNAL_SCHEMA_VERSION_KEY.to_string(), version.to_string())]) @@ -509,11 +126,10 @@ async fn set_stamp(dataset: &mut Dataset, version: u32) -> Result<()> { Ok(()) } -/// Test-only: force the on-disk internal-schema stamp to `version`. Used to -/// synthesize a pre-migration graph (rewinding to v3) and to simulate a crash -/// that lost the final stamp bump. Gated on `test` OR `failpoints` so the -/// fault-injection migration test (in the `failpoints` integration binary, -/// compiled without `cfg(test)`) can reach it too. +/// Test-only: force the on-disk internal-schema stamp to `version`. The minimal +/// seam used to synthesize a sub-CURRENT graph and assert the open path refuses +/// it. Gated on `test` OR `failpoints` so the integration binaries (compiled +/// without `cfg(test)`) can reach it too. #[cfg(any(test, feature = "failpoints"))] pub(crate) async fn set_stamp_for_test(dataset: &mut Dataset, version: u32) -> Result<()> { set_stamp(dataset, version).await @@ -523,10 +139,9 @@ pub(crate) async fn set_stamp_for_test(dataset: &mut Dataset, version: u32) -> R mod tests { use super::*; - /// The floor never refuses any stamp the binary can actually serve — a graph - /// at MIN through CURRENT passes, only sub-MIN / super-CURRENT are rejected. - /// With MIN = 1 and CURRENT = 4 this proves the live range is exactly [1, 4] - /// and that the floor is a no-op for every real graph (lowest real stamp is 1). + /// The guard accepts exactly the single served version and refuses anything + /// below the floor or above the ceiling. With `MIN == CURRENT == 4` the live + /// range is exactly `[4, 4]`. #[test] fn unsupported_guard_accepts_exactly_the_supported_range() { for stamp in MIN_SUPPORTED_INTERNAL_SCHEMA_VERSION..=INTERNAL_MANIFEST_SCHEMA_VERSION { diff --git a/crates/omnigraph/src/db/manifest/publisher.rs b/crates/omnigraph/src/db/manifest/publisher.rs index ccd3c5fc..2639360e 100644 --- a/crates/omnigraph/src/db/manifest/publisher.rs +++ b/crates/omnigraph/src/db/manifest/publisher.rs @@ -33,7 +33,7 @@ use crate::error::{OmniError, Result}; use super::SubTableUpdate; use super::layout::{open_manifest_dataset, tombstone_object_id, version_object_id}; use super::metadata::{TableVersionMetadata, parse_namespace_version_request}; -use super::migrations::migrate_internal_schema; +use super::migrations::{read_stamp, refuse_if_stamp_unsupported}; use super::state::{ GraphLineageRow, GraphLineageRowPart, ManifestState, assemble_manifest_state, graph_lineage_row_parts, head_lineage_row, manifest_rows_batch, manifest_schema, @@ -151,14 +151,13 @@ impl GraphNamespacePublisher { crate::failpoints::maybe_fail_retryable_contention( crate::failpoints::names::PUBLISH_LOAD_STATE_RETRYABLE_CONTENTION, )?; - let mut dataset = self.dataset().await?; - // Run pending internal-schema migrations exactly once per publish on - // the open-for-write path; idempotent when the on-disk stamp already - // matches this binary. Pass this publisher's branch so the v3→v4 lineage - // backfill reads `_graph_commits.lance` at the SAME branch it is - // publishing to (each branch backfills on its first write). See + let dataset = self.dataset().await?; + // Refuse a graph this binary cannot serve before publishing. Fresh and + // already-current graphs pass; a sub-CURRENT stamp (an older storage + // format) is refused with the rebuild-via-export/import message. There is + // no in-place migration — storage-format changes are a cutover. See // `db/manifest/migrations.rs`. - migrate_internal_schema(&mut dataset, &self.root_uri, self.branch.as_deref()).await?; + refuse_if_stamp_unsupported(read_stamp(&dataset))?; // ONE `__manifest` scan for everything the publish needs: table // locations, version entries, tombstones, AND the `graph_commit` lineage // rows for parent resolution (RFC-013 P2). The lineage extraction rides @@ -692,13 +691,8 @@ impl ManifestBatchPublisher for GraphNamespacePublisher { } for attempt in 0..=PUBLISHER_RETRY_BUDGET { - // `load_publish_state` runs the v3→v4 migration (`migrate_internal_schema`) - // on its first scan. The migration's bounded merge/stamp retries surface a - // retryable `RowLevelCasContention` on exhaustion EXPECTING this outer loop - // to re-run them — a re-run re-reads the manifest, by which point a - // concurrent winner has usually completed the migration (next scan is a - // no-op). Route a retryable load error through the SAME retry path as a - // retryable `merge_rows` conflict below, so that typed contention actually + // Route a retryable `load_publish_state` error through the SAME retry + // path as a retryable `merge_rows` conflict below, so typed contention // composes with the publisher retry instead of aborting the publish. let loaded = match self.load_publish_state().await { Ok(loaded) => loaded, diff --git a/crates/omnigraph/src/db/manifest/state.rs b/crates/omnigraph/src/db/manifest/state.rs index 13d80ba3..b546d2e2 100644 --- a/crates/omnigraph/src/db/manifest/state.rs +++ b/crates/omnigraph/src/db/manifest/state.rs @@ -623,71 +623,6 @@ pub(super) fn entries_to_batch( ) } -/// Merge-insert a set of graph-lineage rows (`graph_commit` + `graph_head`) -/// straight into `__manifest`, keyed on `object_id`. Used only by the v3→v4 -/// internal-schema backfill (RFC-013 step 4): the normal write path folds -/// lineage into the publisher's batch, but the migration writes lineage with -/// no accompanying table-version change, so it issues its own merge. -/// -/// Mirrors the publisher's merge knobs (`use_index(false)`, `skip_auto_cleanup`, -/// `conflict_retries(0)`) so it has identical CAS / cleanup semantics. The -/// migration runs under the open-for-write path and is idempotent (re-inserting -/// the same `object_id` rows updates them in place), so it does not need the -/// publisher's retry loop. Returns the advanced dataset (its version is the -/// commit the lineage landed in). -pub(crate) async fn merge_lineage_rows( - dataset: Dataset, - parts: &[GraphLineageRowPart], -) -> Result { - let len = parts.len(); - let mut object_ids = Vec::with_capacity(len); - let mut object_types = Vec::with_capacity(len); - let mut metadata = Vec::with_capacity(len); - let mut table_versions = Vec::with_capacity(len); - let mut table_branches = Vec::with_capacity(len); - for part in parts { - object_ids.push(part.object_id.clone()); - object_types.push(part.object_type.to_string()); - metadata.push(Some(part.metadata.clone())); - table_versions.push(part.table_version); - table_branches.push(part.table_branch.clone()); - } - // Lineage rows carry no table identity: empty `table_key`, null location / - // row_count (matching `lineage_part_to_pending` in the publisher). - let batch = manifest_rows_batch( - object_ids, - object_types, - vec![None; len], - metadata, - vec![String::new(); len], - table_versions, - table_branches, - vec![None; len], - )?; - let reader = - arrow_array::RecordBatchIterator::new(vec![Ok(batch)], manifest_schema()); - let dataset = Arc::new(dataset); - let mut merge_builder = - lance::dataset::MergeInsertBuilder::try_new(dataset, vec!["object_id".to_string()]) - .map_err(|e| OmniError::Lance(e.to_string()))?; - merge_builder.when_matched(lance::dataset::WhenMatched::UpdateAll); - merge_builder.when_not_matched(lance::dataset::WhenNotMatched::InsertAll); - merge_builder.conflict_retries(0); - merge_builder.use_index(false); - merge_builder.skip_auto_cleanup(true); - let (new_dataset, _stats) = merge_builder - .try_build() - .map_err(|e| OmniError::Lance(e.to_string()))? - .execute_reader(Box::new(reader)) - // Route through the publisher's classifier (not a stringify) so a - // concurrent first-open's CAS loss on `__manifest` surfaces as the SAME - // typed `RowLevelCasContention` the publisher's retry consumes. The - // migration's re-open retry loop matches on that to converge instead of - // erroring out (FIX B). - .await - .map_err(super::publisher::map_lance_publish_error)?; - Ok(Arc::try_unwrap(new_dataset).unwrap_or_else(|arc| (*arc).clone())) -} pub(super) fn manifest_rows_batch( object_ids: Vec, diff --git a/crates/omnigraph/src/db/manifest/tests.rs b/crates/omnigraph/src/db/manifest/tests.rs index 6584705a..9b677fb4 100644 --- a/crates/omnigraph/src/db/manifest/tests.rs +++ b/crates/omnigraph/src/db/manifest/tests.rs @@ -1592,134 +1592,6 @@ async fn test_init_stamps_internal_schema_version() { ); } -#[tokio::test] -async fn test_publish_migrates_pre_stamp_manifest_to_current_version() { - let dir = tempfile::tempdir().unwrap(); - let uri = dir.path().to_str().unwrap(); - let catalog = build_test_catalog(); - let mc = ManifestCoordinator::init(uri, &catalog).await.unwrap(); - - // Simulate a v1 (pre-stamp) graph by removing the schema-level stamp on disk. - { - let mut ds = open_manifest_dataset(uri, None).await.unwrap(); - ds.update_schema_metadata([( - "omnigraph:internal_schema_version".to_string(), - None::, - )]) - .await - .unwrap(); - let post = open_manifest_dataset(uri, None).await.unwrap(); - assert_eq!( - super::migrations::read_stamp(&post), - 1, - "stamp removed ⇒ read_stamp falls back to v1", - ); - } - - // Publish a no-op (empty changes) but require state to be loaded by passing - // an expected_table_versions that matches the initial state. This forces - // the publisher's open-for-write path, which runs the migration. - let mut expected = HashMap::new(); - expected.insert("node:Person".to_string(), 1); - GraphNamespacePublisher::new(uri, None) - .publish(&[], &expected, None) - .await - .unwrap(); - - let post = open_manifest_dataset(uri, None).await.unwrap(); - assert_eq!( - super::migrations::read_stamp(&post), - super::migrations::INTERNAL_MANIFEST_SCHEMA_VERSION, - "publish on a v1 graph should leave the manifest stamped at the current version", - ); - - // Manifest should still serve correctly post-migration. - drop(mc); - let reopened = ManifestCoordinator::open(uri).await.unwrap(); - assert!(reopened.snapshot().entry("node:Person").is_some()); -} - -#[tokio::test] -async fn test_v2_to_v3_sweeps_legacy_run_branches_on_write_open() { - let dir = tempfile::tempdir().unwrap(); - let uri = dir.path().to_str().unwrap(); - let catalog = build_test_catalog(); - let mut mc = ManifestCoordinator::init(uri, &catalog).await.unwrap(); - - // Synthesize a pre-MR-770 graph: several stale `__run__` staging branches - // left on `__manifest` (a real legacy graph accumulates one per run), plus - // a real user branch that must survive the sweep. Multiple run branches - // exercise the migration's delete loop on a single reused dataset handle. - mc.create_branch("__run__01J9LEGACY").await.unwrap(); - mc.create_branch("__run__01J9SECOND").await.unwrap(); - mc.create_branch("__run__01J9THIRD").await.unwrap(); - mc.create_branch("feature").await.unwrap(); - let before = mc.list_branches().await.unwrap(); - assert_eq!( - before.iter().filter(|b| b.starts_with("__run__")).count(), - 3, - "precondition: three legacy run branches exist on __manifest; got {before:?}", - ); - - // Rewind the internal-schema stamp to v2 so the next write-open runs the - // v2 → v3 sweep arm (init stamps at the current version, which is past it). - { - let mut ds = open_manifest_dataset(uri, None).await.unwrap(); - ds.update_schema_metadata([( - "omnigraph:internal_schema_version".to_string(), - Some("2".to_string()), - )]) - .await - .unwrap(); - let post = open_manifest_dataset(uri, None).await.unwrap(); - assert_eq!( - super::migrations::read_stamp(&post), - 2, - "stamp rewound to v2" - ); - } - - // A no-op publish forces the open-for-write path, which runs the migration. - let mut expected = HashMap::new(); - expected.insert("node:Person".to_string(), 1); - GraphNamespacePublisher::new(uri, None) - .publish(&[], &expected, None) - .await - .unwrap(); - - // Stamp advanced to current; the legacy run branch is physically gone from - // `__manifest` (checked via the raw, unfiltered manifest list — not the - // guard-filtered `branch_list`), and the real branch + `main` survive. - let post = open_manifest_dataset(uri, None).await.unwrap(); - assert_eq!( - super::migrations::read_stamp(&post), - super::migrations::INTERNAL_MANIFEST_SCHEMA_VERSION, - ); - let reopened = ManifestCoordinator::open(uri).await.unwrap(); - let after = reopened.list_branches().await.unwrap(); - assert!( - !after.iter().any(|b| b.starts_with("__run__")), - "legacy run branch must be swept; got {after:?}", - ); - assert!( - after.iter().any(|b| b == "feature"), - "user branch must survive" - ); - assert!(after.iter().any(|b| b == "main"), "main must survive"); - - // Idempotent: a second write-open finds the stamp at current and does not - // re-run the sweep or error. - GraphNamespacePublisher::new(uri, None) - .publish(&[], &expected, None) - .await - .unwrap(); - let final_ds = open_manifest_dataset(uri, None).await.unwrap(); - assert_eq!( - super::migrations::read_stamp(&final_ds), - super::migrations::INTERNAL_MANIFEST_SCHEMA_VERSION, - ); -} - #[tokio::test] async fn test_publish_rejects_manifest_stamped_at_future_version() { let dir = tempfile::tempdir().unwrap(); @@ -1769,156 +1641,6 @@ fn manifest_column_helpers_return_error_for_bad_schema() { assert!(err.to_string().contains("table_key")); } -// ── RFC-013 Phase 7 stage 4: existing-graph (v3 → v4) lineage migration ────── -// -// A graph created by a pre-Phase-7 binary (internal schema v3) keeps its -// lineage in `_graph_commits.lance`, with NONE in `__manifest`. The new binary -// reads lineage from the `__manifest` projection, so without a migration it -// would see an EMPTY commit DAG. These tests pin the backfill (`migrate_v3_to_v4`), -// its idempotency, the transitional v3-read fallback, the read-only refusal, and -// the crash-mid-migration recovery. - -use crate::db::commit_graph::{CommitGraph, seed_legacy_v3_lineage}; - -/// Number of `graph_commit` rows in `__manifest` at main. -async fn manifest_commit_row_count(uri: &str) -> usize { - let ds = open_manifest_dataset(uri, None).await.unwrap(); - let (rows, _heads) = read_graph_lineage(&ds).await.unwrap(); - rows.len() -} - -#[tokio::test] -async fn v3_graph_backfills_lineage_into_manifest_on_read_write_open() { - let dir = tempfile::tempdir().unwrap(); - let uri = dir.path().to_str().unwrap(); - - let fixture = seed_legacy_v3_lineage(uri).await.unwrap(); - - // Precondition: a true v3 graph — stamp 3, NO lineage rows in `__manifest`, - // and a NEW-binary projection therefore reads an empty DAG. - { - let ds = open_manifest_dataset(uri, None).await.unwrap(); - assert_eq!(super::migrations::read_stamp(&ds), 3, "fixture is stamped v3"); - } - assert_eq!( - manifest_commit_row_count(uri).await, - 0, - "precondition: __manifest carries no graph_commit rows in a v3 graph", - ); - - // Run the production read-write migration entry point (main branch). - super::migrate_on_open(uri).await.unwrap(); - - // The manifest now carries the lineage and is stamped at the current version. - { - let ds = open_manifest_dataset(uri, None).await.unwrap(); - assert_eq!( - super::migrations::read_stamp(&ds), - super::migrations::INTERNAL_MANIFEST_SCHEMA_VERSION, - "migration stamps the manifest at the current internal schema version", - ); - } - // 4 commits (genesis, A, feature, merge) → 4 `graph_commit` rows. - assert_eq!( - manifest_commit_row_count(uri).await, - fixture.all_ids.len(), - "every legacy commit is backfilled into __manifest", - ); - - // The commit-graph projection (now sourced from __manifest) reconstructs the - // full DAG: every old id resolves, parents/merge parents are connected, the - // merge commit's actor + two parents survive, and the head is the merge. - let cg = CommitGraph::open(uri).await.unwrap(); - let commits = cg.load_commits().await.unwrap(); - assert_eq!(commits.len(), fixture.all_ids.len()); - for id in &fixture.all_ids { - assert!( - cg.get_commit(id).is_some(), - "old commit id {id} must still resolve after migration", - ); - } - - let genesis = cg.get_commit(&fixture.genesis).unwrap(); - assert!(genesis.parent_commit_id.is_none(), "genesis is parentless"); - assert!(genesis.actor_id.is_none(), "genesis is actorless"); - - let commit_a = cg.get_commit(&fixture.commit_a).unwrap(); - assert_eq!(commit_a.parent_commit_id.as_deref(), Some(fixture.genesis.as_str())); - assert_eq!(commit_a.actor_id.as_deref(), Some("act-a"), "actor backfilled inline"); - - let merge = cg.get_commit(&fixture.merge_commit).unwrap(); - assert_eq!(merge.parent_commit_id.as_deref(), Some(fixture.commit_a.as_str())); - assert_eq!( - merge.merged_parent_commit_id.as_deref(), - Some(fixture.feature_commit.as_str()), - "the merge commit keeps both parents", - ); - assert_eq!(merge.actor_id.as_deref(), Some("act-merger")); - - assert_eq!( - cg.head_commit_id().await.unwrap().as_deref(), - Some(fixture.merge_commit.as_str()), - "the merge commit is the head of main after migration", - ); - - // merge_base of main vs main is reflexively the head — a smoke check that the - // ancestor walk works over the backfilled DAG. - let base = CommitGraph::merge_base(uri, None, None).await.unwrap(); - assert!(base.is_some(), "merge_base resolves over the backfilled DAG"); -} - -#[tokio::test] -async fn v3_to_v4_migration_is_idempotent() { - let dir = tempfile::tempdir().unwrap(); - let uri = dir.path().to_str().unwrap(); - let fixture = seed_legacy_v3_lineage(uri).await.unwrap(); - - super::migrate_on_open(uri).await.unwrap(); - let after_first = manifest_commit_row_count(uri).await; - // Re-running the migration must not duplicate any rows. - super::migrate_on_open(uri).await.unwrap(); - let after_second = manifest_commit_row_count(uri).await; - - assert_eq!(after_first, fixture.all_ids.len()); - assert_eq!( - after_first, after_second, - "a second migration pass adds no duplicate graph_commit rows", - ); -} - -#[tokio::test] -async fn v3_graph_reads_history_via_fallback_without_migrating() { - let dir = tempfile::tempdir().unwrap(); - let uri = dir.path().to_str().unwrap(); - let fixture = seed_legacy_v3_lineage(uri).await.unwrap(); - - // Open the commit-graph projection WITHOUT running the migration (this is the - // read-only path: `CommitGraph::open` reads, never writes). The stamp-gated - // fallback sources lineage from `_graph_commits.lance`, so history is correct. - let cg = CommitGraph::open(uri).await.unwrap(); - let commits = cg.load_commits().await.unwrap(); - assert_eq!( - commits.len(), - fixture.all_ids.len(), - "the v3 fallback reads the full legacy DAG with no migration", - ); - assert_eq!( - cg.head_commit_id().await.unwrap().as_deref(), - Some(fixture.merge_commit.as_str()), - ); - - // The fallback is read-only: stamp stays v3, __manifest still has no lineage. - { - let ds = open_manifest_dataset(uri, None).await.unwrap(); - assert_eq!(super::migrations::read_stamp(&ds), 3, "fallback did not write"); - } - assert_eq!( - manifest_commit_row_count(uri).await, - 0, - "the read-only fallback writes nothing to __manifest", - ); -} - #[tokio::test] async fn future_stamp_is_refused_in_both_open_modes() { use crate::db::{Omnigraph, OpenMode}; @@ -1958,408 +1680,51 @@ async fn future_stamp_is_refused_in_both_open_modes() { } } +// A graph stamped below CURRENT (the strand floor: `MIN_SUPPORTED == CURRENT`, +// so anything older than v4) is refused on open in BOTH modes, with the +// rebuild-via-export/import hint — there is no in-place migration. This is the +// floor twin of `future_stamp_is_refused_in_both_open_modes` (the ceiling). The +// open path (`Omnigraph::open` read-write and `Omnigraph::open_read_only`) routes +// the stamp through `refuse_if_stamp_unsupported`, whose sub-MIN branch points +// the operator at `omnigraph export`. #[tokio::test] -async fn sub_floor_stamp_is_refused_in_both_open_modes() { - use crate::db::{Omnigraph, OpenMode}; - use crate::storage::storage_for_uri; +async fn sub_current_graph_is_refused_on_open_with_rebuild_hint() { + use crate::db::Omnigraph; let dir = tempfile::tempdir().unwrap(); let uri = dir.path().to_str().unwrap(); + // A full v4 graph (schema artifacts present) so the open path gets past its + // schema read to the stamp check. Omnigraph::init(uri, "node Person { name: String }\n") .await .unwrap(); - // Stamp below MIN_SUPPORTED (1 today). No real graph carries 0 — `read_stamp` - // floors an absent stamp at 1 — so this is the symmetric twin of - // `future_stamp_is_refused_in_both_open_modes`, exercising the floor the - // combined `refuse_if_stamp_unsupported` guard adds at every open mode - // (write-path migrate, read-only open, and the branch lineage-read path). The - // upper side — a graph at exactly MIN migrating to CURRENT — is covered by - // `test_publish_migrates_pre_stamp_manifest_to_current_version`, where an - // absent stamp reads as 1 = MIN. - { - let mut ds = open_manifest_dataset(uri, None).await.unwrap(); - ds.update_schema_metadata([( - "omnigraph:internal_schema_version".to_string(), - Some("0".to_string()), - )]) - .await - .unwrap(); - } - - let storage = storage_for_uri(uri).unwrap(); - for mode in [OpenMode::ReadWrite, OpenMode::ReadOnly] { - let err = match Omnigraph::open_with_storage_and_mode(uri, Arc::clone(&storage), mode).await - { - Ok(_) => panic!("{mode:?}: a sub-floor graph must be refused"), - Err(err) => err, - }; - assert!( - err.to_string().contains("migrate it forward"), - "{mode:?}: expected a migrate-forward floor refusal, got: {err}", - ); - } -} - -#[tokio::test] -async fn crash_after_merge_before_stamp_completes_on_next_open() { - let dir = tempfile::tempdir().unwrap(); - let uri = dir.path().to_str().unwrap(); - let fixture = seed_legacy_v3_lineage(uri).await.unwrap(); - - // Simulate a crash that landed the lineage merge but lost the stamp bump: - // run the full migration (lineage now in __manifest), then rewind the stamp - // to v3. This is exactly the on-disk state after a crash at the - // `migration.v3_to_v4.after_merge_before_stamp` window. - super::migrate_on_open(uri).await.unwrap(); + // Rewind main's stamp to v3 — a graph this binary's single served version + // (v4) cannot open, since `MIN_SUPPORTED == CURRENT == 4`. { let mut ds = open_manifest_dataset(uri, None).await.unwrap(); super::migrations::set_stamp_for_test(&mut ds, 3).await.unwrap(); } - assert_eq!( - manifest_commit_row_count(uri).await, - fixture.all_ids.len(), - "crash state: lineage present, stamp rewound to v3", - ); - - // The next open re-enters at v3; the idempotency guard sees the lineage and - // skips straight to the stamp bump — no duplicate rows, migration completes. - super::migrate_on_open(uri).await.unwrap(); - { - let ds = open_manifest_dataset(uri, None).await.unwrap(); - assert_eq!( - super::migrations::read_stamp(&ds), - super::migrations::INTERNAL_MANIFEST_SCHEMA_VERSION, - "the re-entered migration completes the stamp bump", - ); - } - assert_eq!( - manifest_commit_row_count(uri).await, - fixture.all_ids.len(), - "re-running over an already-merged manifest adds no duplicate rows", - ); -} - -/// Migrate the `__manifest` at `branch` (the per-branch v3→v4 entry shape: -/// `migrate_on_open` runs it for main; the publisher runs it for each branch's -/// first write). Returns the migrated branch lineage `(commit_by_id, heads)`. -async fn migrate_branch_and_read_lineage( - uri: &str, - branch: &str, -) -> ( - std::collections::HashMap, - std::collections::HashMap, -) { - let mut ds = open_manifest_dataset(uri, Some(branch)).await.unwrap(); - super::migrations::migrate_internal_schema(&mut ds, uri, Some(branch)) - .await - .unwrap(); - // Re-open at the branch so the read sees the migration's committed HEAD. - let ds = open_manifest_dataset(uri, Some(branch)).await.unwrap(); - let (rows, heads) = read_graph_lineage(&ds).await.unwrap(); - let by_id = rows - .into_iter() - .map(|r| (r.graph_commit_id.clone(), r)) - .collect(); - (by_id, heads) -} - -// FIX C — the per-branch v3→v4 migration against a REAL Lance branch. -// -// `seed_legacy_v3_lineage` writes every commit (incl. the "feature"-tagged one) -// to MAIN's `_graph_commits.lance` with `manifest_branch` as a mere field — it -// never exercises the production per-branch path (`read_legacy_commit_cache` → -// `checkout_branch`, and a branch-scoped `__manifest`). This test builds a graph -// with a REAL Lance branch on both `_graph_commits.lance` and `__manifest`, then -// migrates the BRANCH and asserts the branch's lineage lands in the BRANCH's -// `__manifest` with main untouched. -// -// It also EMPIRICALLY decides the open question behind FIX B: the fast-path -// `read_graph_lineage(dataset)` has no `manifest_branch` filter in its query, but -// `dataset` is branch-scoped (`__manifest` is Lance-branched per graph-branch), -// so a branch should read only its OWN lineage. If migrating the branch were to -// leak main's backfill (or vice versa), that would be a 5th bug needing a branch -// filter. The assertions below pin that it does not. -#[tokio::test] -async fn v3_branch_migration_backfills_branch_manifest_and_leaves_main_untouched() { - use crate::db::commit_graph::seed_legacy_v3_lineage_with_branch; - - let dir = tempfile::tempdir().unwrap(); - let uri = dir.path().to_str().unwrap(); - let fx = seed_legacy_v3_lineage_with_branch(uri).await.unwrap(); - - // Precondition: both main and the branch are v3 with no lineage in __manifest. - for branch in [None, Some(fx.branch.as_str())] { - let ds = open_manifest_dataset(uri, branch).await.unwrap(); - assert_eq!( - super::migrations::read_stamp(&ds), - 3, - "{branch:?}: fixture branch is stamped v3", - ); - let (rows, _heads) = read_graph_lineage(&ds).await.unwrap(); - assert!( - rows.is_empty(), - "{branch:?}: fixture branch has no lineage in __manifest", - ); - } - - // Migrate ONLY the branch. - let (branch_by_id, branch_heads) = migrate_branch_and_read_lineage(uri, &fx.branch).await; - - // The branch's __manifest now carries the branch's full DAG: genesis, A, and - // the branch commit (3 rows), with the branch commit as `graph_head:feature`. - assert_eq!( - branch_by_id.len(), - 3, - "the branch backfill carries genesis + A + the branch commit", - ); - for id in [&fx.genesis, &fx.commit_a, &fx.branch_commit] { - assert!( - branch_by_id.contains_key(id), - "branch commit {id} must be backfilled into the branch __manifest", - ); - } - assert_eq!( - branch_heads.get(&fx.branch).map(String::as_str), - Some(fx.branch_commit.as_str()), - "graph_head:feature points at the branch commit", - ); - - // Parents + actors survived the backfill. - let branch_commit = &branch_by_id[&fx.branch_commit]; - assert_eq!( - branch_commit.parent_commit_id.as_deref(), - Some(fx.commit_a.as_str()), - "the branch commit keeps its parent", - ); - assert_eq!( - branch_commit.actor_id.as_deref(), - Some("act-branch"), - "the branch commit's authored actor survives", - ); - assert_eq!( - branch_by_id[&fx.commit_a].actor_id.as_deref(), - Some("act-a"), - "the inherited main commit's actor survives on the branch", - ); - - // Contingency check: migrating the branch left MAIN's __manifest untouched — - // still v3, still no lineage. The unfiltered fast-path read is branch-correct - // because `__manifest` is Lance-branched; no `manifest_branch` filter is - // needed (no 5th bug). - { - let main_ds = open_manifest_dataset(uri, None).await.unwrap(); - assert_eq!( - super::migrations::read_stamp(&main_ds), - 3, - "migrating the branch must not advance main's stamp", - ); - let (main_rows, _heads) = read_graph_lineage(&main_ds).await.unwrap(); - assert!( - main_rows.is_empty(), - "migrating the branch must not backfill main's __manifest", - ); - } -} - -// FIX D — the branch read path refuses a `> CURRENT` branch stamp. -// -// `load_commit_cache_for_branch` handled `< CURRENT` (the v3 fallback) and -// `>= CURRENT` (the manifest projection), but never a `> CURRENT` branch stamp — -// it would misread a future shape with the projection. The main read path already -// refuses (`refuse_if_internal_schema_unsupported`), and migrations run main-first so -// main's stamp ≥ every branch's — so this is not a live hole today. The guard is -// defense-in-depth against that ordering invariant ever weakening. Here we -// synthesize the unreachable state directly (force-stamp a branch past CURRENT) -// and assert the branch read refuses loudly instead of misreading. -#[tokio::test] -async fn branch_read_refuses_future_internal_schema_stamp() { - use crate::db::commit_graph::{CommitGraph, seed_legacy_v3_lineage_with_branch}; - - let dir = tempfile::tempdir().unwrap(); - let uri = dir.path().to_str().unwrap(); - // A graph with a real `feature` Lance branch on both `_graph_commits.lance` - // and `__manifest` (so `open_at_branch` can check it out). - let fx = seed_legacy_v3_lineage_with_branch(uri).await.unwrap(); - - // Force the BRANCH's `__manifest` stamp past this binary's known version. - let future = super::migrations::INTERNAL_MANIFEST_SCHEMA_VERSION + 1; - { - let mut branch_ds = open_manifest_dataset(uri, Some(&fx.branch)).await.unwrap(); - super::migrations::set_stamp_for_test(&mut branch_ds, future) - .await - .unwrap(); - } - // Reading the commit graph at that branch must refuse, not misread. - let err = match CommitGraph::open_at_branch(uri, &fx.branch).await { - Ok(_) => panic!("a branch stamped past CURRENT must be refused on read"), - Err(e) => e, + // Read-write open is refused with the rebuild hint. + let rw_err = match Omnigraph::open(uri).await { + Ok(_) => panic!("read-write open of a sub-CURRENT graph must be refused"), + Err(err) => err, }; assert!( - err.to_string().contains("upgrade omnigraph"), - "expected an upgrade-omnigraph refusal at the branch read, got: {err}", - ); -} - -// A v4 branch whose AUTHORITATIVE lineage lives in `__manifest` must stay -// readable even when its DERIVED `_graph_commits.lance` branch ref is gone. -// -// `_graph_commits.lance` is no longer the source of graph lineage on a v4 graph -// (RFC-013 Phase 7) — `__manifest`'s `graph_commit`/`graph_head:` rows -// are. The Lance branch ref on `_graph_commits.lance` is a derived artifact, kept -// only so `create_branch`/`cleanup` have something to operate on. An interrupted -// fork-reclaim or a `cleanup` race can leave that ref missing while the manifest -// lineage is fully intact. Per invariants 7 + 15 a missing DERIVED ref must not -// fail a LOGICAL read of the lineage. -// -// The wedge: take a real v4 `feature` branch (its `graph_head:feature` row in -// `__manifest`), then `force_delete` ONLY the `_graph_commits.lance` `feature` -// ref — manifest lineage is left authoritative. The contract: -// - reads at the wedged branch (`open_at_branch` / list-commits / `merge_base`) -// SUCCEED, sourcing the DAG from `__manifest`; and -// - a WRITE that needs the derived ref (`create_branch`) fails LOUDLY with the -// typed actionable error, deferring repair to `cleanup`'s orphan reconciler. -// -// RED before the fix: `open_at_branch` does a hard `checkout_branch(branch)?` on -// the now-missing `_graph_commits.lance` ref and errors `OmniError::Lance`, -// wedging the logical read. -#[tokio::test] -async fn open_at_branch_reads_manifest_lineage_when_commit_graph_ref_is_missing() { - use crate::db::commit_graph::{CommitGraph, seed_legacy_v3_lineage_with_branch}; - - let dir = tempfile::tempdir().unwrap(); - let uri = dir.path().to_str().unwrap(); - - // 1. A graph with a REAL `feature` Lance branch on both `_graph_commits.lance` - // and `__manifest`, then migrate BOTH main and the branch to v4 so the - // branch's lineage is authoritative in `__manifest` (not the legacy - // fallback). After this, `graph_head:feature` resolves the branch commit - // from `__manifest` and the `_graph_commits.lance` `feature` ref still - // exists (the v3→v4 migration leaves it in place). - let fx = seed_legacy_v3_lineage_with_branch(uri).await.unwrap(); - super::migrate_on_open(uri).await.unwrap(); - let (_branch_by_id, branch_heads) = migrate_branch_and_read_lineage(uri, &fx.branch).await; - assert_eq!( - branch_heads.get(&fx.branch).map(String::as_str), - Some(fx.branch_commit.as_str()), - "precondition: __manifest carries graph_head:feature (lineage is authoritative)", - ); - - // 2. Force-delete ONLY the derived `_graph_commits.lance` `feature` ref, - // leaving the `__manifest` `feature` branch (and its lineage) untouched — - // the exact shape an interrupted fork-reclaim / cleanup race produces. - { - let mut cg = CommitGraph::open(uri).await.unwrap(); - cg.force_delete_branch(&fx.branch).await.unwrap(); - } - // Sanity: the derived ref is genuinely gone from `_graph_commits.lance`. - { - let cg = CommitGraph::open(uri).await.unwrap(); - let branches = cg.list_branches().await.unwrap(); - assert!( - !branches.iter().any(|b| b == &fx.branch), - "the _graph_commits.lance feature ref must be deleted to build the wedge, got: {branches:?}", - ); - } - - // 3a. The logical READS at the branch succeed from `__manifest` despite the - // missing derived ref. `open_at_branch` is the one that errors pre-fix. - let mut cg = CommitGraph::open_at_branch(uri, &fx.branch) - .await - .expect("open_at_branch must read manifest lineage when the commit-graph ref is missing"); - let commits = cg.load_commits().await.unwrap(); - assert_eq!( - commits.len(), - 3, - "the branch DAG (genesis + A + branch commit) is read from __manifest", - ); - assert_eq!( - cg.head_commit_id().await.unwrap().as_deref(), - Some(fx.branch_commit.as_str()), - "the branch head resolves from __manifest's graph_head:feature", - ); - let base = CommitGraph::merge_base(uri, Some(&fx.branch), Some(&fx.branch)) - .await - .expect("merge_base must resolve over the manifest-sourced DAG"); - assert_eq!( - base.map(|c| c.graph_commit_id), - Some(fx.branch_commit.clone()), - "merge_base(feature, feature) is reflexively the branch head", + rw_err.to_string().contains("export"), + "read-write refusal must point at `omnigraph export`, got: {rw_err}", ); - // 3b. A WRITE that needs the derived ref fails loudly + actionably — the repair - // is deferred to `cleanup`'s orphan reconciler, not inlined on a read. - let err = match cg.create_branch("derived").await { - Ok(()) => panic!("create_branch must fail when the commit-graph branch ref is missing"), - Err(e) => e, + // Read-only open is refused identically. + let ro_err = match Omnigraph::open_read_only(uri).await { + Ok(_) => panic!("read-only open of a sub-CURRENT graph must be refused"), + Err(err) => err, }; - let msg = err.to_string(); assert!( - msg.contains("commit-graph branch ref") && msg.contains("is missing"), - "expected the typed missing-ref error, got: {msg}", - ); -} - -// FIX B — the v3→v4 lineage backfill must be concurrent-runner idempotent. -// -// `migrate_v2_to_v3` is explicitly safe under two processes opening the same -// legacy graph at once (each re-enumerates branches; `force_delete_branch` -// tolerates an already-gone branch). v3→v4 regressed that: `merge_lineage_rows` -// uses `conflict_retries(0)` and the migration had no app-level retry, so a -// concurrent first-open's CAS loser errored the whole open instead of converging. -// -// This test reproduces exactly two concurrent first-opens: two `__manifest` -// handles opened at the SAME pre-migration (v3, empty-lineage) HEAD, then their -// `migrate_internal_schema` calls run under `tokio::join!`. Both pass the -// fast-path empty-lineage check and both attempt the backfill merge, so the -// row-level CAS on `graph_head:main` is guaranteed to fire — deterministically -// red against the pre-fix code (the loser errors). The contract: BOTH converge -// to `Ok`, the manifest carries exactly the fixture's commit rows (merge keyed on -// `object_id`, so a double-merge stays exact), and the stamp is v4. -// -// (Driving pre-opened handles rather than `migrate_on_open(uri)` twice is a -// deliberate choice: `migrate_on_open` opens fresh each call, so two of them can -// luckily serialize — one finishes before the other reads the fast path — which -// would not exercise the CAS path and would pass even pre-fix. Pre-opening both -// at the empty-lineage HEAD forces the contention every run, so the RED is real.) -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn concurrent_v3_to_v4_migrations_both_converge() { - let dir = tempfile::tempdir().unwrap(); - let uri = dir.path().to_str().unwrap(); - let fixture = seed_legacy_v3_lineage(uri).await.unwrap(); - - // Two handles opened at the same pre-migration HEAD: both see stamp v3 and an - // empty lineage, so both will run the full backfill and collide on the merge. - let mut ds_a = open_manifest_dataset(uri, None).await.unwrap(); - let mut ds_b = open_manifest_dataset(uri, None).await.unwrap(); - - let (res_a, res_b) = tokio::join!( - super::migrations::migrate_internal_schema(&mut ds_a, uri, None), - super::migrations::migrate_internal_schema(&mut ds_b, uri, None), - ); - - // The whole contract: a concurrent first-open's CAS loser converges instead of - // erroring. BOTH must succeed. - res_a.expect("migration runner A must converge"); - res_b.expect("migration runner B must converge"); - - // Exactly the fixture's commits, no duplicates (the merge is keyed on - // `object_id`, so even a double-merge under read-after-write lag stays exact). - assert_eq!( - manifest_commit_row_count(uri).await, - fixture.all_ids.len(), - "concurrent backfills converge to exactly the fixture's commit rows", + ro_err.to_string().contains("export"), + "read-only refusal must point at `omnigraph export`, got: {ro_err}", ); - // And the stamp landed at v4. - { - let ds = open_manifest_dataset(uri, None).await.unwrap(); - assert_eq!( - super::migrations::read_stamp(&ds), - super::migrations::INTERNAL_MANIFEST_SCHEMA_VERSION, - "both runners leave the manifest stamped at the current version", - ); - } } // ── RFC-013 Phase 7 / step 5: the `graph_head` concurrency gate ────────────── diff --git a/crates/omnigraph/src/db/omnigraph.rs b/crates/omnigraph/src/db/omnigraph.rs index c3860c56..96b837e5 100644 --- a/crates/omnigraph/src/db/omnigraph.rs +++ b/crates/omnigraph/src/db/omnigraph.rs @@ -408,24 +408,13 @@ impl Omnigraph { mode: OpenMode, ) -> Result { let root = normalize_root_uri(uri)?; - // Apply pending internal-schema migrations before the coordinator reads - // branch state, so `branch_list` and the schema-apply blocking-branch - // checks observe the post-migration graph — notably the v2→v3 sweep of - // legacy `__run__*` staging branches (MR-770). ReadWrite only: a - // read-only open must not trigger object-store writes, so a read-only - // open of an unmigrated legacy graph still lists `__run__*` until its - // first read-write open (an accepted, documented limitation). - if matches!(mode, OpenMode::ReadWrite) { - crate::db::manifest::migrate_on_open(&root).await?; - } else { - // A read-only open skips `migrate_on_open` (no object-store writes), - // which is where the version refusal otherwise lives. Still refuse a - // `__manifest` stamped outside this binary's supported range — newer - // than CURRENT (an old binary cannot silently misread a newer graph, - // e.g. one folded to internal-schema v4 lineage), or below - // MIN_SUPPORTED (predates the readers we carry). Read-only, no write. - crate::db::manifest::refuse_if_internal_schema_unsupported(&root).await?; - } + // Refuse a `__manifest` this binary cannot serve before the coordinator + // reads any branch state — newer than CURRENT (an old binary must not + // silently misread a newer graph) or below MIN_SUPPORTED (an older + // storage format this binary does not read — rebuild via export/import). + // Both open modes refuse: there is no in-place migration, and the check is + // a stamp read with no object-store writes, so it is safe under ReadOnly. + crate::db::manifest::refuse_if_internal_schema_unsupported(&root).await?; // Open the coordinator first so the schema-staging recovery sweep can // compare its snapshot against any leftover staging files. let mut coordinator = GraphCoordinator::open(&root, Arc::clone(&storage)).await?; diff --git a/crates/omnigraph/src/failpoints.rs b/crates/omnigraph/src/failpoints.rs index a353345d..1eca23af 100644 --- a/crates/omnigraph/src/failpoints.rs +++ b/crates/omnigraph/src/failpoints.rs @@ -14,50 +14,11 @@ pub(crate) fn maybe_fail(_name: &str) -> Result<()> { Ok(()) } -/// Failpoint that injects a *Lance* error rather than an `OmniError`. Used to -/// stand in for a `Dataset::open` failing with a transient/corrupt (non-not-found) -/// error, so a test can drive the caller's lance-error classification — the -/// behavior FIX A (`read_legacy_commit_cache`) relies on: a not-found is benign -/// (empty), anything else propagates. A no-op without the `failpoints` feature -/// (the injected variant is therefore unreachable in release builds). -#[allow(unused_variables)] -pub(crate) fn maybe_fail_lance_open(name: &str) -> std::result::Result<(), lance::Error> { - #[cfg(feature = "failpoints")] - { - fail::fail_point!(name, |_| { - Err(lance::Error::io(format!( - "injected failpoint triggered: {name}" - ))) - }); - } - Ok(()) -} - -/// Failpoint that injects a Lance `IncompatibleTransaction` — the variant a -/// concurrent `UpdateConfig` stamp race produces. Lets a test drive the v3→v4 -/// stamp loop's exhaustion path (`commit_v4_stamp_idempotently`) deterministically; -/// it is otherwise near-unreachable, since a real concurrent winner stamps the SAME -/// value, so the loop's re-read returns `Ok` on the first retry. A no-op without the -/// `failpoints` feature. -#[allow(unused_variables)] -pub(crate) fn maybe_fail_lance_incompatible(name: &str) -> std::result::Result<(), lance::Error> { - #[cfg(feature = "failpoints")] - { - fail::fail_point!(name, |_| { - Err(lance::Error::incompatible_transaction_source( - format!("injected failpoint triggered: {name}").into(), - )) - }); - } - Ok(()) -} - /// Failpoint that injects a *retryable* `RowLevelCasContention` `OmniError` — the /// typed conflict the manifest publisher's outer retry treats as retryable /// (`is_retryable_publish_conflict`). Used to drive the publisher's -/// retry-on-`load_publish_state`-error path deterministically: the v3→v4 migration -/// surfaces this same type on exhaustion EXPECTING the publisher to re-run the -/// load, a path otherwise reachable only under sustained multi-writer contention. +/// retry-on-`load_publish_state`-error path deterministically, a path otherwise +/// reachable only under sustained multi-writer contention. /// A no-op without the `failpoints` feature. #[allow(unused_variables)] pub(crate) fn maybe_fail_retryable_contention(name: &str) -> Result<()> { @@ -113,12 +74,8 @@ pub mod names { pub const SCHEMA_APPLY_AFTER_MANIFEST_COMMIT: &str = "schema_apply.after_manifest_commit"; pub const SCHEMA_APPLY_AFTER_STAGING_WRITE: &str = "schema_apply.after_staging_write"; pub const SCHEMA_APPLY_BEFORE_STAGING_WRITE: &str = "schema_apply.before_staging_write"; - // RFC-013 Phase 7 migration failpoints (this branch). - pub const MIGRATION_V3_TO_V4_LEGACY_OPEN: &str = "migration.v3_to_v4.legacy_open"; - pub const MIGRATION_V4_STAMP_FORCE_INCOMPATIBLE: &str = "migration.v4_stamp.force_incompatible"; /// Injects a retryable `RowLevelCasContention` from `load_publish_state` so a - /// test can prove the publisher's outer retry re-runs the load (the migration - /// surfaces this same typed error on exhaustion). + /// test can prove the publisher's outer retry re-runs the load. pub const PUBLISH_LOAD_STATE_RETRYABLE_CONTENTION: &str = "publish.load_state_retryable_contention"; } diff --git a/crates/omnigraph/tests/failpoints.rs b/crates/omnigraph/tests/failpoints.rs index e33aaf28..93fa12a4 100644 --- a/crates/omnigraph/tests/failpoints.rs +++ b/crates/omnigraph/tests/failpoints.rs @@ -4474,138 +4474,12 @@ async fn init_failpoint_returns_original_error_not_cleanup_error() { ); } -// ── RFC-013 Phase 7 / FIX A: a transient legacy-open failure must abort the ── -// v3→v4 migration loudly, not silently swallow the lineage and stamp v4. -// -// `migrate_v3_to_v4` backfills graph lineage from `_graph_commits.lance` into -// `__manifest`, then stamps internal-schema v4. The migration runs exactly once -// per graph (`migrate_internal_schema` is `while stamp < CURRENT`). If a -// transient or corrupt `Dataset::open` of the legacy commit dataset is treated -// as "no legacy data" (the pre-fix `Err(_) => empty` arm), the migration backfills -// NOTHING and stamps v4 — orphaning the real lineage permanently, since the v3 -// fallback is then disabled. The fix matches the not-found variants (benign: -// genuinely no legacy data) and propagates anything else. -// -// This test injects a non-not-found Lance error at the legacy open via the -// `migration.v3_to_v4.legacy_open` failpoint. The load-bearing assertion is the -// last one: a once-transient failure leaves the graph RETRYABLE (stamp still v3, -// no lineage), so a later open with the fault cleared completes the migration — -// it was not a poison pill. -#[tokio::test] -async fn transient_legacy_open_failure_aborts_migration_without_stamping_v4() { - let _scenario = FailScenario::setup(); - let dir = tempfile::tempdir().unwrap(); - let uri = dir.path().to_str().unwrap().to_string(); - - // A real pre-Phase-7 (v3) graph: lineage only in `_graph_commits.lance`, - // `__manifest` stamped v3 with no `graph_commit` rows. - let fixture = omnigraph::db::commit_graph::seed_legacy_v3_lineage(&uri) - .await - .unwrap(); - let (rows_before, stamp_before) = - omnigraph::db::manifest::lineage_row_count_and_stamp_for_test(&uri, None) - .await - .unwrap(); - assert_eq!(stamp_before, 3, "fixture is stamped v3"); - assert_eq!(rows_before, 0, "fixture has no lineage in __manifest"); - - // Arm the legacy-open fault and run the read-write migration entry point. - { - let _fp = ScopedFailPoint::new(names::MIGRATION_V3_TO_V4_LEGACY_OPEN, "return"); - let err = match omnigraph::db::manifest::migrate_on_open_for_test(&uri).await { - Ok(()) => panic!("migration must abort when the legacy open fails transiently"), - Err(e) => e, - }; - // The injected (non-not-found) Lance error must surface, not be masked. - let msg = err.to_string(); - assert!( - msg.contains("injected failpoint triggered: migration.v3_to_v4.legacy_open"), - "expected the injected legacy-open error to propagate, got: {msg}" - ); - } - - // The migration left NO drift: stamp still v3, still no lineage. (Pre-fix, - // the swallow would have stamped v4 with an empty backfill — permanent loss.) - let (rows_after_fault, stamp_after_fault) = - omnigraph::db::manifest::lineage_row_count_and_stamp_for_test(&uri, None) - .await - .unwrap(); - assert_eq!( - stamp_after_fault, 3, - "a transient legacy-open failure must NOT stamp the manifest to v4", - ); - assert_eq!( - rows_after_fault, 0, - "a transient legacy-open failure must NOT partially backfill lineage", - ); - - // The whole correctness claim: a once-transient failure is retryable. With the - // fault cleared, the next migration pass reads the legacy lineage and completes. - omnigraph::db::manifest::migrate_on_open_for_test(&uri) - .await - .unwrap(); - let (rows_done, stamp_done) = - omnigraph::db::manifest::lineage_row_count_and_stamp_for_test(&uri, None) - .await - .unwrap(); - assert_eq!(stamp_done, 4, "the retried migration stamps v4"); - assert_eq!( - rows_done, - fixture.all_ids.len(), - "the retried migration backfills every legacy commit", - ); -} - -// ── RFC-013 Phase 7 / FIX B follow-up: the v3→v4 stamp-bump retry loop must ── -// surface a RETRYABLE contention error on exhaustion, not a stringified Lance error. -// -// `commit_v4_stamp_idempotently` bumps the internal-schema stamp under concurrent -// runners: the `UpdateConfig` CAS loser gets `IncompatibleTransaction`, re-opens, -// confirms the winner stamped the same value, and is done. Genuine exhaustion (every -// attempt loses) must return a `RowLevelCasContention` so the publisher's OUTER retry -// completes the one-time open — an `OmniError::Lance` would be treated as fatal. The -// `migration.v4_stamp.force_incompatible` failpoint forces every stamp attempt to lose, -// driving the otherwise-near-unreachable exhaustion path deterministically. (Pre-fix — -// `0..=BUDGET` + an `attempt < BUDGET` guard — the last iteration fell through to the -// stringifying `Err(e)` arm and returned a non-retryable `OmniError::Lance`.) -#[tokio::test] -async fn v4_stamp_exhaustion_returns_retryable_contention() { - let _scenario = FailScenario::setup(); - let dir = tempfile::tempdir().unwrap(); - let uri = dir.path().to_str().unwrap().to_string(); - - // A real v3 graph: the backfill merge succeeds; only the terminal stamp loop - // is forced to exhaust. - let _fixture = omnigraph::db::commit_graph::seed_legacy_v3_lineage(&uri) - .await - .unwrap(); - - let _fp = ScopedFailPoint::new(names::MIGRATION_V4_STAMP_FORCE_INCOMPATIBLE, "return"); - let err = match omnigraph::db::manifest::migrate_on_open_for_test(&uri).await { - Ok(()) => panic!("migration must error when the stamp bump exhausts its retries"), - Err(e) => e, - }; - assert!( - matches!( - &err, - omnigraph::error::OmniError::Manifest(m) - if matches!( - m.details, - Some(omnigraph::error::ManifestConflictDetails::RowLevelCasContention) - ) - ), - "stamp-bump exhaustion must surface a RETRYABLE RowLevelCasContention so the \ - publisher's outer retry completes the open, got: {err:?}", - ); -} - // The publisher's outer retry must re-run `load_publish_state` on a RETRYABLE error, -// not propagate it fatally. `load_publish_state` runs `migrate_internal_schema`, whose -// bounded merge/stamp loops surface a `RowLevelCasContention` on exhaustion EXPECTING -// this re-run (a clean second scan, by which point a concurrent winner has finished the -// migration). Before the fix, `load_publish_state().await?` short-circuited the loop — -// only `merge_rows` conflicts hit the retry — so the typed contention aborted the -// publish. Inject a ONE-SHOT retryable contention into `load_publish_state`: the write +// not propagate it fatally. A bounded internal loop can surface a `RowLevelCasContention` +// on exhaustion EXPECTING this re-run (a clean second scan, by which point a concurrent +// winner has finished). Before the fix, `load_publish_state().await?` short-circuited the +// outer loop — only `merge_rows` conflicts hit the retry — so the typed contention aborted +// the publish. Inject a ONE-SHOT retryable contention into `load_publish_state`: the write // must still commit, because the publisher retries and the cleared second attempt wins. #[tokio::test] #[serial] diff --git a/crates/omnigraph/tests/lance_surface_guards.rs b/crates/omnigraph/tests/lance_surface_guards.rs index e1fa2ddf..4933166e 100644 --- a/crates/omnigraph/tests/lance_surface_guards.rs +++ b/crates/omnigraph/tests/lance_surface_guards.rs @@ -86,29 +86,6 @@ async fn lance_error_too_much_write_contention_variant_exists() { ); } -// --- Guard 1a: LanceError::IncompatibleTransaction variant exists ---------- -// -// `db/manifest/migrations.rs::commit_v4_stamp_idempotently` pattern-matches on -// this variant: two concurrent v3→v4 runners both bump the internal-schema stamp -// (an `UpdateConfig` commit on the same metadata key), and the loser gets -// `IncompatibleTransaction`. Since both write the same value the conflict is -// benign and is retried idempotently. If Lance renames the variant or removes the -// builder, the match silently stops catching the conflict — this guard fails to -// force an update. - -#[tokio::test] -async fn lance_error_incompatible_transaction_variant_exists() { - let err = lance::Error::incompatible_transaction_source( - "concurrent UpdateConfig at version N".into(), - ); - assert!( - matches!(err, lance::Error::IncompatibleTransaction { .. }), - "Lance::Error::IncompatibleTransaction variant missing or renamed; \ - update db/manifest/migrations.rs::commit_v4_stamp_idempotently and \ - this guard, then re-pin docs/dev/lance.md." - ); -} - // --- Guard 1c: LanceError::DatasetAlreadyExists variant exists -------------- // // `db/commit_graph.rs` and `db/recovery_audit.rs` create internal Lance tables @@ -130,40 +107,6 @@ async fn lance_error_dataset_already_exists_variant_exists() { ); } -// --- Guard 1b: Dataset::open on a missing path returns a not-found variant -- -// -// `db/commit_graph.rs::read_legacy_commit_cache` (the v3→v4 lineage migration -// source) classifies a legacy-open error: a genuine not-found is the benign -// "no legacy data" signal (empty cache), and ANY OTHER error propagates loudly -// rather than being read as "empty" — a swallow there would let the migration -// stamp v4 over an empty backfill, orphaning real lineage permanently. That -// classification relies on Lance mapping an object-store NotFound to -// `DatasetNotFound` (or, for some paths, `NotFound`). If a Lance bump emits a -// different variant for a missing dataset, the migration would propagate a -// genuine "no legacy data" as a hard error — this guard turns red to force the -// classifier (and this guard) to be updated together. - -#[tokio::test] -async fn dataset_open_missing_returns_not_found_variant() { - let dir = tempfile::tempdir().unwrap(); - // A path that was never written — nothing to open. - let missing = dir.path().join("does-not-exist.lance"); - let err = match Dataset::open(missing.to_str().unwrap()).await { - Ok(_) => panic!("opening a never-written dataset path must error"), - Err(e) => e, - }; - assert!( - matches!( - err, - lance::Error::DatasetNotFound { .. } | lance::Error::NotFound { .. } - ), - "Dataset::open on a missing path no longer returns DatasetNotFound/NotFound \ - (got: {err:?}); update db/commit_graph.rs::read_legacy_commit_cache's \ - legacy-open classification and this guard together, then re-pin \ - docs/dev/lance.md." - ); -} - // --- Guard 2: ManifestLocation field shape --------------------------------- // // `db/manifest/metadata.rs:84-88` reads `.path`, `.size`, `.e_tag`, From 46285c01a587ba2b19fd2e4bcf3a9216888d50ef Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Sun, 28 Jun 2026 14:15:57 +0200 Subject: [PATCH 3/8] =?UTF-8?q?feat(engine)!:=20retire=20`=5Fgraph=5Fcommi?= =?UTF-8?q?ts.lance`=20/=20`=5Fgraph=5Fcommit=5Factors.lance`=20=E2=80=94?= =?UTF-8?q?=20CommitGraph=20is=20a=20pure=20`=5F=5Fmanifest`=20projection?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since RFC-013 Phase 7, graph lineage lives in `__manifest` (`graph_commit` / `graph_head` rows) and branch authority is `__manifest` (branch create forks it first). The two commit-graph datasets were vestigial: `_graph_commit_actors.lance` was never written or read; `_graph_commits.lance` carried zero commit rows and only mirrored the manifest's branch refs (a deny-list "parallel copy"). Retire both. - `CommitGraph` collapses to a pure projection: drops its Lance dataset handles (`dataset`/`actor_dataset`) and all branch methods; `open`/`open_at_branch`/ `refresh`/`init` open NO dataset, building the cache from `ManifestCoordinator::read_graph_lineage_at`. Removes ~1.4s of cold-open dataset opens. - `graph_coordinator`: `commit_graph` is now non-`Option` (always a valid projection). `branch_create`/`branch_delete` go through `ManifestCoordinator` only — a single atomic op, replacing the two-step manifest-fork + commit-graph-fork + rollback. Deleted `create_commit_graph_branch`, `reclaim_commit_graph_branch`, `ensure_commit_graph_initialized`, and every `storage.exists(_graph_commits.lance)` gate. - `optimize`: dropped `reconcile_commit_graph_orphans` and the two tables from the internal-table compaction set (now `__manifest` only). - `instrumentation`: `INTERNAL_TABLE_DIRS` no longer lists the two tables. - Fresh graphs create neither table; `lineage_projection.rs` now asserts both `.lance` dirs are absent. Deleted the obsolete commit-graph-branch-race failpoint tests + their failpoint names, and updated the `maintenance` optimize tests (one internal table, not three). Review-pass fixes folded in: - Removed two stale `omnigraph.rs` in-source tests the prior run missed (a disk-full link failure masked them): one asserting `open` probes `_graph_commits.lance` (the exists-gate this commit removes) — it was masked earlier by a disk-full link failure. - Corrected src comments referencing deleted code (`migrate_v3_to_v4`, `append_commit`/`append_merge_commit`, the three-internal-table list, the `_graph_commits` reconcile owner) in publisher/recovery/optimize/recovery_audit. - Narrowed `set_stamp_for_test` to `cfg(test)` (its only caller is the refusal test) — removes a dead-code warning in the failpoints build. Branch create/delete atomicity improves (single atomic `__manifest` op). No behavior change for reads or branches. Follow-up (separate commit): the now-always-0 `IoCounts::commit_graph_reads` test counter + its `IOTracker`, threaded through ~11 cost-test files. --- crates/omnigraph/src/db/commit_graph.rs | 458 +----------------- crates/omnigraph/src/db/graph_coordinator.rs | 214 ++------ .../omnigraph/src/db/manifest/migrations.rs | 5 +- crates/omnigraph/src/db/manifest/publisher.rs | 5 - crates/omnigraph/src/db/manifest/recovery.rs | 13 +- crates/omnigraph/src/db/omnigraph.rs | 67 +-- crates/omnigraph/src/db/omnigraph/optimize.rs | 97 +--- .../omnigraph/src/db/omnigraph/table_ops.rs | 8 - crates/omnigraph/src/db/recovery_audit.rs | 19 +- crates/omnigraph/src/exec/merge.rs | 1 - crates/omnigraph/src/failpoints.rs | 2 - crates/omnigraph/src/instrumentation.rs | 28 +- crates/omnigraph/tests/branching.rs | 21 - crates/omnigraph/tests/failpoints.rs | 119 ----- crates/omnigraph/tests/forbidden_apis.rs | 3 +- crates/omnigraph/tests/lineage_projection.rs | 54 +-- crates/omnigraph/tests/maintenance.rs | 91 ++-- 17 files changed, 140 insertions(+), 1065 deletions(-) diff --git a/crates/omnigraph/src/db/commit_graph.rs b/crates/omnigraph/src/db/commit_graph.rs index d0f18620..ad6831ad 100644 --- a/crates/omnigraph/src/db/commit_graph.rs +++ b/crates/omnigraph/src/db/commit_graph.rs @@ -1,18 +1,6 @@ use std::collections::{HashMap, VecDeque}; -use std::sync::Arc; -use arrow_array::{ - RecordBatch, RecordBatchIterator, StringArray, TimestampMicrosecondArray, UInt64Array, -}; -use arrow_schema::{DataType, Field, Schema, SchemaRef, TimeUnit}; -use lance::Dataset; -use lance::dataset::{WriteMode, WriteParams}; -use lance_file::version::LanceFileVersion; - -use crate::error::{OmniError, Result}; - -const GRAPH_COMMITS_DIR: &str = "_graph_commits.lance"; -const GRAPH_COMMIT_ACTORS_DIR: &str = "_graph_commit_actors.lance"; +use crate::error::Result; #[derive(Debug, Clone)] pub struct GraphCommit { @@ -25,60 +13,32 @@ pub struct GraphCommit { pub created_at: i64, } +/// A pure projection of the graph lineage that lives in `__manifest` +/// (`graph_commit` + `graph_head` rows, RFC-013 Phase 7). It opens NO Lance +/// dataset (Phase B retired `_graph_commits.lance` / `_graph_commit_actors.lance`): +/// the in-memory cache is built from `ManifestCoordinator::read_graph_lineage_at`, +/// and branch authority lives entirely in `__manifest`. Reads +/// (`head_commit`/`load_commits`/`get_commit`/`merge_base`) and writes +/// (`insert_committed`, fed by the coordinator's manifest publish CAS) both work +/// off this projection. pub struct CommitGraph { root_uri: String, - /// Handle on `_graph_commits.lance` at the active branch, held only for the - /// branch-management WRITES (`create_branch`, formerly `version`) and - /// `refresh`. It is a DERIVED artifact (RFC-013 Phase 7): graph lineage lives - /// in `__manifest`, and reads (`head_commit`/`load_commits`/`get_commit`/ - /// `merge_base`) never touch it. `None` means the branch's - /// `_graph_commits.lance` ref is missing (an interrupted fork-reclaim or a - /// `cleanup` race) while the manifest lineage is still authoritative — so the - /// READS stay correct and only a subsequent `create_branch` surfaces the loud - /// actionable error. Mirrors `actor_dataset`'s best-effort `Option`. - dataset: Option, - actor_dataset: Option, active_branch: Option, - actor_by_commit_id: HashMap, commit_by_id: HashMap, head_commit: Option, } impl CommitGraph { - /// Create the commit-graph datasets for a fresh graph. The genesis - /// `graph_commit` + `graph_head` rows live in `__manifest` (folded into the - /// init write — RFC-013 Phase 7), so `_graph_commits.lance` is created EMPTY - /// here: it exists only to carry the Lance branch refs that `create_branch` / - /// `list_branches` / the `cleanup` orphan reconciler operate on. No commit - /// rows are ever written to it. The in-memory cache is sourced from the - /// manifest projection — the same path as [`open`], so genesis is seen - /// identically whether the graph was just initialized or reopened. + /// Seed the in-memory cache for a fresh graph from the `__manifest` genesis + /// lineage (folded into the manifest init write — RFC-013 Phase 7). No Lance + /// dataset is created or opened — the projection sees genesis identically to + /// [`open`]. pub async fn init(root_uri: &str) -> Result { let root = root_uri.trim_end_matches('/'); - let uri = graph_commits_uri(root); - - let batch = RecordBatch::new_empty(commit_graph_schema()); - let reader = RecordBatchIterator::new(vec![Ok(batch)], commit_graph_schema()); - let params = WriteParams { - mode: WriteMode::Create, - enable_stable_row_ids: true, - data_storage_version: Some(LanceFileVersion::V2_2), - auto_cleanup: None, - skip_auto_cleanup: true, - ..Default::default() - }; - let dataset = Dataset::write(reader, &uri as &str, Some(params)) - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; - let actor_dataset = create_commit_actor_dataset(root).await?; - let (commit_by_id, head_commit) = load_commit_cache_from_manifest(root, None).await?; Ok(Self { root_uri: root.to_string(), - dataset: Some(dataset), - actor_dataset: Some(actor_dataset), active_branch: None, - actor_by_commit_id: HashMap::new(), commit_by_id, head_commit, }) @@ -98,32 +58,10 @@ impl CommitGraph { pub async fn open(root_uri: &str) -> Result { let root = root_uri.trim_end_matches('/'); - let wrapper = crate::instrumentation::commit_graph_wrapper(); - let dataset = - crate::instrumentation::open_dataset_tracked(&graph_commits_uri(root), wrapper.clone()) - .await?; - let actor_dataset = - crate::instrumentation::open_dataset_tracked(&graph_commit_actors_uri(root), wrapper) - .await - .ok(); - // RFC-013 step 4: source the in-memory cache from the `__manifest` - // lineage projection (which carries the actor inline), not from - // `_graph_commits.lance`. The dataset handles above are retained for the - // branch-management ops (create/delete/list/version) that still target - // the commit-graph dataset; the actor dataset is only kept for the - // dual-write append path. The projection-equivalence gate proves this - // cache equals the prior `_graph_commits.lance` read. A pre-Phase-7 (v3) - // graph not yet migrated falls back to the legacy read — see - // `load_commit_cache_for_branch`. let (commit_by_id, head_commit) = load_commit_cache_for_branch(root, None).await?; Ok(Self { root_uri: root.to_string(), - // `open` targets main and never checks out a branch (main cannot be - // deleted/recreated), so the handle is always present here. - dataset: Some(dataset), - actor_dataset, active_branch: None, - actor_by_commit_id: HashMap::new(), commit_by_id, head_commit, }) @@ -131,256 +69,25 @@ impl CommitGraph { pub async fn open_at_branch(root_uri: &str, branch: &str) -> Result { let root = root_uri.trim_end_matches('/'); - let wrapper = crate::instrumentation::commit_graph_wrapper(); - let dataset = - crate::instrumentation::open_dataset_tracked(&graph_commits_uri(root), wrapper.clone()) - .await?; - // Best-effort checkout of the DERIVED `_graph_commits.lance` branch ref. - // It is held only for `create_branch` (a write); the lineage READ below - // comes from `__manifest`. A missing ref (interrupted fork-reclaim / - // `cleanup` race) must not wedge the read, so a typed not-found yields a - // `None` handle — a subsequent `create_branch` then surfaces the loud - // error. Any OTHER open error (transient IO / corrupt) still propagates, - // matching the `force_delete_branch` not-found idiom. - let dataset = match dataset.checkout_branch(branch).await { - Ok(ds) => Some(ds), - Err(lance::Error::RefNotFound { .. }) | Err(lance::Error::NotFound { .. }) => None, - Err(e) => return Err(OmniError::Lance(e.to_string())), - }; - let actor_dataset = - crate::instrumentation::open_dataset_tracked(&graph_commit_actors_uri(root), wrapper) - .await - .ok(); - // Hard `?`: the manifest existence gate. `load_commit_cache_for_branch` - // opens the branch's `__manifest` (its own `checkout_branch` on the - // authoritative table), so a TRULY absent branch still fails loudly here — - // only the derived `_graph_commits.lance` ref is allowed to be missing. + // `load_commit_cache_for_branch` opens the branch's `__manifest` (the + // authoritative table), so a truly absent branch fails loudly here. let (commit_by_id, head_commit) = load_commit_cache_for_branch(root, Some(branch)).await?; Ok(Self { root_uri: root.to_string(), - dataset, - actor_dataset, active_branch: Some(branch.to_string()), - actor_by_commit_id: HashMap::new(), commit_by_id, head_commit, }) } pub async fn refresh(&mut self) -> Result<()> { - let root = self.root_uri.clone(); - let wrapper = crate::instrumentation::commit_graph_wrapper(); - let dataset = crate::instrumentation::open_dataset_tracked( - &graph_commits_uri(&root), - wrapper.clone(), - ) - .await?; - // Same best-effort checkout as `open_at_branch`: a missing DERIVED branch - // ref leaves the handle `None` (only `create_branch` then errors), while - // the in-memory cache re-syncs from the authoritative manifest below. - self.dataset = match &self.active_branch { - Some(branch) => match dataset.checkout_branch(branch).await { - Ok(ds) => Some(ds), - Err(lance::Error::RefNotFound { .. }) | Err(lance::Error::NotFound { .. }) => None, - Err(e) => return Err(OmniError::Lance(e.to_string())), - }, - None => Some(dataset), - }; - self.actor_dataset = - crate::instrumentation::open_dataset_tracked(&graph_commit_actors_uri(&root), wrapper) - .await - .ok(); let (commit_by_id, head_commit) = - load_commit_cache_for_branch(&root, self.active_branch.as_deref()).await?; + load_commit_cache_for_branch(&self.root_uri, self.active_branch.as_deref()).await?; self.commit_by_id = commit_by_id; self.head_commit = head_commit; Ok(()) } - pub async fn create_branch(&mut self, name: &str) -> Result<()> { - // The held `_graph_commits.lance` handle is the only thing that can fork a - // branch ref. If it is missing (an interrupted fork-reclaim or a `cleanup` - // race dropped the derived ref while manifest lineage stayed authoritative), - // fail loudly + actionably rather than silently. Repair is the existing - // `cleanup` orphan reconciler (`reconcile_commit_graph_orphans`), not an - // inline write on this path. - let Some(dataset) = &self.dataset else { - let branch = self.active_branch.as_deref().unwrap_or("main"); - return Err(OmniError::manifest_internal(format!( - "commit-graph branch ref for '{branch}' is missing; run `omnigraph cleanup` then retry" - ))); - }; - let version = dataset.version().version; - let mut ds = dataset.clone(); - ds.create_branch(name, version, None) - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; - Ok(()) - } - - pub async fn delete_branch(&mut self, name: &str) -> Result<()> { - let mut ds = Dataset::open(&graph_commits_uri(&self.root_uri)) - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; - ds.delete_branch(name) - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; - self.refresh().await - } - - /// Idempotently drop the commit-graph branch `name`, tolerating an - /// already-absent branch (see [`TableStore::force_delete_branch`] for the - /// same semantics). Used by the best-effort reclaim in `branch_delete` and - /// the `cleanup` orphan reconciler. `RefConflict` (referencing descendants) - /// is still surfaced. - pub async fn force_delete_branch(&mut self, name: &str) -> Result<()> { - let mut ds = Dataset::open(&graph_commits_uri(&self.root_uri)) - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; - match ds.force_delete_branch(name).await { - Ok(()) => {} - Err(lance::Error::RefNotFound { .. }) | Err(lance::Error::NotFound { .. }) => {} - Err(e) => return Err(OmniError::Lance(e.to_string())), - } - self.refresh().await - } - - /// List the named branches present on the commit-graph dataset. The - /// `cleanup` reconciler diffs this against the manifest branch set to find - /// orphaned commit-graph branches to reclaim. - pub async fn list_branches(&self) -> Result> { - let ds = Dataset::open(&graph_commits_uri(&self.root_uri)) - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; - let branches = ds - .list_branches() - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; - Ok(branches.into_keys().collect()) - } - - // DEAD as of RFC-013 Phase 7: graph commits are recorded in `__manifest` - // (folded into the publish CAS), never appended to `_graph_commits.lance`. - // These append helpers are retained only because the actor sidecar table they - // touch is still enumerated by `optimize` (internal-table compaction); they - // have no caller on any write path. The single-source invariant is guarded by - // `tests/lineage_projection.rs`, which fails if `_graph_commits.lance` ever - // gains a commit row. Do NOT call these to record a commit — use the - // coordinator's `commit_*_with_actor` / `commit_merge_with_actor`, which carry - // the lineage intent into the manifest publish. - #[allow(dead_code)] - async fn append_commit( - &mut self, - manifest_branch: Option<&str>, - manifest_version: u64, - actor_id: Option<&str>, - ) -> Result { - let parent_commit_id = self.head_commit_id().await?; - self.append_commit_with_parents( - manifest_branch, - manifest_version, - parent_commit_id.as_deref(), - None, - actor_id, - ) - .await - } - - #[allow(dead_code)] - async fn append_merge_commit( - &mut self, - manifest_branch: Option<&str>, - manifest_version: u64, - parent_commit_id: &str, - merged_parent_commit_id: &str, - actor_id: Option<&str>, - ) -> Result { - self.append_commit_with_parents( - manifest_branch, - manifest_version, - Some(parent_commit_id), - Some(merged_parent_commit_id), - actor_id, - ) - .await - } - - #[allow(dead_code)] - async fn append_commit_with_parents( - &mut self, - manifest_branch: Option<&str>, - manifest_version: u64, - parent_commit_id: Option<&str>, - merged_parent_commit_id: Option<&str>, - actor_id: Option<&str>, - ) -> Result { - let graph_commit_id = ulid::Ulid::new().to_string(); - let commit = GraphCommit { - graph_commit_id: graph_commit_id.clone(), - manifest_branch: manifest_branch.map(|s| s.to_string()), - manifest_version, - parent_commit_id: parent_commit_id.map(|s| s.to_string()), - merged_parent_commit_id: merged_parent_commit_id.map(|s| s.to_string()), - actor_id: actor_id.map(str::to_string), - created_at: crate::db::now_micros()?, - }; - - let batch = commits_to_batch(&[commit.clone()])?; - let reader = RecordBatchIterator::new(vec![Ok(batch)], commit_graph_schema()); - // This helper is dead on every write path (RFC-013 Phase 7) — reached only - // by the transitional v3 fixtures, which always hold the commits dataset. - // A `None` here would be a fixture bug, so fail loudly rather than silently. - let mut ds = self - .dataset - .clone() - .ok_or_else(|| OmniError::manifest_internal("commit-graph dataset is missing"))?; - ds.append(reader, None) - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; - self.dataset = Some(ds); - if let Some(actor_id) = actor_id { - self.append_actor(&graph_commit_id, actor_id).await?; - } - self.commit_by_id - .insert(graph_commit_id.clone(), commit.clone()); - if should_replace_head(self.head_commit.as_ref(), &commit) { - self.head_commit = Some(commit); - } - - Ok(graph_commit_id) - } - - #[allow(dead_code)] // RFC-013 Phase 7: dead — see `append_commit`. - async fn append_actor(&mut self, graph_commit_id: &str, actor_id: &str) -> Result<()> { - if self - .actor_by_commit_id - .get(graph_commit_id) - .is_some_and(|existing| existing == actor_id) - { - return Ok(()); - } - - let record = CommitActorRecord { - graph_commit_id: graph_commit_id.to_string(), - actor_id: actor_id.to_string(), - created_at: crate::db::now_micros()?, - }; - let batch = commit_actors_to_batch(&[record])?; - let reader = RecordBatchIterator::new(vec![Ok(batch)], commit_actor_schema()); - let mut dataset = match self.actor_dataset.take() { - Some(dataset) => dataset, - None => create_commit_actor_dataset(&self.root_uri).await?, - }; - dataset - .append(reader, None) - .await - .map_err(|e| OmniError::Lance(e.to_string()))?; - self.actor_by_commit_id - .insert(graph_commit_id.to_string(), actor_id.to_string()); - self.actor_dataset = Some(dataset); - Ok(()) - } - pub async fn head_commit(&self) -> Result> { Ok(self.head_commit.clone()) } @@ -454,109 +161,6 @@ impl CommitGraph { } } -pub(crate) fn graph_commits_uri(root_uri: &str) -> String { - format!("{}/{}", root_uri.trim_end_matches('/'), GRAPH_COMMITS_DIR) -} - -pub(crate) fn graph_commit_actors_uri(root_uri: &str) -> String { - format!( - "{}/{}", - root_uri.trim_end_matches('/'), - GRAPH_COMMIT_ACTORS_DIR - ) -} - -fn commit_graph_schema() -> SchemaRef { - Arc::new(Schema::new(vec![ - Field::new("graph_commit_id", DataType::Utf8, false), - Field::new("manifest_branch", DataType::Utf8, true), - Field::new("manifest_version", DataType::UInt64, false), - Field::new("parent_commit_id", DataType::Utf8, true), - Field::new("merged_parent_commit_id", DataType::Utf8, true), - Field::new( - "created_at", - DataType::Timestamp(TimeUnit::Microsecond, None), - false, - ), - ])) -} - -fn commit_actor_schema() -> SchemaRef { - Arc::new(Schema::new(vec![ - Field::new("graph_commit_id", DataType::Utf8, false), - Field::new("actor_id", DataType::Utf8, false), - Field::new( - "created_at", - DataType::Timestamp(TimeUnit::Microsecond, None), - false, - ), - ])) -} - -#[derive(Debug, Clone)] -struct CommitActorRecord { - graph_commit_id: String, - actor_id: String, - created_at: i64, -} - -async fn create_commit_actor_dataset(root_uri: &str) -> Result { - let uri = graph_commit_actors_uri(root_uri); - let batch = RecordBatch::new_empty(commit_actor_schema()); - let reader = RecordBatchIterator::new(vec![Ok(batch)], commit_actor_schema()); - let params = WriteParams { - mode: WriteMode::Create, - enable_stable_row_ids: true, - data_storage_version: Some(LanceFileVersion::V2_2), - auto_cleanup: None, - skip_auto_cleanup: true, - ..Default::default() - }; - match Dataset::write(reader, &uri as &str, Some(params)).await { - Ok(dataset) => Ok(dataset), - // Create-or-open idempotency: a concurrent/prior create raced us. Match - // the typed `DatasetAlreadyExists` variant, not the display string — the - // message is not a Lance API contract (a wording change would silently - // break this fallback). Pinned by - // `lance_surface_guards.rs::lance_error_dataset_already_exists_variant_exists`. - Err(lance::Error::DatasetAlreadyExists { .. }) => Dataset::open(&uri) - .await - .map_err(|open_err| OmniError::Lance(open_err.to_string())), - Err(err) => Err(OmniError::Lance(err.to_string())), - } -} - -fn commits_to_batch(commits: &[GraphCommit]) -> Result { - let ids: Vec<&str> = commits.iter().map(|c| c.graph_commit_id.as_str()).collect(); - let branches: Vec> = commits - .iter() - .map(|c| c.manifest_branch.as_deref()) - .collect(); - let versions: Vec = commits.iter().map(|c| c.manifest_version).collect(); - let parents: Vec> = commits - .iter() - .map(|c| c.parent_commit_id.as_deref()) - .collect(); - let merged_parents: Vec> = commits - .iter() - .map(|c| c.merged_parent_commit_id.as_deref()) - .collect(); - let created_at: Vec = commits.iter().map(|c| c.created_at).collect(); - - RecordBatch::try_new( - commit_graph_schema(), - vec![ - Arc::new(StringArray::from(ids)), - Arc::new(StringArray::from(branches)), - Arc::new(UInt64Array::from(versions)), - Arc::new(StringArray::from(parents)), - Arc::new(StringArray::from(merged_parents)), - Arc::new(TimestampMicrosecondArray::from(created_at)), - ], - ) - .map_err(|e| OmniError::Lance(e.to_string())) -} - /// Build the in-memory commit cache for `branch` from the `__manifest` /// graph-lineage projection (RFC-013 Phase 7) — the single source of lineage on a /// v4 graph. Sub-v4 graphs are refused at open (`refuse_if_stamp_unsupported`), @@ -569,10 +173,9 @@ async fn load_commit_cache_for_branch( } /// Build the in-memory commit cache from the `__manifest` graph-lineage -/// projection (RFC-013 step 4) rather than `_graph_commits.lance`. The lineage -/// rows carry the actor inline, so no separate actor-table read is needed. Head -/// selection is identical to [`load_commit_cache`] (`should_replace_head`), so -/// the resulting cache is equivalent to the prior `_graph_commits.lance` read. +/// projection (RFC-013 step 4). The lineage rows carry the actor inline, so no +/// separate actor-table read is needed. Head selection (`should_replace_head`) +/// matches the order `load_commits` reports. async fn load_commit_cache_from_manifest( root_uri: &str, branch: Option<&str>, @@ -599,28 +202,6 @@ async fn load_commit_cache_from_manifest( Ok((commit_by_id, head_commit)) } -fn commit_actors_to_batch(records: &[CommitActorRecord]) -> Result { - let commit_ids: Vec<&str> = records - .iter() - .map(|record| record.graph_commit_id.as_str()) - .collect(); - let actor_ids: Vec<&str> = records - .iter() - .map(|record| record.actor_id.as_str()) - .collect(); - let created_at: Vec = records.iter().map(|record| record.created_at).collect(); - - RecordBatch::try_new( - commit_actor_schema(), - vec![ - Arc::new(StringArray::from(commit_ids)), - Arc::new(StringArray::from(actor_ids)), - Arc::new(TimestampMicrosecondArray::from(created_at)), - ], - ) - .map_err(|e| OmniError::Lance(e.to_string())) -} - fn should_replace_head(current: Option<&GraphCommit>, candidate: &GraphCommit) -> bool { current.is_none_or(|existing| { candidate @@ -666,4 +247,3 @@ async fn open_for_branch(root_uri: &str, branch: Option<&str>) -> Result CommitGraph::open(root_uri).await, } } - diff --git a/crates/omnigraph/src/db/graph_coordinator.rs b/crates/omnigraph/src/db/graph_coordinator.rs index aff791d0..0b016f4d 100644 --- a/crates/omnigraph/src/db/graph_coordinator.rs +++ b/crates/omnigraph/src/db/graph_coordinator.rs @@ -6,7 +6,7 @@ use omnigraph_compiler::catalog::Catalog; use crate::error::{OmniError, Result}; use crate::failpoints; -use crate::storage::{StorageAdapter, join_uri, normalize_root_uri}; +use crate::storage::{StorageAdapter, normalize_root_uri}; use super::commit_graph::{CommitGraph, GraphCommit}; use super::is_internal_system_branch; @@ -14,8 +14,6 @@ use super::manifest::{ ManifestChange, ManifestCoordinator, ManifestIncarnation, Snapshot, SubTableUpdate, }; -const GRAPH_COMMITS_DIR: &str = "_graph_commits.lance"; - #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct SnapshotId(String); @@ -95,7 +93,7 @@ pub struct GraphCoordinator { root_uri: String, storage: Arc, manifest: ManifestCoordinator, - commit_graph: Option, + commit_graph: CommitGraph, bound_branch: Option, } @@ -108,15 +106,15 @@ impl GraphCoordinator { let root = normalize_root_uri(root_uri)?; // The genesis graph commit is folded into the manifest init write, so // `__manifest` is the single source of graph lineage from version one - // (RFC-013 Phase 7). `CommitGraph::init` then creates the empty - // branch-ref dataset and seeds its cache from that manifest genesis. + // (RFC-013 Phase 7). `CommitGraph::init` then seeds its cache from that + // manifest genesis — it opens no Lance dataset (Phase B). let manifest = ManifestCoordinator::init(&root, catalog).await?; let commit_graph = CommitGraph::init(&root).await?; Ok(Self { root_uri: root, storage, manifest, - commit_graph: Some(commit_graph), + commit_graph, bound_branch: None, }) } @@ -124,11 +122,7 @@ impl GraphCoordinator { pub async fn open(root_uri: &str, storage: Arc) -> Result { let root = normalize_root_uri(root_uri)?; let manifest = ManifestCoordinator::open(&root).await?; - let commit_graph = if storage.exists(&graph_commits_uri(&root)).await? { - Some(CommitGraph::open(&root).await?) - } else { - None - }; + let commit_graph = CommitGraph::open(&root).await?; Ok(Self { root_uri: root, storage, @@ -150,11 +144,7 @@ impl GraphCoordinator { let root = normalize_root_uri(root_uri)?; let manifest = ManifestCoordinator::open_at_branch(&root, &branch_name).await?; - let commit_graph = if storage.exists(&graph_commits_uri(&root)).await? { - Some(CommitGraph::open_at_branch(&root, &branch_name).await?) - } else { - None - }; + let commit_graph = CommitGraph::open_at_branch(&root, &branch_name).await?; Ok(Self { root_uri: root, @@ -187,9 +177,7 @@ impl GraphCoordinator { pub async fn refresh(&mut self) -> Result<()> { self.manifest.refresh().await?; - if let Some(commit_graph) = &mut self.commit_graph { - commit_graph.refresh().await?; - } + self.commit_graph.refresh().await?; Ok(()) } @@ -234,46 +222,12 @@ impl GraphCoordinator { pub async fn branch_create(&mut self, name: &str) -> Result<()> { let branch = normalize_branch_name(name)? .ok_or_else(|| OmniError::manifest("cannot create branch 'main'".to_string()))?; - self.ensure_commit_graph_initialized().await?; - - // Manifest authority flip first. - self.manifest.create_branch(&branch).await?; - - // Derived commit-graph branch. If anything after the authority flip - // fails, roll back the manifest branch so the branch never half-exists - // (a manifest branch with no commit-graph branch breaks the next write). - if let Err(err) = self.create_commit_graph_branch(&branch).await { - if let Err(rollback_err) = self.manifest.delete_branch(&branch).await { - tracing::warn!( - target: "omnigraph::branch_create", - branch = %branch, - error = %rollback_err, - "rollback of manifest branch failed after commit-graph create failure", - ); - } - return Err(err); - } - Ok(()) - } - /// Create the derived commit-graph branch for `branch`, healing a zombie ref - /// left by an incomplete prior delete. The manifest branch was just created - /// fresh, so any existing commit-graph branch with this name is provably - /// orphaned and is force-dropped before recreating. - async fn create_commit_graph_branch(&mut self, branch: &str) -> Result<()> { - failpoints::maybe_fail(crate::failpoints::names::BRANCH_CREATE_AFTER_MANIFEST_BRANCH_CREATE)?; - let Some(commit_graph) = &mut self.commit_graph else { - return Ok(()); - }; - if commit_graph - .list_branches() - .await? - .iter() - .any(|existing| existing == branch) - { - commit_graph.force_delete_branch(branch).await?; - } - commit_graph.create_branch(branch).await + // Manifest is the single branch authority (it forks `__manifest` first). + // The commit graph is a pure `__manifest` projection (Phase B), so a + // branch create is one atomic manifest op — no derived commit-graph + // branch to fork, and nothing to roll back. + self.manifest.create_branch(&branch).await } pub async fn branch_delete(&mut self, name: &str) -> Result<()> { @@ -286,43 +240,11 @@ impl GraphCoordinator { ))); } - // Manifest authority flip — the single atomic op that makes the branch - // cease to exist. Must succeed; everything after is derived state - // reclaimed best-effort. - self.manifest.delete_branch(&branch).await?; - - // Commit-graph branch is derived state. Reclaim best-effort with the - // idempotent force variant: a failure here (or a missing dataset) is - // reconciled by `cleanup` and must not fail the delete after the - // authority already flipped. - if let Err(err) = self.reclaim_commit_graph_branch(&branch).await { - tracing::warn!( - target: "omnigraph::branch_delete::cleanup", - branch = %branch, - error = %err, - "best-effort commit-graph branch reclaim failed; cleanup will reconcile", - ); - } - - Ok(()) - } - - /// Best-effort, idempotent reclaim of the commit-graph branch `branch`. - /// Tolerates an absent commit-graph dataset (a graph that never committed). - async fn reclaim_commit_graph_branch(&mut self, branch: &str) -> Result<()> { - failpoints::maybe_fail(crate::failpoints::names::BRANCH_DELETE_BEFORE_COMMIT_GRAPH_RECLAIM)?; - if let Some(commit_graph) = &mut self.commit_graph { - commit_graph.force_delete_branch(branch).await - } else if self - .storage - .exists(&graph_commits_uri(self.root_uri())) - .await? - { - let mut commit_graph = CommitGraph::open(self.root_uri()).await?; - commit_graph.force_delete_branch(branch).await - } else { - Ok(()) - } + // Manifest is the single branch authority (Phase B): one atomic op makes + // the branch cease to exist. The commit graph is a pure `__manifest` + // projection with no derived branch to reclaim; the per-table data forks + // are reclaimed by `cleanup`, not here. + self.manifest.delete_branch(&branch).await } pub async fn snapshot_at_version(&self, version: u64) -> Result { @@ -398,20 +320,13 @@ impl GraphCoordinator { } pub async fn resolve_commit(&self, snapshot_id: &SnapshotId) -> Result { - if let Some(commit_graph) = &self.commit_graph { - if let Some(commit) = commit_graph.get_commit(snapshot_id.as_str()) { - return Ok(commit); - } + if let Some(commit) = self.commit_graph.get_commit(snapshot_id.as_str()) { + return Ok(commit); } for branch in self.manifest.list_branches().await? { let normalized = normalize_branch_name(&branch)?; - let Some(commit_graph) = self - .open_commit_graph_for_branch(normalized.as_deref()) - .await? - else { - break; - }; + let commit_graph = self.open_commit_graph_for_branch(normalized.as_deref()).await?; if let Some(commit) = commit_graph.get_commit(snapshot_id.as_str()) { return Ok(commit); } @@ -424,36 +339,10 @@ impl GraphCoordinator { } pub(crate) async fn head_commit_id(&self) -> Result> { - match &self.commit_graph { - Some(commit_graph) => commit_graph - .head_commit_id() - .await - .map(|id| id.map(SnapshotId::new)), - None => Ok(None), - } - } - - pub(crate) async fn ensure_commit_graph_initialized(&mut self) -> Result<()> { - if self.commit_graph.is_some() { - return Ok(()); - } - if !self - .storage - .exists(&graph_commits_uri(self.root_uri())) - .await? - { - // A graph opened without a commit-graph dataset gets the empty - // branch-ref dataset created lazily here. Graph lineage lives in - // `__manifest` (RFC-013 Phase 7) — a graph initialized by current - // code already carries its genesis there, and the commit graph - // sources its cache from it. No genesis is written here. - CommitGraph::init(self.root_uri()).await?; - } - self.commit_graph = match self.current_branch() { - Some(branch) => Some(CommitGraph::open_at_branch(self.root_uri(), branch).await?), - None => Some(CommitGraph::open(self.root_uri()).await?), - }; - Ok(()) + self.commit_graph + .head_commit_id() + .await + .map(|id| id.map(SnapshotId::new)) } pub(crate) async fn commit_updates_with_actor( @@ -503,7 +392,6 @@ impl GraphCoordinator { expected_table_versions: &HashMap, actor_id: Option<&str>, ) -> Result { - self.ensure_commit_graph_initialized().await?; let intent = self.new_lineage_intent(actor_id, None)?; failpoints::maybe_fail(crate::failpoints::names::GRAPH_PUBLISH_BEFORE_COMMIT_APPEND)?; let outcome = self @@ -530,7 +418,6 @@ impl GraphCoordinator { merged_parent_commit_id: &str, actor_id: Option<&str>, ) -> Result { - self.ensure_commit_graph_initialized().await?; let intent = self.new_lineage_intent(actor_id, Some(merged_parent_commit_id.to_string()))?; failpoints::maybe_fail(crate::failpoints::names::GRAPH_PUBLISH_BEFORE_COMMIT_APPEND)?; @@ -565,20 +452,12 @@ impl GraphCoordinator { /// intent + the publisher-resolved parent + the new manifest version. No /// storage I/O: the durable write already happened in the publish CAS, and /// this keeps a same-handle read's `head_commit_id` consistent with the - /// snapshot it just advanced. Falls back to a synthetic id only when the - /// commit graph is somehow absent (never on a real write). + /// snapshot it just advanced. fn apply_lineage_to_cache( &mut self, intent: crate::db::manifest::LineageIntent, outcome: &crate::db::manifest::CommitOutcome, ) -> SnapshotId { - let Some(commit_graph) = &mut self.commit_graph else { - return SnapshotId::synthetic( - self.bound_branch.as_deref(), - outcome.version, - self.manifest.incarnation().e_tag.as_deref(), - ); - }; let commit = GraphCommit { graph_commit_id: intent.graph_commit_id.clone(), manifest_branch: intent.branch, @@ -588,51 +467,22 @@ impl GraphCoordinator { actor_id: intent.actor_id, created_at: intent.created_at, }; - commit_graph.insert_committed(commit); + self.commit_graph.insert_committed(commit); SnapshotId::new(intent.graph_commit_id) } - async fn open_commit_graph_for_branch( - &self, - branch: Option<&str>, - ) -> Result> { - if !self - .storage - .exists(&graph_commits_uri(self.root_uri())) - .await? - { - return Ok(None); + async fn open_commit_graph_for_branch(&self, branch: Option<&str>) -> Result { + match branch { + Some(branch) => CommitGraph::open_at_branch(self.root_uri(), branch).await, + None => CommitGraph::open(self.root_uri()).await, } - let graph = match branch { - Some(branch) => CommitGraph::open_at_branch(self.root_uri(), branch).await?, - None => CommitGraph::open(self.root_uri()).await?, - }; - Ok(Some(graph)) } pub(crate) async fn list_commits(&self) -> Result> { - if let Some(commit_graph) = &self.commit_graph { - return commit_graph.load_commits().await; - } - if !self - .storage - .exists(&graph_commits_uri(self.root_uri())) - .await? - { - return Ok(Vec::new()); - } - let commit_graph = match self.current_branch() { - Some(branch) => CommitGraph::open_at_branch(self.root_uri(), branch).await?, - None => CommitGraph::open(self.root_uri()).await?, - }; - commit_graph.load_commits().await + self.commit_graph.load_commits().await } } -fn graph_commits_uri(root_uri: &str) -> String { - join_uri(root_uri, GRAPH_COMMITS_DIR) -} - /// Wrap each `SubTableUpdate` as a `ManifestChange::Update` for the publisher. fn updates_to_changes(updates: &[SubTableUpdate]) -> Vec { updates diff --git a/crates/omnigraph/src/db/manifest/migrations.rs b/crates/omnigraph/src/db/manifest/migrations.rs index 95d593da..372e2fbc 100644 --- a/crates/omnigraph/src/db/manifest/migrations.rs +++ b/crates/omnigraph/src/db/manifest/migrations.rs @@ -128,9 +128,8 @@ async fn set_stamp(dataset: &mut Dataset, version: u32) -> Result<()> { /// Test-only: force the on-disk internal-schema stamp to `version`. The minimal /// seam used to synthesize a sub-CURRENT graph and assert the open path refuses -/// it. Gated on `test` OR `failpoints` so the integration binaries (compiled -/// without `cfg(test)`) can reach it too. -#[cfg(any(test, feature = "failpoints"))] +/// it. Its only caller is the in-source refusal test, so it is `cfg(test)`-only. +#[cfg(test)] pub(crate) async fn set_stamp_for_test(dataset: &mut Dataset, version: u32) -> Result<()> { set_stamp(dataset, version).await } diff --git a/crates/omnigraph/src/db/manifest/publisher.rs b/crates/omnigraph/src/db/manifest/publisher.rs index 2639360e..39d7932a 100644 --- a/crates/omnigraph/src/db/manifest/publisher.rs +++ b/crates/omnigraph/src/db/manifest/publisher.rs @@ -804,11 +804,6 @@ impl ManifestBatchPublisher for GraphNamespacePublisher { /// contention; if the caller's `expected_table_versions` still holds against /// the new manifest state, we re-attempt. Other conflict variants (notably /// `ExpectedVersionMismatch`) propagate so the caller learns immediately. -/// -/// Shared (`pub(crate)`) with the v3→v4 lineage backfill's re-open retry loop -/// (`migrations::migrate_v3_to_v4`), so the migration's retry decision matches the -/// publisher's by construction — both retry exactly `RowLevelCasContention` and -/// propagate everything else. pub(crate) fn is_retryable_publish_conflict(err: &OmniError) -> bool { matches!( err, diff --git a/crates/omnigraph/src/db/manifest/recovery.rs b/crates/omnigraph/src/db/manifest/recovery.rs index c32c4937..cc9c501a 100644 --- a/crates/omnigraph/src/db/manifest/recovery.rs +++ b/crates/omnigraph/src/db/manifest/recovery.rs @@ -296,13 +296,12 @@ pub(crate) struct RecoverySidecar { pub writer_kind: SidecarKind, pub tables: Vec, /// For `SidecarKind::BranchMerge` only: the source branch's HEAD - /// commit id at the time the sidecar was written. Used by the - /// recovery sweep's audit step to call `append_merge_commit` - /// (recording `merged_parent_commit_id`) instead of `append_commit`, - /// so future merges between the same pair recognize "already up-to- - /// date" and merge-base computations stay correct. Optional for - /// backward compatibility — older sidecars (or non-BranchMerge - /// kinds) carry `None` and recovery falls back to `append_commit`. + /// commit id at the time the sidecar was written. Recovery replays the + /// merge commit recording this as its `merged_parent_commit_id` (folded + /// into the manifest publish, RFC-013 Phase 7), so future merges between + /// the same pair recognize "already up-to-date" and merge-base + /// computations stay correct. Optional — older sidecars (or + /// non-BranchMerge kinds) carry `None`, recording a plain commit. #[serde(default, skip_serializing_if = "Option::is_none")] pub merge_source_commit_id: Option, /// SchemaApply-only: tables the writer was about to register diff --git a/crates/omnigraph/src/db/omnigraph.rs b/crates/omnigraph/src/db/omnigraph.rs index 96b837e5..12f02757 100644 --- a/crates/omnigraph/src/db/omnigraph.rs +++ b/crates/omnigraph/src/db/omnigraph.rs @@ -175,8 +175,7 @@ pub struct Omnigraph { /// an explicit target coord parameter so `self.coordinator` is never /// used as scratch space — is the round-1 shape applied to /// `branch_create_from_impl`. Deferred because it requires unwinding - /// every `self.snapshot()` and `self.ensure_commit_graph_initialized()` - /// call inside the merge body. + /// every `self.snapshot()` call inside the merge body. merge_exclusive: Arc>, /// Optional policy checker for engine-layer enforcement (MR-722). /// `None` = no enforcement; mutating methods are unconditionally @@ -1530,8 +1529,7 @@ impl Omnigraph { &self, branch: Option<&str>, ) -> Result> { - let mut coordinator = self.open_coordinator_for_branch(branch).await?; - coordinator.ensure_commit_graph_initialized().await?; + let coordinator = self.open_coordinator_for_branch(branch).await?; coordinator .head_commit_id() .await @@ -1838,10 +1836,6 @@ impl Omnigraph { .await } - pub(crate) async fn ensure_commit_graph_initialized(&self) -> Result<()> { - table_ops::ensure_commit_graph_initialized(self).await - } - /// Invalidate the cached graph index. Called after edge mutations. pub(crate) async fn invalidate_graph_index(&self) { table_ops::invalidate_graph_index(self).await @@ -2539,11 +2533,7 @@ edge WorksAt: Person -> Company .exists_checks() .contains(&join_uri(uri, "__schema_state.json")) ); - assert!( - adapter - .exists_checks() - .contains(&join_uri(uri, "_graph_commits.lance")) - ); + // (Phase B retired `_graph_commits.lance`: open no longer probes for it.) } async fn table_rows_json(db: &Omnigraph, table_key: &str) -> Vec { @@ -2712,57 +2702,6 @@ edge WorksAt: Person -> Company assert!(result.applied, "schema apply should have applied"); } - /// Regression (MR-770): a pre-v0.4.0 graph that still carries a stale - /// `__run__*` branch on `__manifest` must not block schema apply. The - /// v2→v3 sweep runs in `Omnigraph::open(ReadWrite)` — before the - /// schema-apply blocking-branch check — so apply succeeds with no - /// intervening publish. - /// - /// Confirmed to fail before the open-time migration landed: the reopened - /// graph still listed `__run__legacy`, and `apply_schema` returned - /// "found non-main branches: __run__legacy". - #[tokio::test] - async fn legacy_run_branch_is_swept_on_open_and_does_not_block_schema_apply() { - let dir = tempfile::tempdir().unwrap(); - let uri = dir.path().to_str().unwrap(); - let mut db = Omnigraph::init(uri, TEST_SCHEMA).await.unwrap(); - - // Synthesize a legacy graph: a stale `__run__` branch on `__manifest` - // plus the manifest stamp rewound to v2 (pre-sweep). - db.branch_create("__run__legacy").await.unwrap(); - drop(db); - { - // forbidden-api-allow: test synthesizes a legacy graph by editing __manifest directly. - let mut ds = lance::Dataset::open(&format!("{}/__manifest", uri)) - .await - .unwrap(); - ds.update_schema_metadata([( - "omnigraph:internal_schema_version".to_string(), - Some("2".to_string()), - )]) - .await - .unwrap(); - } - - // Reopen (ReadWrite): the open-time migration must sweep `__run__legacy` - // before any branch-observing code runs. - let db = Omnigraph::open(uri).await.unwrap(); - let branches = db.branch_list().await.unwrap(); - assert!( - !branches.iter().any(|b| b.starts_with("__run__")), - "open-time migration must sweep legacy __run__ branches; got {branches:?}", - ); - - // Schema apply must proceed with no intervening publish — the - // blocking-branch check no longer sees `__run__legacy`. - let desired = TEST_SCHEMA.replace( - " age: I32?\n}", - " age: I32?\n nickname: String?\n}", - ); - let result = db.apply_schema(&desired).await.unwrap(); - assert!(result.applied, "schema apply should have applied"); - } - #[tokio::test] async fn test_apply_schema_defers_index_then_reconciler_builds_it() { // iss-848: schema apply records the @index intent but builds nothing diff --git a/crates/omnigraph/src/db/omnigraph/optimize.rs b/crates/omnigraph/src/db/omnigraph/optimize.rs index bae0c881..de652c2c 100644 --- a/crates/omnigraph/src/db/omnigraph/optimize.rs +++ b/crates/omnigraph/src/db/omnigraph/optimize.rs @@ -284,32 +284,18 @@ pub async fn optimize_all_tables(db: &Omnigraph) -> Result, @@ -1178,65 +1163,9 @@ pub async fn reconcile_orphaned_branches(db: &Omnigraph) -> Result, - stats: &mut BranchReconcileStats, -) -> Result<()> { - let commits_uri = crate::db::commit_graph::graph_commits_uri(db.root_uri()); - if !db.storage_adapter().exists(&commits_uri).await? { - return Ok(()); - } - let mut commit_graph = crate::db::commit_graph::CommitGraph::open(db.root_uri()).await?; - for branch in orphan_branches(commit_graph.list_branches().await?, keep) { - match commit_graph.force_delete_branch(&branch).await { - Ok(()) => stats.reclaimed.push(("_graph_commits".to_string(), branch)), - Err(err) => { - tracing::warn!( - target: "omnigraph::cleanup", - branch = %branch, - error = %err, - "reclaiming orphaned commit-graph branch failed; will retry next cleanup", - ); - stats - .failures - .push(("_graph_commits".to_string(), err.to_string())); - } - } - } - Ok(()) -} - -/// Filter `present` Lance branches down to those absent from the manifest -/// `keep` set, ordered children-before-parents (longest name first) so Lance's -/// referenced-parent `RefConflict` cannot block reclamation. -fn orphan_branches(present: Vec, keep: &std::collections::HashSet) -> Vec { - let mut orphans: Vec = present - .into_iter() - .filter(|branch| !keep.contains(branch)) - .collect(); - orphans.sort_by(|a, b| b.len().cmp(&a.len()).then_with(|| a.cmp(b))); - orphans -} - pub(super) fn all_table_keys(catalog: &omnigraph_compiler::catalog::Catalog) -> Vec { let mut keys: Vec = catalog .node_types diff --git a/crates/omnigraph/src/db/omnigraph/table_ops.rs b/crates/omnigraph/src/db/omnigraph/table_ops.rs index f73f2a26..f28a1767 100644 --- a/crates/omnigraph/src/db/omnigraph/table_ops.rs +++ b/crates/omnigraph/src/db/omnigraph/table_ops.rs @@ -1419,14 +1419,6 @@ pub(super) async fn commit_updates_on_branch_with_expected( .await } -pub(super) async fn ensure_commit_graph_initialized(db: &Omnigraph) -> Result<()> { - db.coordinator - .write() - .await - .ensure_commit_graph_initialized() - .await -} - pub(super) async fn invalidate_graph_index(db: &Omnigraph) { db.runtime_cache.invalidate_all().await; } diff --git a/crates/omnigraph/src/db/recovery_audit.rs b/crates/omnigraph/src/db/recovery_audit.rs index 34447737..8247c1bc 100644 --- a/crates/omnigraph/src/db/recovery_audit.rs +++ b/crates/omnigraph/src/db/recovery_audit.rs @@ -1,17 +1,15 @@ //! Recovery audit row storage in `_graph_commit_recoveries.lance`. //! -//! Sibling to `_graph_commits.lance` (`commit_graph.rs`). Each successful +//! A standalone internal table (not catalog-tracked). Each successful //! recovery sweep — roll-forward or roll-back — records one row here so //! operators investigating a sidecar-attributed mutation can correlate //! `omnigraph commit list --filter actor=omnigraph:recovery` with the //! original actor whose mutation was rolled forward / back. //! -//! Sibling-table is additive: it doesn't bump -//! `INTERNAL_MANIFEST_SCHEMA_VERSION`, and can be removed in favor of a -//! schema migration later if the join cost matters. The schema-migration -//! alternative (adding `recovery_for_actor` and `recovery_kind` columns -//! to `_graph_commits.lance` itself) was considered and rejected to keep -//! this change additive. +//! This standalone table is additive: it doesn't bump +//! `INTERNAL_MANIFEST_SCHEMA_VERSION`. Folding `recovery_for_actor` and +//! `recovery_kind` into the `__manifest` `graph_commit` rows instead was +//! considered and rejected to keep this change additive. //! //! Atomicity caveat: append to `_graph_commit_recoveries.lance` is //! sequential w.r.t. the recovery commit, which RFC-013 Phase 7 records in @@ -100,8 +98,7 @@ pub(crate) struct RecoveryAudit { impl RecoveryAudit { /// Open the recovery-audit dataset for the graph, or return a handle - /// with no dataset yet (created on first append). Mirrors the - /// optional-dataset pattern from `_graph_commit_actors.lance`. + /// with no dataset yet (it is created lazily on the first append). pub(crate) async fn open(root_uri: &str) -> Result { let root = root_uri.trim_end_matches('/').to_string(); let dataset = Dataset::open(&recoveries_uri(&root)).await.ok(); @@ -112,8 +109,8 @@ impl RecoveryAudit { } /// Append one recovery audit record. Lazily initializes the dataset - /// on first call (idempotent under racy creation via the same - /// `Dataset already exists` rebound as `_graph_commit_actors.lance`). + /// on first call (idempotent under racy creation: a `Dataset already + /// exists` error is rebound to an open of the just-created dataset). pub(crate) async fn append(&mut self, record: RecoveryAuditRecord) -> Result<()> { let batch = recovery_record_to_batch(&record)?; let reader = RecordBatchIterator::new(vec![Ok(batch)], recoveries_schema()); diff --git a/crates/omnigraph/src/exec/merge.rs b/crates/omnigraph/src/exec/merge.rs index 0bf37903..8c36f454 100644 --- a/crates/omnigraph/src/exec/merge.rs +++ b/crates/omnigraph/src/exec/merge.rs @@ -1524,7 +1524,6 @@ impl Omnigraph { is_fast_forward: bool, actor_id: Option<&str>, ) -> Result { - self.ensure_commit_graph_initialized().await?; let target_snapshot = self.snapshot().await; let mut table_keys = HashSet::new(); diff --git a/crates/omnigraph/src/failpoints.rs b/crates/omnigraph/src/failpoints.rs index 1eca23af..ee1281a6 100644 --- a/crates/omnigraph/src/failpoints.rs +++ b/crates/omnigraph/src/failpoints.rs @@ -38,8 +38,6 @@ pub(crate) fn maybe_fail_retryable_contention(name: &str) -> Result<()> { /// reference these constants instead of bare string literals, so a typo is a /// compile error rather than a silently-never-firing failpoint. pub mod names { - pub const BRANCH_CREATE_AFTER_MANIFEST_BRANCH_CREATE: &str = "branch_create.after_manifest_branch_create"; - pub const BRANCH_DELETE_BEFORE_COMMIT_GRAPH_RECLAIM: &str = "branch_delete.before_commit_graph_reclaim"; pub const BRANCH_DELETE_BEFORE_TABLE_CLEANUP: &str = "branch_delete.before_table_cleanup"; pub const BRANCH_MERGE_ADOPT_AFTER_APPEND_PRE_UPSERT: &str = "branch_merge.adopt_after_append_pre_upsert"; pub const BRANCH_MERGE_ADOPT_AFTER_UPSERT_PRE_DELETE: &str = "branch_merge.adopt_after_upsert_pre_delete"; diff --git a/crates/omnigraph/src/instrumentation.rs b/crates/omnigraph/src/instrumentation.rs index 97186869..fbe9e696 100644 --- a/crates/omnigraph/src/instrumentation.rs +++ b/crates/omnigraph/src/instrumentation.rs @@ -45,9 +45,9 @@ pub struct QueryIoProbes { pub probe_count: Arc, /// Counts DATA-table open CALLS through the two instrumented chokepoints /// (`open_dataset_tracked` / `open_table_dataset`), classified by URI so the - /// internal/system tables (`__manifest`, `_graph_commits*`) are EXCLUDED — the - /// publisher CAS and commit-graph append open those every write, and counting - /// them would make the `data_open_count <= |touched_tables|` write gate + /// internal/system tables (`__manifest`) are EXCLUDED — the publisher CAS + /// opens those every write, and counting them would make the + /// `data_open_count <= |touched_tables|` write gate /// (RFC-013 step 3b) unreachable by threading alone. Unlike the opener-read /// term (which mixes with the merge-insert/RI scan on the write path), this is /// an exact open-invocation count. `forbidden_apis` keeps engine code OUTSIDE the @@ -57,8 +57,8 @@ pub struct QueryIoProbes { /// does hold direct `Dataset::open`s — but only for branch-management ops /// (`delete_branch`/`list_branches`/`force_delete_branch`), never that hot path.) pub data_open_count: Arc, - /// Internal/system-table (`__manifest`, `_graph_commits*`) open CALLS — the - /// complement of `data_open_count`, kept for symmetry and debugging. + /// Internal/system-table (`__manifest`) open CALLS — the complement of + /// `data_open_count`, kept for symmetry and debugging. pub internal_open_count: Arc, } @@ -83,10 +83,6 @@ pub(crate) fn manifest_wrapper() -> Option> { current(|p| p.manifest_wrapper.clone()).flatten() } -pub(crate) fn commit_graph_wrapper() -> Option> { - current(|p| p.commit_graph_wrapper.clone()).flatten() -} - pub(crate) fn table_wrapper() -> Option> { current(|p| p.table_wrapper.clone()).flatten() } @@ -98,15 +94,9 @@ pub(crate) fn record_probe() { } /// Internal/system table directory names. An open of one of these is a metadata -/// open (publisher CAS, commit-graph append, recovery audit), NOT a data-table -/// open. Kept in sync with the dir constants in `db/manifest/layout.rs`, -/// `db/commit_graph.rs`, and `db/recovery_audit.rs`. -const INTERNAL_TABLE_DIRS: [&str; 4] = [ - "__manifest", - "_graph_commits.lance", - "_graph_commit_actors.lance", - "_graph_commit_recoveries.lance", -]; +/// open (publisher CAS, recovery audit), NOT a data-table open. Kept in sync with +/// the dir constants in `db/manifest/layout.rs` and `db/recovery_audit.rs`. +const INTERNAL_TABLE_DIRS: [&str; 2] = ["__manifest", "_graph_commit_recoveries.lance"]; /// True when `uri`'s last path segment names an internal/system table. fn open_is_internal(uri: &str) -> bool { @@ -117,7 +107,7 @@ fn open_is_internal(uri: &str) -> bool { /// Record one table-open call against the active per-query probes, classified by /// table class (the URI's last segment) so the write gate counts DATA-table opens -/// only and ignores the publisher/commit-graph metadata opens. No-op in production +/// only and ignores the publisher metadata opens. No-op in production /// (the classification runs only inside the probe closure, which `current` skips /// when no probes are installed). Called at both open chokepoints. pub(crate) fn record_open(uri: &str) { diff --git a/crates/omnigraph/tests/branching.rs b/crates/omnigraph/tests/branching.rs index bd98c9c2..7edc6a4b 100644 --- a/crates/omnigraph/tests/branching.rs +++ b/crates/omnigraph/tests/branching.rs @@ -1428,27 +1428,6 @@ async fn branch_merge_reports_cardinality_violation_conflict() { } } -#[tokio::test] -async fn branch_create_bootstraps_missing_commit_graph() { - let dir = tempfile::tempdir().unwrap(); - let uri = dir.path().to_str().unwrap(); - let db = init_and_load(&dir).await; - drop(db); - - fs::remove_dir_all(dir.path().join("_graph_commits.lance")).unwrap(); - - let mut reopened = Omnigraph::open(uri).await.unwrap(); - reopened.branch_create("feature").await.unwrap(); - - assert!(dir.path().join("_graph_commits.lance").exists()); - - let feature = Omnigraph::open(uri).await.unwrap(); - assert_eq!( - count_rows_branch(&feature, "feature", "node:Person").await, - 4 - ); -} - #[tokio::test] async fn branch_api_rejects_reserved_main_and_same_source_target_merge() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/omnigraph/tests/failpoints.rs b/crates/omnigraph/tests/failpoints.rs index 93fa12a4..f5f4b8f2 100644 --- a/crates/omnigraph/tests/failpoints.rs +++ b/crates/omnigraph/tests/failpoints.rs @@ -31,22 +31,6 @@ fn node_table_uri(root: &str, type_name: &str) -> String { format!("{}/nodes/{hash:016x}", root.trim_end_matches('/')) } -#[tokio::test] -#[serial] -async fn branch_create_failpoint_triggers() { - let _scenario = FailScenario::setup(); - let dir = tempfile::tempdir().unwrap(); - let uri = dir.path().to_str().unwrap(); - let db = Omnigraph::init(uri, helpers::TEST_SCHEMA).await.unwrap(); - let _failpoint = ScopedFailPoint::new(names::BRANCH_CREATE_AFTER_MANIFEST_BRANCH_CREATE, "return"); - - let err = db.branch_create("feature").await.unwrap_err(); - assert!( - err.to_string() - .contains("injected failpoint triggered: branch_create.after_manifest_branch_create") - ); -} - // Branch delete flips the manifest authority first, then reclaims the per-table // forks best-effort. A failure during that reclaim (here, the // `branch_delete.before_table_cleanup` failpoint, standing in for a transient @@ -361,50 +345,6 @@ async fn cleanup_isolates_reconcile_failure() { } } -// The cleanup reconciler must reclaim orphaned commit-graph branches, not just -// per-table forks. A delete whose best-effort commit-graph reclaim fails leaves -// a commit-graph orphan; the next cleanup must drop it. -#[tokio::test] -#[serial] -async fn cleanup_reclaims_orphaned_commit_graph_branch() { - let _scenario = FailScenario::setup(); - let dir = tempfile::tempdir().unwrap(); - let uri = dir.path().to_str().unwrap().to_string(); - let mut db = helpers::init_and_load(&dir).await; - - db.branch_create("feature").await.unwrap(); - // Delete, failing the commit-graph reclaim → commit-graph "feature" orphan - // (manifest branch gone, commit-graph branch left behind). - { - let _fp = ScopedFailPoint::new(names::BRANCH_DELETE_BEFORE_COMMIT_GRAPH_RECLAIM, "return"); - db.branch_delete("feature").await.unwrap(); - } - - let commits_uri = format!("{}/_graph_commits.lance", uri.trim_end_matches('/')); - { - let ds = lance::Dataset::open(&commits_uri).await.unwrap(); - assert!( - ds.list_branches().await.unwrap().contains_key("feature"), - "precondition: the commit-graph branch should be orphaned after the failed reclaim" - ); - } - - db.cleanup(omnigraph::db::CleanupPolicyOptions { - keep_versions: Some(1), - older_than: None, - }) - .await - .unwrap(); - - { - let ds = lance::Dataset::open(&commits_uri).await.unwrap(); - assert!( - !ds.list_branches().await.unwrap().contains_key("feature"), - "cleanup should reclaim the orphaned commit-graph branch" - ); - } -} - // `classify_fork_ref` returns `Indeterminate` when the fresh-authority read // fails on a LIVE branch — and a destructive caller must SKIP, never delete, on // that ambiguity. Here the reconciler has a genuine origin-2 orphan candidate @@ -468,65 +408,6 @@ async fn reconcile_skips_fork_when_fresh_recheck_is_unavailable_then_converges() } } -// A branch_delete whose best-effort commit-graph reclaim fails leaves a -// commit-graph "zombie" branch. Recreating that name must heal the zombie and -// succeed (branch_create force-deletes a stale commit-graph ref since the -// manifest branch is created fresh), instead of dying on the leftover ref. -#[tokio::test] -#[serial] -async fn branch_create_recreates_over_commit_graph_zombie() { - let _scenario = FailScenario::setup(); - let dir = tempfile::tempdir().unwrap(); - let db = Omnigraph::init(dir.path().to_str().unwrap(), helpers::TEST_SCHEMA) - .await - .unwrap(); - - db.branch_create("feature").await.unwrap(); - { - // Fail the best-effort commit-graph reclaim → commit-graph "feature" - // zombie survives the delete (manifest authority still flips). - let _fp = ScopedFailPoint::new(names::BRANCH_DELETE_BEFORE_COMMIT_GRAPH_RECLAIM, "return"); - db.branch_delete("feature").await.unwrap(); - } - assert_eq!(db.branch_list().await.unwrap(), vec!["main".to_string()]); - - db.branch_create("feature") - .await - .expect("branch_create should heal the zombie commit-graph branch and succeed"); - assert!( - db.branch_list() - .await - .unwrap() - .contains(&"feature".to_string()) - ); -} - -// branch_create is authority-then-derived: if the derived commit-graph branch -// cannot be created, the manifest branch (the authority) must be rolled back so -// the branch does not half-exist. The existing failpoint fires right after the -// manifest create, standing in for any post-authority failure. -#[tokio::test] -#[serial] -async fn branch_create_rolls_back_manifest_on_commit_graph_failure() { - let _scenario = FailScenario::setup(); - let dir = tempfile::tempdir().unwrap(); - let db = Omnigraph::init(dir.path().to_str().unwrap(), helpers::TEST_SCHEMA) - .await - .unwrap(); - - let err = { - let _fp = ScopedFailPoint::new(names::BRANCH_CREATE_AFTER_MANIFEST_BRANCH_CREATE, "return"); - db.branch_create("feature").await.unwrap_err() - }; - assert!( - !db.branch_list() - .await - .unwrap() - .contains(&"feature".to_string()), - "branch_create must roll back the manifest branch when the derived \ - commit-graph branch fails, got error: {err}" - ); -} // A fork collision must be classified by the manifest authority, not by Lance // branch versions. When a concurrent first-write legitimately wins the fork diff --git a/crates/omnigraph/tests/forbidden_apis.rs b/crates/omnigraph/tests/forbidden_apis.rs index 1a7d855f..6f5ccfcf 100644 --- a/crates/omnigraph/tests/forbidden_apis.rs +++ b/crates/omnigraph/tests/forbidden_apis.rs @@ -108,11 +108,10 @@ const FORBIDDEN_PATTERNS: &[&str] = &[ /// Files exempt from the guard. These are the legitimate storage-layer /// or manifest-layer implementations that USE the forbidden APIs to /// provide the staged primitives or to maintain the system tables -/// (commit graph, manifest). +/// (manifest, recovery audit). const ALLOW_LIST_FILES: &[&str] = &[ "table_store.rs", // The storage layer itself. "storage_layer.rs", // The trait module. - "commit_graph.rs", // Maintains `_graph_commits.lance` system table. "graph_coordinator.rs", // Drives the manifest publisher / branch coordinator. "recovery_audit.rs", // Maintains `_graph_commit_recoveries.lance` (recovery audit trail). "instrumentation.rs", // The instrumented dataset opener (open_dataset_tracked / open_table_dataset). diff --git a/crates/omnigraph/tests/lineage_projection.rs b/crates/omnigraph/tests/lineage_projection.rs index e2a67628..239beb16 100644 --- a/crates/omnigraph/tests/lineage_projection.rs +++ b/crates/omnigraph/tests/lineage_projection.rs @@ -9,36 +9,22 @@ //! reads `__manifest`) reconstructs the full lineage correctly — commit set, //! parents, the merge commit's two parents + merge actor, per-branch heads, //! and the inline actors. -//! 2. `_graph_commits.lance` (and its actor sidecar) hold ZERO commit rows: the -//! dual-write is gone and nothing appends to them. This is the load-bearing -//! "single source" assertion. +//! 2. `_graph_commits.lance` and `_graph_commit_actors.lance` are NEVER CREATED +//! (Phase B retired them — the commit graph opens no Lance dataset). This is +//! the load-bearing "single source" assertion. mod helpers; -use futures::TryStreamExt; -use lance::Dataset; - use omnigraph::db::commit_graph::CommitGraph; use omnigraph::db::{GraphCommit, Omnigraph}; use helpers::*; -/// Count rows in a Lance dataset directory under the graph root, or `0` if it -/// does not exist. -async fn row_count(root: &str, dir: &str) -> usize { - let uri = format!("{}/{}", root.trim_end_matches('/'), dir); - let Ok(dataset) = Dataset::open(&uri).await else { - return 0; - }; - let batches: Vec = dataset - .scan() - .try_into_stream() - .await - .unwrap() - .try_collect() - .await - .unwrap(); - batches.iter().map(|b| b.num_rows()).sum() +/// True when a Lance dataset directory exists under the graph root. +fn table_dir_exists(root: &str, dir: &str) -> bool { + std::path::Path::new(root.trim_end_matches('/')) + .join(dir) + .exists() } /// The production commit-graph projection at `branch`, sourced from `__manifest`. @@ -145,19 +131,19 @@ async fn graph_lineage_lives_only_in_manifest() { "expected a real merge, not fast-forward/up-to-date" ); - // ── single source: nothing writes `_graph_commits.lance` ───────────────── - // RFC-013 Phase 7 folds lineage into `__manifest`; the commit-graph dataset - // exists only to carry branch refs, so it (and its actor sidecar) hold ZERO - // commit rows. If a stray `append_commit` reappears, this turns red. - assert_eq!( - row_count(&uri, "_graph_commits.lance").await, - 0, - "_graph_commits.lance must carry no commit rows — lineage lives in __manifest" + // ── single source: the commit-graph tables are never created ───────────── + // RFC-013 Phase 7 folds lineage into `__manifest`; Phase B then retired the + // two commit-graph datasets entirely (the projection opens no Lance dataset). + // A full realistic history — commits on main, a branch, a merge, all with + // actors — must not bring either directory into existence. If a stray + // dataset write reappears, this turns red. + assert!( + !table_dir_exists(&uri, "_graph_commits.lance"), + "_graph_commits.lance must never be created — lineage lives in __manifest" ); - assert_eq!( - row_count(&uri, "_graph_commit_actors.lance").await, - 0, - "_graph_commit_actors.lance must carry no rows — actors live inline in __manifest" + assert!( + !table_dir_exists(&uri, "_graph_commit_actors.lance"), + "_graph_commit_actors.lance must never be created — actors live inline in __manifest" ); // ── main lineage projected from `__manifest` ───────────────────────────── diff --git a/crates/omnigraph/tests/maintenance.rs b/crates/omnigraph/tests/maintenance.rs index 46e6d96c..031de726 100644 --- a/crates/omnigraph/tests/maintenance.rs +++ b/crates/omnigraph/tests/maintenance.rs @@ -94,25 +94,24 @@ async fn optimize_on_empty_graph_returns_stats_per_table_with_no_changes() { let stats = db.optimize().await.unwrap(); - // Schema declares 2 nodes + 2 edges = 4 data tables, plus the 3 internal - // system tables (`__manifest`, `_graph_commits`, `_graph_commit_actors`) optimize - // also compacts (RFC-013 step 2) = 7. Compaction should run on each but find - // nothing to merge. The genesis graph commit rides the SINGLE init + // Schema declares 2 nodes + 2 edges = 4 data tables, plus the one internal + // system table optimize compacts (`__manifest`, RFC-013 step 2) = 5. Graph + // lineage lives in `__manifest` (Phase B retired the commit-graph datasets), + // so there is no separate lineage table to compact. Compaction runs on each + // but finds nothing to merge: the genesis graph commit rides the SINGLE init // `__manifest` write (RFC-013 Phase 7), so a fresh graph has one fragment per // table — nothing to compact anywhere. - assert_eq!(stats.len(), 7); + assert_eq!(stats.len(), 5); for s in &stats { assert_eq!(s.fragments_removed, 0, "{} should not remove", s.table_key); assert_eq!(s.fragments_added, 0, "{} should not add", s.table_key); } - // The internal tables are present and reported as no-ops on an empty graph. - for key in ["__manifest", "_graph_commits", "_graph_commit_actors"] { - let s = stats - .iter() - .find(|s| s.table_key == key) - .unwrap_or_else(|| panic!("optimize stats missing internal table {key}")); - assert!(!s.committed, "{key} should be a no-op on an empty graph"); - } + // `__manifest` is present and reported as a no-op on an empty graph. + let s = stats + .iter() + .find(|s| s.table_key == "__manifest") + .expect("optimize stats missing internal table __manifest"); + assert!(!s.committed, "__manifest should be a no-op on an empty graph"); } #[tokio::test] @@ -145,14 +144,14 @@ async fn optimize_after_load_then_again_is_idempotent() { } } -/// RFC-013 step 2 + Phase 7: `optimize` compacts `__manifest`, which now -/// accumulates one fragment per commit for BOTH the table-version rows and the -/// folded-in graph-lineage rows (`graph_commit` + `graph_head`). The -/// commit-graph datasets (`_graph_commits`, `_graph_commit_actors`) no longer -/// take a per-commit row (lineage lives in `__manifest`), so they stay flat — -/// nothing to compact. After compaction `__manifest` sheds fragments, writes no -/// recovery sidecar (a single atomic Lance commit — no HEAD-before-publish gap), -/// and the graph stays coherent for subsequent reads + strict writes. +/// RFC-013 step 2 + Phase 7 + Phase B: `optimize` compacts `__manifest`, which +/// now accumulates one fragment per commit for BOTH the table-version rows and the +/// folded-in graph-lineage rows (`graph_commit` + `graph_head`). Graph lineage +/// lives entirely in `__manifest` (Phase B retired the commit-graph datasets), so +/// `__manifest` is the only internal table optimize compacts. After compaction +/// `__manifest` sheds fragments, writes no recovery sidecar (a single atomic Lance +/// commit — no HEAD-before-publish gap), and the graph stays coherent for +/// subsequent reads + strict writes. #[tokio::test] async fn optimize_compacts_internal_tables() { let dir = tempfile::tempdir().unwrap(); @@ -188,18 +187,14 @@ async fn optimize_compacts_internal_tables() { manifest_stats.fragments_removed ); - // The commit-graph datasets take no per-commit row anymore (RFC-013 Phase 7 - // folds lineage into `__manifest`), so they stay at one fragment — no-ops. - for key in ["_graph_commits", "_graph_commit_actors"] { - let s = stats + // `__manifest` is the only internal table optimize touches (Phase B retired + // the commit-graph datasets), so no `_graph_commits*` stat is emitted. + assert!( + !stats .iter() - .find(|s| s.table_key == key) - .unwrap_or_else(|| panic!("optimize stats missing internal table {key}")); - assert!( - !s.committed, - "{key} carries no per-commit rows after Phase 7 — nothing to compact" - ); - } + .any(|s| s.table_key == "_graph_commits" || s.table_key == "_graph_commit_actors"), + "no commit-graph datasets exist after Phase B — optimize must not report them" + ); // Internal compaction leaks no recovery sidecar. let recovery_dir = dir.path().join("__recovery"); @@ -227,38 +222,6 @@ async fn optimize_compacts_internal_tables() { .unwrap(); } -/// `optimize` must not fail on a graph that has no `_graph_commits.lance` — a valid -/// state the coordinator opens as `commit_graph = None` (graphs predating the commit -/// graph). Without the existence guard, `Dataset::open` on the absent table errors -/// and fails the whole optimize. Regression for the missing-existence-guard. -/// -/// Uses an EMPTY graph deliberately: a graph with data would publish during -/// optimize, and a publish records a graph commit that recreates `_graph_commits` -/// before the guard runs — masking the bug. With no data, nothing recreates it, so -/// the table stays absent through the guard. -#[tokio::test] -async fn optimize_tolerates_absent_graph_commits_table() { - let dir = tempfile::tempdir().unwrap(); - let uri = dir.path().to_str().unwrap(); - Omnigraph::init(uri, TEST_SCHEMA).await.unwrap(); - - // Simulate a graph with no commit-graph dataset. - std::fs::remove_dir_all(dir.path().join("_graph_commits.lance")).unwrap(); - - // Coordinator tolerates the absence; optimize must succeed (the guard skips the - // absent table rather than letting `Dataset::open` error) and omit its stat. - let db = Omnigraph::open(uri).await.unwrap(); - let stats = db.optimize().await.unwrap(); - assert!( - stats.iter().any(|s| s.table_key == "__manifest"), - "__manifest must still be compacted" - ); - assert!( - !stats.iter().any(|s| s.table_key == "_graph_commits"), - "absent _graph_commits must be skipped, not opened (would error)" - ); -} - /// `optimize` must stay NON-DESTRUCTIVE on a pre-`auto_cleanup`-fix upgraded graph: /// `compact_files` would otherwise fire the dataset's stored `lance.auto_cleanup.*` /// hook (version GC) during the compaction commit. Internal-table compaction clears From 1b96cbf19ab06da302606b274bc787740277dcee Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Sun, 28 Jun 2026 14:52:04 +0200 Subject: [PATCH 4/8] feat: surface the internal-schema (storage-format) version to operators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After stranding storage versioning (a sub-v4 graph is refused on open), operators could only discover the storage-format version by hitting a refusal. Surface it: - `omnigraph version` prints an `internal-schema ` line (the binary's CURRENT storage-format version). - `omnigraph snapshot` includes `internal_schema_version` — the GRAPH's per-branch on-disk stamp, read via the new `Omnigraph::internal_schema_version_of`. - `GET /healthz` includes `internal_schema_version` (server-scoped: the binary's CURRENT, alongside `version`/`source_version`). Wire: re-expose `INTERNAL_MANIFEST_SCHEMA_VERSION` as `pub` on `db::manifest`; add `internal_schema_version: u32` to `SnapshotOutput` + `HealthOutput`; `snapshot_payload` takes the per-graph version (the `Snapshot` does not carry it), threaded through the embedded CLI + server snapshot callers. `openapi.json` regenerated (two added int32 properties). Extends the existing healthz / snapshot / version tests. --- crates/omnigraph-api-types/src/lib.rs | 12 +++++++++++- crates/omnigraph-cli/src/client.rs | 5 ++++- crates/omnigraph-cli/src/main.rs | 11 ++++++++++- crates/omnigraph-cli/src/output.rs | 8 +++++++- crates/omnigraph-cli/tests/cli_data.rs | 8 ++++++++ .../omnigraph-cli/tests/cli_schema_config.rs | 13 ++++++++++--- crates/omnigraph-server/src/handlers.rs | 19 +++++++++++++++---- crates/omnigraph-server/src/lib.rs | 3 +++ crates/omnigraph-server/tests/auth_policy.rs | 4 ++++ crates/omnigraph-server/tests/data_routes.rs | 4 ++++ crates/omnigraph-server/tests/openapi.rs | 2 ++ crates/omnigraph/src/db/manifest.rs | 6 ++++++ crates/omnigraph/src/db/omnigraph.rs | 10 ++++++++++ openapi.json | 16 +++++++++++++++- 14 files changed, 109 insertions(+), 12 deletions(-) diff --git a/crates/omnigraph-api-types/src/lib.rs b/crates/omnigraph-api-types/src/lib.rs index 32bc7534..cff10f2a 100644 --- a/crates/omnigraph-api-types/src/lib.rs +++ b/crates/omnigraph-api-types/src/lib.rs @@ -43,6 +43,9 @@ pub struct SnapshotTableOutput { pub struct SnapshotOutput { pub branch: String, pub manifest_version: u64, + /// The on-disk internal-schema (storage-format) version this graph's branch + /// is stamped at. + pub internal_schema_version: u32, pub tables: Vec, } @@ -538,6 +541,8 @@ pub struct CommitListQuery { pub struct HealthOutput { pub status: String, pub version: String, + /// The internal-schema (storage-format) version this binary writes and reads. + pub internal_schema_version: u32, #[serde(skip_serializing_if = "Option::is_none")] pub source_version: Option, } @@ -587,7 +592,11 @@ pub struct ErrorOutput { pub manifest_conflict: Option, } -pub fn snapshot_payload(branch: &str, snapshot: &Snapshot) -> SnapshotOutput { +pub fn snapshot_payload( + branch: &str, + snapshot: &Snapshot, + internal_schema_version: u32, +) -> SnapshotOutput { let mut entries: Vec<_> = snapshot.entries().cloned().collect(); entries.sort_by(|a, b| a.table_key.cmp(&b.table_key)); let tables = entries @@ -603,6 +612,7 @@ pub fn snapshot_payload(branch: &str, snapshot: &Snapshot) -> SnapshotOutput { SnapshotOutput { branch: branch.to_string(), manifest_version: snapshot.version(), + internal_schema_version, tables, } } diff --git a/crates/omnigraph-cli/src/client.rs b/crates/omnigraph-cli/src/client.rs index 7151f5e0..dca4b7a4 100644 --- a/crates/omnigraph-cli/src/client.rs +++ b/crates/omnigraph-cli/src/client.rs @@ -278,7 +278,10 @@ impl GraphClient { GraphClient::Embedded { uri, .. } => { let db = Omnigraph::open(uri).await?; let snapshot = db.snapshot_of(ReadTarget::branch(branch)).await?; - Ok(snapshot_payload(branch, &snapshot)) + let internal_schema_version = db + .internal_schema_version_of(ReadTarget::branch(branch)) + .await?; + Ok(snapshot_payload(branch, &snapshot, internal_schema_version)) } } } diff --git a/crates/omnigraph-cli/src/main.rs b/crates/omnigraph-cli/src/main.rs index fa6f4db1..184d2c7b 100644 --- a/crates/omnigraph-cli/src/main.rs +++ b/crates/omnigraph-cli/src/main.rs @@ -214,6 +214,10 @@ async fn main() -> Result<()> { } Command::Version => { println!("omnigraph {}", env!("CARGO_PKG_VERSION")); + println!( + "internal-schema {}", + omnigraph::db::manifest::INTERNAL_MANIFEST_SCHEMA_VERSION + ); } Command::Embed(args) => { let output = execute_embed(&args).await?; @@ -613,7 +617,12 @@ async fn main() -> Result<()> { if json { print_json(&payload)?; } else { - print_snapshot_human(&payload.branch, payload.manifest_version, &payload.tables); + print_snapshot_human( + &payload.branch, + payload.manifest_version, + payload.internal_schema_version, + &payload.tables, + ); } } Command::Export { diff --git a/crates/omnigraph-cli/src/output.rs b/crates/omnigraph-cli/src/output.rs index 80de6257..101ace47 100644 --- a/crates/omnigraph-cli/src/output.rs +++ b/crates/omnigraph-cli/src/output.rs @@ -732,9 +732,15 @@ pub(crate) fn print_embed_human(output: &EmbedOutput) { ); } -pub(crate) fn print_snapshot_human(branch: &str, manifest_version: u64, entries: &[SnapshotTableOutput]) { +pub(crate) fn print_snapshot_human( + branch: &str, + manifest_version: u64, + internal_schema_version: u32, + entries: &[SnapshotTableOutput], +) { println!("branch: {}", branch); println!("manifest_version: {}", manifest_version); + println!("internal_schema_version: {}", internal_schema_version); for entry in entries { println!( "{} v{} branch={} rows={}", diff --git a/crates/omnigraph-cli/tests/cli_data.rs b/crates/omnigraph-cli/tests/cli_data.rs index 81e1aabc..8cbbdc59 100644 --- a/crates/omnigraph-cli/tests/cli_data.rs +++ b/crates/omnigraph-cli/tests/cli_data.rs @@ -1914,6 +1914,10 @@ fn snapshot_json_returns_manifest_version_and_tables() { payload["manifest_version"].as_u64().unwrap(), manifest_dataset_version(&graph) ); + assert_eq!( + payload["internal_schema_version"].as_u64().unwrap(), + u64::from(omnigraph::db::manifest::INTERNAL_MANIFEST_SCHEMA_VERSION) + ); assert!(payload["tables"].as_array().unwrap().len() >= 4); } @@ -1947,6 +1951,10 @@ fn snapshot_human_output_includes_branch_and_table_summaries() { assert!(stdout.contains("branch: main")); assert!(stdout.contains("manifest_version:")); + assert!(stdout.contains(&format!( + "internal_schema_version: {}", + omnigraph::db::manifest::INTERNAL_MANIFEST_SCHEMA_VERSION + ))); assert!(stdout.contains("node:Person v")); assert!(stdout.contains("edge:Knows v")); } diff --git a/crates/omnigraph-cli/tests/cli_schema_config.rs b/crates/omnigraph-cli/tests/cli_schema_config.rs index 5577aa8b..f38f44a1 100644 --- a/crates/omnigraph-cli/tests/cli_schema_config.rs +++ b/crates/omnigraph-cli/tests/cli_schema_config.rs @@ -18,9 +18,16 @@ fn version_command_prints_current_cli_version() { let output = output_success(cli().arg("version")); let stdout = stdout_string(&output); - assert_eq!( - stdout.trim(), - format!("omnigraph {}", env!("CARGO_PKG_VERSION")) + assert!( + stdout.contains(&format!("omnigraph {}", env!("CARGO_PKG_VERSION"))), + "version output must include the CLI version line, got: {stdout}" + ); + assert!( + stdout.contains(&format!( + "internal-schema {}", + omnigraph::db::manifest::INTERNAL_MANIFEST_SCHEMA_VERSION + )), + "version output must include the internal-schema line, got: {stdout}" ); } diff --git a/crates/omnigraph-server/src/handlers.rs b/crates/omnigraph-server/src/handlers.rs index 1571164d..1d0a5360 100644 --- a/crates/omnigraph-server/src/handlers.rs +++ b/crates/omnigraph-server/src/handlers.rs @@ -22,6 +22,7 @@ pub(crate) async fn server_health() -> Json { Json(HealthOutput { status: "ok".to_string(), version: SERVER_VERSION.to_string(), + internal_schema_version: SERVER_INTERNAL_SCHEMA_VERSION, source_version: SERVER_SOURCE_VERSION.map(str::to_string), }) } @@ -459,13 +460,23 @@ pub(crate) async fn server_snapshot( target_branch: None, }, )?; - let snapshot = { + let (snapshot, internal_schema_version) = { let db = &handle.engine; - db.snapshot_of(ReadTarget::branch(branch.as_str())) + let snapshot = db + .snapshot_of(ReadTarget::branch(branch.as_str())) .await - .map_err(ApiError::from_omni)? + .map_err(ApiError::from_omni)?; + let internal_schema_version = db + .internal_schema_version_of(ReadTarget::branch(branch.as_str())) + .await + .map_err(ApiError::from_omni)?; + (snapshot, internal_schema_version) }; - Ok(Json(snapshot_payload(&branch, &snapshot))) + Ok(Json(snapshot_payload( + &branch, + &snapshot, + internal_schema_version, + ))) } /// Header values that flag a response as coming from a deprecated route diff --git a/crates/omnigraph-server/src/lib.rs b/crates/omnigraph-server/src/lib.rs index fbc37d2b..d60a0dc4 100644 --- a/crates/omnigraph-server/src/lib.rs +++ b/crates/omnigraph-server/src/lib.rs @@ -148,6 +148,9 @@ const DEFAULT_REQUEST_BODY_LIMIT_BYTES: usize = 1_048_576; const INGEST_REQUEST_BODY_LIMIT_BYTES: usize = 32 * 1024 * 1024; const SERVER_VERSION: &str = env!("CARGO_PKG_VERSION"); const SERVER_SOURCE_VERSION: Option<&str> = option_env!("OMNIGRAPH_SOURCE_VERSION"); +/// The internal-schema (storage-format) version this binary writes and reads. +const SERVER_INTERNAL_SCHEMA_VERSION: u32 = + omnigraph::db::manifest::INTERNAL_MANIFEST_SCHEMA_VERSION; #[derive(Debug, Clone)] pub struct ServerConfig { diff --git a/crates/omnigraph-server/tests/auth_policy.rs b/crates/omnigraph-server/tests/auth_policy.rs index 5cbbb97c..db02772b 100644 --- a/crates/omnigraph-server/tests/auth_policy.rs +++ b/crates/omnigraph-server/tests/auth_policy.rs @@ -38,6 +38,10 @@ async fn healthz_succeeds_after_startup() { assert_eq!(status, StatusCode::OK); assert_eq!(body["status"], "ok"); assert_eq!(body["version"], env!("CARGO_PKG_VERSION")); + assert_eq!( + body["internal_schema_version"].as_u64().unwrap(), + u64::from(omnigraph::db::manifest::INTERNAL_MANIFEST_SCHEMA_VERSION) + ); match option_env!("OMNIGRAPH_SOURCE_VERSION") { Some(source_version) => assert_eq!(body["source_version"], source_version), None => assert!(body.get("source_version").is_none()), diff --git a/crates/omnigraph-server/tests/data_routes.rs b/crates/omnigraph-server/tests/data_routes.rs index 65af2c63..3e533288 100644 --- a/crates/omnigraph-server/tests/data_routes.rs +++ b/crates/omnigraph-server/tests/data_routes.rs @@ -112,6 +112,10 @@ async fn snapshot_route_returns_manifest_dataset_version() { snapshot_body["manifest_version"].as_u64().unwrap(), expected_manifest_version ); + assert_eq!( + snapshot_body["internal_schema_version"].as_u64().unwrap(), + u64::from(omnigraph::db::manifest::INTERNAL_MANIFEST_SCHEMA_VERSION) + ); assert!(snapshot_body["tables"].is_array()); } diff --git a/crates/omnigraph-server/tests/openapi.rs b/crates/omnigraph-server/tests/openapi.rs index 9276482d..2341bbc2 100644 --- a/crates/omnigraph-server/tests/openapi.rs +++ b/crates/omnigraph-server/tests/openapi.rs @@ -426,6 +426,7 @@ fn health_output_schema_has_expected_fields() { let props = schema["properties"].as_object().unwrap(); assert!(props.contains_key("status")); assert!(props.contains_key("version")); + assert!(props.contains_key("internal_schema_version")); assert!(props.contains_key("source_version")); } @@ -644,6 +645,7 @@ fn snapshot_output_schema_has_expected_fields() { let props = schema["properties"].as_object().unwrap(); assert!(props.contains_key("branch")); assert!(props.contains_key("manifest_version")); + assert!(props.contains_key("internal_schema_version")); assert!(props.contains_key("tables")); } diff --git a/crates/omnigraph/src/db/manifest.rs b/crates/omnigraph/src/db/manifest.rs index 9d6abd1c..72bca16a 100644 --- a/crates/omnigraph/src/db/manifest.rs +++ b/crates/omnigraph/src/db/manifest.rs @@ -49,6 +49,12 @@ pub(crate) use state::{GraphLineageRow, read_graph_lineage}; use state::string_column; use state::{ManifestState, read_manifest_state}; +/// The internal-schema (storage-format) version this binary writes and reads. +/// A graph's on-disk per-branch stamp is read via [`internal_schema_stamp_at`]; +/// this const is the binary's CURRENT. Surfaced to operators via `omnigraph +/// snapshot` and `omnigraph --version`. +pub const INTERNAL_MANIFEST_SCHEMA_VERSION: u32 = migrations::INTERNAL_MANIFEST_SCHEMA_VERSION; + const OBJECT_TYPE_TABLE: &str = "table"; const OBJECT_TYPE_TABLE_VERSION: &str = "table_version"; const OBJECT_TYPE_TABLE_TOMBSTONE: &str = "table_tombstone"; diff --git a/crates/omnigraph/src/db/omnigraph.rs b/crates/omnigraph/src/db/omnigraph.rs index 12f02757..92025d85 100644 --- a/crates/omnigraph/src/db/omnigraph.rs +++ b/crates/omnigraph/src/db/omnigraph.rs @@ -903,6 +903,16 @@ impl Omnigraph { .map(|snapshot| snapshot.version()) } + /// The on-disk internal-schema version of `target`'s branch (the storage-format + /// version this graph is stamped at). Surfaced via `omnigraph snapshot`. + pub async fn internal_schema_version_of( + &self, + target: impl Into, + ) -> Result { + let branch = self.resolved_branch_of(target).await?; + crate::db::manifest::internal_schema_stamp_at(self.uri(), branch.as_deref()).await + } + pub async fn resolved_branch_of( &self, target: impl Into, diff --git a/openapi.json b/openapi.json index 7333248a..ba73e3e0 100644 --- a/openapi.json +++ b/openapi.json @@ -1964,9 +1964,16 @@ "type": "object", "required": [ "status", - "version" + "version", + "internal_schema_version" ], "properties": { + "internal_schema_version": { + "type": "integer", + "format": "int32", + "description": "The internal-schema (storage-format) version this binary writes and reads.", + "minimum": 0 + }, "source_version": { "type": [ "string", @@ -2491,12 +2498,19 @@ "required": [ "branch", "manifest_version", + "internal_schema_version", "tables" ], "properties": { "branch": { "type": "string" }, + "internal_schema_version": { + "type": "integer", + "format": "int32", + "description": "The on-disk internal-schema (storage-format) version this graph's branch\nis stamped at.", + "minimum": 0 + }, "manifest_version": { "type": "integer", "format": "int64", From ddd194a3319d952481ff22a588ead17fa2d3df85 Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Sun, 28 Jun 2026 15:22:45 +0200 Subject: [PATCH 5/8] docs(engine): gate internal-schema version at the graph level; record the per-branch read gap PR reviewers flagged that the open path validates only main's internal-schema stamp, so a branch read could decode a branch stamped outside this binary's range. The stamp is a graph-wide storage-format property (the upgrade path is a whole-graph export/import), so with one binary version every branch is always CURRENT; divergence needs concurrent multi-version writers, an unsupported topology already in one-winner-CAS territory. Gating per-branch would add a second __manifest open per non-main branch read to defend a state we do not support, unearned complexity that regresses the warm-read budget. Keep the graph-level gate, document it at the code site (refuse_if_internal_schema_unsupported), and record the read-only residual hole as a known gap in invariants.md to close only when multi-version write topologies become supported. Also clarify the sub-floor rebuild message to say "export with the older omnigraph binary that created it." No behavior change: HEAD already gated at the graph level. --- crates/omnigraph/src/db/manifest.rs | 13 +++++++++++-- .../omnigraph/src/db/manifest/migrations.rs | 4 ++-- docs/dev/invariants.md | 19 +++++++++++++++++++ 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/crates/omnigraph/src/db/manifest.rs b/crates/omnigraph/src/db/manifest.rs index 72bca16a..e887e45a 100644 --- a/crates/omnigraph/src/db/manifest.rs +++ b/crates/omnigraph/src/db/manifest.rs @@ -94,12 +94,21 @@ pub(crate) async fn internal_schema_stamp_at(root_uri: &str, branch: Option<&str Ok(migrations::read_stamp(&dataset)) } -/// Refuse to open a graph whose `__manifest` is stamped outside this binary's -/// supported internal-schema range (newer than CURRENT, or older than +/// Refuse to open a graph whose `__manifest` (main) is stamped outside this +/// binary's supported internal-schema range (newer than CURRENT, or older than /// MIN_SUPPORTED). Both open paths (read-write and read-only) call this before /// reading any data, so an old binary refuses a newer graph instead of silently /// misreading it, and this binary refuses a below-floor graph with a /// rebuild-via-export/import message instead of opening a format it can't read. +/// +/// The stamp is gated at the GRAPH level (main only). It is a graph-wide +/// storage-format property — the upgrade path is a whole-graph export/import, so +/// with one binary version every branch is always CURRENT (init stamps main, +/// `create_branch` forks the stamp, the publisher writes rows without +/// re-stamping). A branch stamped out of range while main stays in range is only +/// reachable with concurrent multi-version writers, an unsupported topology +/// (writes are refused per-branch by the publisher; a newer binary advancing +/// main is refused here). See the matching known gap in `docs/dev/invariants.md`. pub(crate) async fn refuse_if_internal_schema_unsupported(root_uri: &str) -> Result<()> { let stamp = internal_schema_stamp_at(root_uri, None).await?; migrations::refuse_if_stamp_unsupported(stamp) diff --git a/crates/omnigraph/src/db/manifest/migrations.rs b/crates/omnigraph/src/db/manifest/migrations.rs index 372e2fbc..6b79ad6b 100644 --- a/crates/omnigraph/src/db/manifest/migrations.rs +++ b/crates/omnigraph/src/db/manifest/migrations.rs @@ -109,8 +109,8 @@ pub(crate) fn refuse_if_stamp_unsupported(stamp: u32) -> Result<()> { if stamp < MIN_SUPPORTED_INTERNAL_SCHEMA_VERSION { return Err(OmniError::manifest(format!( "__manifest is stamped at internal schema v{stamp}, but this omnigraph reads only v{current}. \ - This graph was created by an older release; rebuild it: run `omnigraph export` with that \ - older release, then `omnigraph init` + `omnigraph load` with this one. \ + This graph was created by an older omnigraph release; rebuild it: run `omnigraph export` with \ + the older omnigraph binary that created it, then `omnigraph init` + `omnigraph load` with this one. \ (Data, vectors, and blobs are preserved; commit history and branches are not.)", current = INTERNAL_MANIFEST_SCHEMA_VERSION, ))); diff --git a/docs/dev/invariants.md b/docs/dev/invariants.md index 94a83b0a..e6d1e425 100644 --- a/docs/dev/invariants.md +++ b/docs/dev/invariants.md @@ -257,6 +257,25 @@ them explicit. acknowledged-before-visible bug this branch fixed. Close it (local CAS primitive, or a trait-level lock requirement) before admitting any lock-free `if_match` caller. +- **Internal-schema stamp is validated at the graph (main) level only:** + `Omnigraph::open` reads the `omnigraph:internal_schema_version` stamp on + **main** and refuses an out-of-range graph (newer than CURRENT → "upgrade + omnigraph"; below MIN_SUPPORTED → "rebuild via export/import"). Branch reads + then trust that one check. This is correct by the storage-format contract: the + stamp is a graph-wide property (the upgrade path is a whole-graph + export/import), so with one binary version every branch is always CURRENT — + init stamps main, `create_branch` forks the stamp, the publisher writes rows + without re-stamping. A branch stamped out of range while main stays in range is + only reachable with concurrent **multi-version writers**, an unsupported + topology already in one-winner-CAS territory: writes to such a branch are + refused per-branch by the publisher (it reads its target's stamp), and a newer + binary advancing main is refused at open. The residual hole is read-only — + reading or merging a branch a newer binary advanced while main stayed old would + decode it with this binary's logic instead of refusing. Close it (a per-branch + read gate folded into the branch-manifest open the read already does, zero + extra I/O) only when multi-version write topologies are promoted to supported; + a separate stamp round-trip per branch read is the wrong shape (it regresses the + warm-read cost budget to defend an unsupported state). - **Manifest→commit-graph publish atomicity — CLOSED (RFC-013 Phase 7):** graph lineage now lives ONLY in `__manifest`, as `graph_commit` + `graph_head:` rows written in the SAME `MergeInsertBuilder` commit as the table-version rows From 24d8eaf221fe3ab0d1976e9853fc4709df35d9fa Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Sun, 28 Jun 2026 15:36:13 +0200 Subject: [PATCH 6/8] test(cost): remove the dead commit_graph_reads IO counter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase B retired _graph_commits.lance / _graph_commit_actors.lance, so no commit-graph dataset is opened and the commit_graph IOTracker term is structurally always 0. Remove IoCounts::commit_graph_reads, its total_reads() term, the commit_graph IOTracker in OpProbes, and the now-dead commit_graph_wrapper field on QueryIoProbes (it had no accessor — nothing ever attached it). Drop the 7 trivially-true assert_eq!(commit_graph_reads, 0) checks in warm_read_cost.rs and the debug-print refs in write_cost{,_s3}.rs. Lineage and actor rows now live in __manifest (RFC-013 Phase 7), so the internal_table_scans_are_flat_in_history gate folds into the single manifest_reads flat-assertion — the manifest scan already covers them. Harness-only; no production runtime impact. --- crates/omnigraph/src/instrumentation.rs | 1 - crates/omnigraph/tests/helpers/cost.rs | 30 +++++++---------- crates/omnigraph/tests/warm_read_cost.rs | 28 ---------------- crates/omnigraph/tests/write_cost.rs | 42 ++++++++++++------------ crates/omnigraph/tests/write_cost_s3.rs | 5 ++- 5 files changed, 34 insertions(+), 72 deletions(-) diff --git a/crates/omnigraph/src/instrumentation.rs b/crates/omnigraph/src/instrumentation.rs index fbe9e696..011c787c 100644 --- a/crates/omnigraph/src/instrumentation.rs +++ b/crates/omnigraph/src/instrumentation.rs @@ -36,7 +36,6 @@ use crate::storage::StorageAdapter; #[derive(Clone, Default)] pub struct QueryIoProbes { pub manifest_wrapper: Option>, - pub commit_graph_wrapper: Option>, /// Attached to the per-table data opens a query performs (the cache-miss /// path in `SubTableEntry::open`). Lets a cost test assert how many tables /// a query actually opened — N on a cold read, 0 on a warm repeat once the diff --git a/crates/omnigraph/tests/helpers/cost.rs b/crates/omnigraph/tests/helpers/cost.rs index 331bfb37..37ac9e8e 100644 --- a/crates/omnigraph/tests/helpers/cost.rs +++ b/crates/omnigraph/tests/helpers/cost.rs @@ -11,7 +11,7 @@ //! opener (RFC-013 step 3a's target, O(1) after the bypass) from the merge-insert/RI //! scan (O(fragment-count), compaction's domain) even though both ride the same //! `Dataset` — without controlling the fixture (no compaction needed). `__manifest` -//! and `_graph_commits` keep the plain `IOTracker` (no sub-prefixes worth splitting). +//! keeps the plain `IOTracker` (no sub-prefixes worth splitting). #![allow(dead_code)] use std::fmt; @@ -52,10 +52,9 @@ pub struct IoCounts { /// merge-insert/RI **scan**, which grows with fragment count (compaction's /// domain, not the opener). pub data_scan_reads: u64, - /// `__manifest` registry scans (publish state). + /// `__manifest` registry scans (publish state; includes the graph-lineage rows + /// folded into `__manifest` by RFC-013 Phase 7). pub manifest_reads: u64, - /// `_graph_commits` lineage scans. - pub commit_graph_reads: u64, /// Version-probe invocations (the cheap freshness check). pub version_probes: u64, /// DATA-table open CALL count through the two instrumented chokepoints — an @@ -63,14 +62,14 @@ pub struct IoCounts { /// internal/system-table opens are excluded. Step-3b target: /// `data_open_count <= |touched_tables|` for a write. pub data_open_count: u64, - /// Internal/system-table (`__manifest`, `_graph_commits*`) open CALL count — - /// the complement of `data_open_count` (publisher CAS + commit-graph append). + /// Internal/system-table (`__manifest`) open CALL count — the complement of + /// `data_open_count` (publisher CAS). pub internal_open_count: u64, } impl IoCounts { pub fn total_reads(&self) -> u64 { - self.data_reads + self.manifest_reads + self.commit_graph_reads + self.data_reads + self.manifest_reads } } @@ -277,11 +276,10 @@ pub async fn cost_harness(body: F) -> F::Output { } /// The tracker handles backing one measurement; read once into [`IoCounts`]. Data, -/// commit-graph, probe, and open counters are fresh per op; the `__manifest` tracker -/// is the ambient ground-truth one when inside `cost_harness`, else fresh. +/// probe, and open counters are fresh per op; the `__manifest` tracker is the ambient +/// ground-truth one when inside `cost_harness`, else fresh. struct OpProbes { manifest: IOTracker, - commit_graph: IOTracker, table: PrefixCounter, probe_count: Arc, data_open_count: Arc, @@ -300,7 +298,6 @@ impl OpProbes { let _ = manifest.incremental_stats(); let h = OpProbes { manifest, - commit_graph: IOTracker::default(), table: PrefixCounter::default(), probe_count: Arc::new(AtomicU64::new(0)), data_open_count: Arc::new(AtomicU64::new(0)), @@ -308,9 +305,6 @@ impl OpProbes { }; let probes = QueryIoProbes { manifest_wrapper: Some(Arc::new(h.manifest.clone()) as Arc), - commit_graph_wrapper: Some( - Arc::new(h.commit_graph.clone()) as Arc - ), table_wrapper: Some(Arc::new(h.table.clone()) as Arc), probe_count: Arc::clone(&h.probe_count), data_open_count: Arc::clone(&h.data_open_count), @@ -340,7 +334,6 @@ impl OpProbes { data_opener_reads: t.opener_reads, data_scan_reads: t.scan_reads, manifest_reads: manifest.read_iops, - commit_graph_reads: self.commit_graph.incremental_stats().read_iops, version_probes: self.probe_count.load(Ordering::Relaxed), data_open_count: self.data_open_count.load(Ordering::Relaxed), internal_open_count: self.internal_open_count.load(Ordering::Relaxed), @@ -431,10 +424,9 @@ pub async fn measure_insert(db: &mut Omnigraph, tag: &str) -> IoCounts { io } -/// Like [`measure_insert`] but carries an actor, so the write appends to and reads -/// `_graph_commit_actors.lance` — the authenticated (server/CLI) write path. The -/// commit-graph IO wrapper covers both `_graph_commits` and `_graph_commit_actors`, -/// so `IoCounts::commit_graph_reads` includes the actor-table scan on this path. +/// Like [`measure_insert`] but carries an actor — the authenticated (server/CLI) +/// write path. The actor is written inline with the graph-lineage rows in +/// `__manifest` (RFC-013 Phase 7), so its scan is part of `IoCounts::manifest_reads`. pub async fn measure_insert_as(db: &mut Omnigraph, tag: &str, actor: &str) -> IoCounts { let (res, io) = measure(db.mutate_as( "main", diff --git a/crates/omnigraph/tests/warm_read_cost.rs b/crates/omnigraph/tests/warm_read_cost.rs index 8f6e74e0..0d36940b 100644 --- a/crates/omnigraph/tests/warm_read_cost.rs +++ b/crates/omnigraph/tests/warm_read_cost.rs @@ -70,10 +70,6 @@ async fn warm_same_branch_read_does_no_resolution_opens() { io.manifest_reads, 0, "warm same-branch read must not scan __manifest (resolution or per-table)" ); - assert_eq!( - io.commit_graph_reads, 0, - "warm same-branch read must not open the commit graph (no coordinator re-open)" - ); assert_eq!( io.version_probes, 1, "warm same-branch read performs exactly one version probe" @@ -261,10 +257,6 @@ async fn warm_branch_read_does_no_manifest_scans() { io.manifest_reads, 0, "warm branch read must not scan __manifest (branch-owned table opened by location)" ); - assert_eq!( - io.commit_graph_reads, 0, - "warm branch read must not open the commit graph" - ); assert_eq!( io.version_probes, 1, "warm branch read performs exactly one version probe" @@ -352,10 +344,6 @@ async fn warm_read_on_recreated_branch_observes_new_incarnation() { io.manifest_reads > 0, "recreated branch must re-read the manifest after the incarnation probe" ); - assert_eq!( - io.commit_graph_reads, 0, - "same-branch incarnation refresh must be manifest-only" - ); assert_eq!( io.version_probes, 2, "stale same-branch read probes once under the read lock and once under the write lock" @@ -449,10 +437,6 @@ async fn recreated_branch_owned_table_handle_uses_table_etag() { io.manifest_reads > 0, "recreated branch must refresh the manifest" ); - assert_eq!( - io.commit_graph_reads, 0, - "same-branch table-incarnation refresh must be manifest-only" - ); assert_eq!( io.version_probes, 2, "stale same-branch read probes once under each lock" @@ -564,10 +548,6 @@ async fn recreated_branch_traversal_uses_graph_index_incarnation() { io.manifest_reads > 0, "recreated branch traversal must refresh the manifest" ); - assert_eq!( - io.commit_graph_reads, 0, - "same-branch traversal incarnation refresh must be manifest-only" - ); assert_eq!( io.version_probes, 2, "stale same-branch read probes once under each lock" @@ -633,10 +613,6 @@ async fn stale_read_refreshes_manifest_only() { io.manifest_reads > 0, "stale read must re-read the manifest" ); - assert_eq!( - io.commit_graph_reads, 0, - "stale refresh must be manifest-only (no commit-graph scan)" - ); assert_eq!( io.version_probes, 2, "stale same-branch read probes once under the read lock and once under the write lock" @@ -691,10 +667,6 @@ async fn repeat_warm_read_reuses_table_handles() { "a warm repeat read must reuse the held handle (0 table opens)" ); assert_eq!(warm.manifest_reads, 0, "warm repeat read: 0 manifest opens"); - assert_eq!( - warm.commit_graph_reads, 0, - "warm repeat read: 0 commit-graph opens" - ); assert_eq!( warm.version_probes, 1, "warm repeat read: exactly one version probe" diff --git a/crates/omnigraph/tests/write_cost.rs b/crates/omnigraph/tests/write_cost.rs index 422be418..0b3e1ed9 100644 --- a/crates/omnigraph/tests/write_cost.rs +++ b/crates/omnigraph/tests/write_cost.rs @@ -31,16 +31,16 @@ use helpers::{MUTATION_QUERIES, commit_many, commit_many_as, init_and_load, mixe // ── (A) The internal-table LOCK — the acceptance test for step 2 (compaction) ── // -// `__manifest` / `_graph_commits` / `_graph_commit_actors` scans on a write must be -// O(1) in commit-history depth **on a compacted graph**. Without internal-table -// compaction these scans are O(fragments) and grow forever; step 2 brings all three -// internal tables into `db.optimize()`, so after compaction the per-write scan is -// flat. The test runs the **authenticated (actorful) write path** — every commit -// carries an actor, so it grows `_graph_commit_actors.lance` too (the production -// server/CLI path); the commit-graph IO wrapper covers both that and `_graph_commits`, -// so `commit_graph_reads` includes the actor-table scan. It compacts at each depth -// checkpoint before measuring — pinning the production invariant "a periodically- -// compacted graph's write cost does not grow with version history." +// The `__manifest` scan on a write must be O(1) in commit-history depth **on a +// compacted graph**. Graph lineage lives in `__manifest` (RFC-013 Phase 7 — the +// `_graph_commits`/`_graph_commit_actors` tables are retired), so the manifest scan +// also covers the lineage and actor rows the authenticated write path appends. +// Without internal-table compaction these scans are O(fragments) and grow forever; +// step 2 brings the internal tables into `db.optimize()`, so after compaction the +// per-write scan is flat. The test runs the **authenticated (actorful) write path** +// — every commit carries an actor — and compacts at each depth checkpoint before +// measuring, pinning the production invariant "a periodically-compacted graph's +// write cost does not grow with version history." #[tokio::test] async fn internal_table_scans_are_flat_in_history() { // `cost_harness` installs the ground-truth __manifest tracker for the whole body, @@ -64,16 +64,16 @@ async fn internal_table_scans_are_flat_in_history() { let io = measure_insert_as(&mut db, &format!("lock_{d}"), ACTOR).await; current += 1; // the measured write advanced depth by one eprintln!( - "depth~{d}: data={} __manifest={} _graph_commits+actors={}", - io.data_reads, io.manifest_reads, io.commit_graph_reads + "depth~{d}: data={} __manifest={}", + io.data_reads, io.manifest_reads ); curve.push((d, io)); } + // Lineage + actor rows live in `__manifest` now, so this single flat-assertion + // gates the whole internal-table scan (including the authenticated path's actor + // rows) across history. assert_flat(&curve, |c| c.manifest_reads, 4, "__manifest scan"); - // commit_graph_reads covers BOTH _graph_commits and _graph_commit_actors (shared - // wrapper), so this also gates the actor table on the authenticated path. - assert_flat(&curve, |c| c.commit_graph_reads, 4, "_graph_commits + _graph_commit_actors scan"); }) .await; } @@ -99,8 +99,8 @@ async fn internal_table_scans_are_flat_in_history() { /// **When it goes red, that is the signal to invert it to** /// `assert_flat(&curve, |c| c.manifest_reads, , "__manifest scan (served)")` — /// promoting it to the permanent served-regime gate. Only `manifest_reads` is -/// asserted: #299 moved lineage into `__manifest` and made the per-write commit-graph -/// update in-memory, so `commit_graph_reads` no longer grows per write on this branch. +/// asserted: lineage lives in `__manifest` (RFC-013 Phase 7) and the per-write +/// commit-graph update is in-memory, so there is no separate commit-graph scan. #[tokio::test] async fn internal_table_scans_grow_without_compaction() { cost_harness(async { @@ -120,8 +120,8 @@ async fn internal_table_scans_grow_without_compaction() { let io = measure_insert_as(&mut db, &format!("served_{d}"), ACTOR).await; current += 1; // the measured write advanced depth by one eprintln!( - "depth~{d} (uncompacted): data={} __manifest={} _graph_commits+actors={}", - io.data_reads, io.manifest_reads, io.commit_graph_reads + "depth~{d} (uncompacted): data={} __manifest={}", + io.data_reads, io.manifest_reads ); curve.push((d, io)); } @@ -212,8 +212,8 @@ async fn write_op_count_ceiling_at_shallow_depth() { commit_many(&mut db, 5).await; let io = measure_insert(&mut db, "ceil").await; eprintln!( - "depth~5: data={} __manifest={} _graph_commits={} total_reads={}", - io.data_reads, io.manifest_reads, io.commit_graph_reads, io.total_reads() + "depth~5: data={} __manifest={} total_reads={}", + io.data_reads, io.manifest_reads, io.total_reads() ); // Sub-ceiling on ground-truth `__manifest` reads. ~18 measured at this depth = // ~15 publish-path scans (one fold, not four — RFC-013 P2) + ~3 from the diff --git a/crates/omnigraph/tests/write_cost_s3.rs b/crates/omnigraph/tests/write_cost_s3.rs index d8ffd4f0..f97c8811 100644 --- a/crates/omnigraph/tests/write_cost_s3.rs +++ b/crates/omnigraph/tests/write_cost_s3.rs @@ -54,12 +54,11 @@ async fn data_table_opener_is_flat_in_history_on_s3() { let io = measure_insert(&mut db, &format!("s3_{d}")).await; current += 1; eprintln!( - "depth~{d}: opener={} scan={} data_total={} __manifest={} _graph_commits={}", + "depth~{d}: opener={} scan={} data_total={} __manifest={}", io.data_opener_reads, io.data_scan_reads, io.data_reads, - io.manifest_reads, - io.commit_graph_reads + io.manifest_reads ); curve.push((d, io)); } From 05eafc6e21eb6ada945c2718bfbb2802256aac73 Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Sun, 28 Jun 2026 15:53:06 +0200 Subject: [PATCH 7/8] docs: align with the commit-graph retirement + strand storage versioning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the always-loaded and user-facing docs to match the landed state: graph lineage lives in __manifest, the _graph_commits.lance / _graph_commit_actors.lance tables are retired, and storage is strict-single-version (no in-place migration — a sub-CURRENT graph is refused with an export/import rebuild). Fixed stale claims in invariants.md (the migration/atomicity known-gap entry, the Truth Matrix branch-delete row, the read-path/optimize internal-table scope), lance.md (the migrate_v1_to_v2 PK bullet now reflects init-time set; removed the two deleted v3->v4 migration surface guards), testing.md (dropped the deleted migration failpoint tests; manifest-only internal-table term), writes.md (rewrote the Migration-code section to the strand model), storage.md / maintenance.md / constants.md (retired tables out of the layout, internal-table compaction scope, and the constants cheat-sheet), and AGENTS.md. Marked the retirement DONE in the RFC-013 handoff/roadmap and banner-noted the historical RFC analysis. Added docs/user/operations/upgrade.md (the export/import rebuild recipe) and docs/dev/versioning.md (the four-axis compatibility policy: release lockstep / wire additive / storage strict-single-version / Lance pinned), cross-linked from the audience indexes and the AGENTS.md topic map, and rewrote the in-progress v0.8.0 release note for the strand model + version surfacing. check-agents-md.sh passes (65 links, 62 docs). --- AGENTS.md | 4 +- docs/dev/handoff-rfc-013-write-path.md | 15 +++- docs/dev/index.md | 1 + docs/dev/invariants.md | 62 ++++++-------- docs/dev/lance.md | 10 ++- docs/dev/rfc-013-write-path-latency.md | 8 ++ docs/dev/testing.md | 6 +- docs/dev/versioning.md | 78 +++++++++++++++++ docs/dev/write-latency-roadmap.md | 2 +- docs/dev/writes.md | 56 ++++++------- docs/releases/v0.8.0.md | 112 ++++++++++++++----------- docs/user/concepts/storage.md | 27 +++--- docs/user/index.md | 1 + docs/user/operations/maintenance.md | 2 +- docs/user/operations/upgrade.md | 84 +++++++++++++++++++ docs/user/reference/constants.md | 6 +- 16 files changed, 330 insertions(+), 144 deletions(-) create mode 100644 docs/dev/versioning.md create mode 100644 docs/user/operations/upgrade.md diff --git a/AGENTS.md b/AGENTS.md index c0ff4f0a..bca6ea77 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -92,6 +92,8 @@ Full diagram and concurrency model: [docs/dev/architecture.md](docs/dev/architec | Diff / change feed (`diff_between`, `diff_commits`) | [docs/user/branching/changes.md](docs/user/branching/changes.md) | | Query execution, mutation execution, bulk loader, `load` vs `ingest` | [docs/dev/execution.md](docs/dev/execution.md) | | `optimize` (compaction) and `cleanup` (version GC) | [docs/user/operations/maintenance.md](docs/user/operations/maintenance.md) | +| Upgrade across a storage-format change (export/import rebuild) | [docs/user/operations/upgrade.md](docs/user/operations/upgrade.md) | +| Versioning & compatibility policy (release / wire / storage strict-single-version / Lance) | [docs/dev/versioning.md](docs/dev/versioning.md) | | Cluster operator guide (deploy/manage clusters, approvals, recovery, serving) | [docs/user/clusters/index.md](docs/user/clusters/index.md) | | Cedar policy actions, scopes, CLI | [docs/user/operations/policy.md](docs/user/operations/policy.md) | | HTTP server endpoints, auth, error model, body limits | [docs/user/operations/server.md](docs/user/operations/server.md) | @@ -263,7 +265,7 @@ omnigraph policy explain --cluster ./company-brain --graph knowledge --actor act | Schema language | — | `.pg` + Pest grammar + catalog + interfaces + constraints + annotations | | Query language | — | `.gq` + Pest grammar + IR + lowering + linter | | Schema migration planning | — | `plan_schema_migration` + `apply_schema` step types + `__schema_apply_lock__` | -| Commit graph (DAG) across whole graph | — | Lineage (linear + merge parents, ULID ids, actor) stored as `graph_commit`/`graph_head` rows in `__manifest`, written in the same publish CAS as the table-version rows (RFC-013 Phase 7 — no separate `_graph_commits.lance` write; manifest→commit-graph atomicity gap closed); the in-memory commit graph is a projection of those rows | +| Commit graph (DAG) across whole graph | — | Lineage (linear + merge parents, ULID ids, actor) stored as `graph_commit`/`graph_head` rows in `__manifest`, written in the same publish CAS as the table-version rows (RFC-013 Phase 7 — atomic with the graph commit). The in-memory commit graph is a pure projection of those rows; the legacy `_graph_commits.lance` / `_graph_commit_actors.lance` tables are **retired** (a fresh graph creates neither) | | Per-query atomic writes | — | In-memory `MutationStaging.pending` accumulator + `stage_*` / `commit_staged` per touched table at end-of-query + publisher CAS via `commit_with_expected` (single manifest commit per `mutate_as` / `load`); D₂ parse-time rule keeps inserts/updates and deletes from mixing | | Three-way row-level merge | — | `OrderedTableCursor` + `StagedTableWriter`, structured `MergeConflictKind` | | Change feeds | — | `diff_between` / `diff_commits` with manifest fast path + ID streaming | diff --git a/docs/dev/handoff-rfc-013-write-path.md b/docs/dev/handoff-rfc-013-write-path.md index a281b60e..9ae296b6 100644 --- a/docs/dev/handoff-rfc-013-write-path.md +++ b/docs/dev/handoff-rfc-013-write-path.md @@ -1,5 +1,12 @@ # Handoff: finishing RFC-013 (write-path latency + correctness) +> **Status update (strand-and-retire work):** the `_graph_commits.lance` / +> `_graph_commit_actors.lance` datasets are **retired** (Phase B — lineage lives in +> `__manifest`; the `CommitGraph` is a pure projection). References to those tables +> below are historical: `optimize` now compacts **`__manifest` only** among the +> internal tables, and the per-write `_graph_commits` scan term is gone. See +> [versioning.md](versioning.md) and [invariants.md](invariants.md). + **Status:** living handoff. **Source of truth is [`rfc-013-write-path-latency.md`](rfc-013-write-path-latency.md)** — this doc is the *current-state map + the decisions/validation from the latest work cycle + the concrete next actions*. When they disagree, the RFC wins (and fix this doc). @@ -142,9 +149,11 @@ a 0-IO cache hit). So, apples-to-apples (both ground truth), per-write `__manife is genuinely cross-branch with no index today). - **PR4 / RFC #7264** — Lance native branch-aware `BatchCreateTableVersions`; manifest read → O(1), per-write fragment append gone; retires most of PR1/PR2. Upstream-blocked. -4. **Low-leverage:** retire the vestigial `_graph_commits`/`_graph_commit_actors` datasets (zero rows - post-#299, only branch-ref carriers); a bitmap index on `__manifest` (no builder exists; `use_index(false)` - means it can't serve the CAS join anyway — a `graph_head:` point-lookup is the better variant). +4. **Low-leverage:** ~~retire the vestigial `_graph_commits`/`_graph_commit_actors` datasets~~ **DONE** + (Phase B of the strand-and-retire work — the tables are gone, branch authority is `__manifest`, the + `CommitGraph` is a pure `__manifest` projection). Still open: a bitmap index on `__manifest` (no builder + exists; `use_index(false)` means it can't serve the CAS join anyway — a `graph_head:` point-lookup + is the better variant). ### A.6 Critical files (Thread B) diff --git a/docs/dev/index.md b/docs/dev/index.md index e48402dc..dc51e836 100644 --- a/docs/dev/index.md +++ b/docs/dev/index.md @@ -12,6 +12,7 @@ constraints. User-facing behavior should still be documented through | Need | Read | |---|---| | Architectural rules, known gaps, deny-list | [invariants.md](invariants.md) | +| Versioning & compatibility policy (release / wire / storage / Lance) | [versioning.md](versioning.md) | | Upstream Lance source-of-truth index | [lance.md](lance.md) | | Existing test coverage and test placement | [testing.md](testing.md) | diff --git a/docs/dev/invariants.md b/docs/dev/invariants.md index e6d1e425..f7a6443a 100644 --- a/docs/dev/invariants.md +++ b/docs/dev/invariants.md @@ -155,7 +155,7 @@ converge the physical state. | Multi-table commit | Manifest CAS plus recovery sidecars; not a single Lance primitive | [writes.md](writes.md), [architecture.md](architecture.md) | | Constructive mutations | In-memory `MutationStaging`, one end-of-query table commit per touched table, then one manifest publish | [writes.md](writes.md), [execution.md](execution.md) | | Deletes | Staged like inserts/updates (`stage_delete` via Lance 7.0 `DeleteBuilder::execute_uncommitted`, MR-A) — no inline HEAD advance; mixed insert/update/delete in one query rejected by D2 as a deliberate boundary (constructive XOR destructive per query; compose via separate mutations or a branch) | [query-language.md](../user/queries/index.md), [writes.md](writes.md) | -| Branch delete | Manifest is the single authority, flipped atomically first; per-table forks + commit-graph branch are derived state, reclaimed best-effort (`force_delete_branch`) with the `cleanup` reconciler as the guaranteed backstop. Reusing a name whose reclaim failed before `cleanup` surfaces an actionable error | [branches-commits.md](../user/branching/index.md), [maintenance.md](../user/operations/maintenance.md) | +| Branch delete | Manifest is the single authority, flipped atomically first; per-table forks are derived state, reclaimed best-effort (`force_delete_branch`) with the `cleanup` reconciler as the guaranteed backstop. Reusing a name whose reclaim failed before `cleanup` surfaces an actionable error | [branches-commits.md](../user/branching/index.md), [maintenance.md](../user/operations/maintenance.md) | | Schema validation | Type checks, required fields, defaults, edge endpoint checks, and edge cardinality are enforced on write paths | [schema-language.md](../user/schema/index.md), [execution.md](execution.md) | | Unique constraints | Intra-batch and write-path checks exist; intake and branch-merge derive the composite key through one shared function (`loader::composite_unique_key`, a separator-free `Vec` tuple) and fail loudly on an un-keyable column type rather than silently exempting it; full cross-version uniqueness against already-committed rows is still a gap | [schema-language.md](../user/schema/index.md) | | Storage trait | `TableStorage` (via `db.storage()`) is staged-only; the sole inline-commit residual (`create_vector_index`) is split onto a separate sealed `InlineCommitResidual` trait reached via `db.storage_inline_residual()` (MR-854), so §1 holds by construction; capability/stat surfaces are roadmap | [writes.md](writes.md), [architecture.md](architecture.md) | @@ -277,42 +277,26 @@ them explicit. a separate stamp round-trip per branch read is the wrong shape (it regresses the warm-read cost budget to defend an unsupported state). - **Manifest→commit-graph publish atomicity — CLOSED (RFC-013 Phase 7):** graph - lineage now lives ONLY in `__manifest`, as `graph_commit` + `graph_head:` + lineage lives ONLY in `__manifest`, as `graph_commit` + `graph_head:` rows written in the SAME `MergeInsertBuilder` commit as the table-version rows (`commit_changes_with_lineage` → `GraphNamespacePublisher::publish` with a `LineageIntent`). There is no second write to fail between — a graph commit and its lineage land at one manifest version atomically, so a crash after the publish - leaves no gap. The commit-graph cache is a derived projection of those manifest - rows; nothing writes `_graph_commits.lance` (it persists only to carry branch - refs). The prior two-write gap (manifest at N with no `_graph_commits` row for N) - is gone by construction. A graph created before Phase 7 (internal schema v3) - carries its lineage only in `_graph_commits.lance`; the `migrate_v3_to_v4` - internal-schema step (`db/manifest/migrations.rs`) backfills it into `__manifest` - per-branch on the first read-write open (idempotent, crash-safe, data-preserving), - and a read-only open of an un-migrated v3 graph sources the DAG from - `_graph_commits.lance` via a stamp-gated transitional fallback so reads stay - correct until the first write migrates it. An old binary refuses a v4-stamped - graph (read-write and read-only) with the standard upgrade error. The migration - is **loud on failure and concurrent-runner idempotent**: the legacy-open read - (`read_legacy_commit_cache`) treats only a genuine not-found as "no legacy data" - and propagates any other open error (so a transient/corrupt open can never stamp - v4 over an empty backfill — orphaning lineage permanently), and the backfill - converges all-or-nothing when two runners open the same legacy graph at once — a - bounded re-open retry on the `graph_head:` row-level CAS plus an - idempotent terminal stamp bump (both runners write the same value, so a concurrent - `UpdateConfig`/`IncompatibleTransaction` loss re-opens and no-ops if the stamp - already landed). The branch read path (`load_commit_cache_for_branch`) also - refuses an out-of-range branch stamp (`> CURRENT` or `< MIN_SUPPORTED`; - defense-in-depth; not a live hole because migrations run main-first, so main - refuses first). The migration chain is **floor-bounded**: - `MIN_SUPPORTED_INTERNAL_SCHEMA_VERSION` (migrations.rs; 1 today, a pure no-op) is - the oldest stamp this binary opens, enforced symmetrically with the ceiling by the - single `refuse_if_stamp_unsupported` guard at all three stamp-read sites - (write-path migrate, read-only open, branch lineage-read). Raising MIN sheds the - now-dead `migrate_vN_…` arms and (at MIN ≥ 4) the `commit_graph_legacy_v3` legacy - readers; a compile-time tripwire (`LOWEST_REGISTERED_MIGRATION_SOURCE`) fails the - build if the floor and the lowest registered arm drift. Retirement runbook lives on - the `MIN_SUPPORTED_INTERNAL_SCHEMA_VERSION` doc-comment. + leaves no gap. The in-memory commit graph is a pure projection of those rows. The + `_graph_commits.lance` / `_graph_commit_actors.lance` tables are **retired**: a + fresh graph creates neither, branch authority is `__manifest` only, and nothing + reads or writes them. The prior two-write gap (manifest at N with no + `_graph_commits` row for N) is gone by construction. +- **Storage is strict-single-version (the strand model):** this binary reads + exactly ONE internal-schema version (`MIN_SUPPORTED == CURRENT`), so there is no + in-place migration. A graph stamped below CURRENT is refused on open with a + rebuild-via-export/import message (`refuse_if_stamp_unsupported`), not silently + upgraded; a graph stamped above CURRENT is refused with an "upgrade omnigraph" + message. The `migrate_v*` dispatcher, the `_graph_commits.lance` legacy-read + fallback, and the migration floor-bounding machinery were all deleted with the + retirement — the stamp + `refuse_if_stamp_unsupported` floor is the only seam a + future migration would re-introduce. See `docs/dev/versioning.md` (the + compatibility policy) and `docs/user/operations/upgrade.md` (the rebuild recipe). - **Planner capability/stat surfaces:** cost-aware planning, complete capability advertisement, and explain-with-cost are roadmap. Do not describe them as implemented. @@ -329,7 +313,8 @@ them explicit. widening the gap. - **Read-path re-derivation (largely closed by the query-latency work):** snapshot resolution used to re-open a fresh coordinator per read (a full - `__manifest` re-scan plus two commit-graph scans), open each table through the + `__manifest` re-scan plus the then-separate commit-graph-table scans, since + retired), open each table through the namespace (two more `__manifest` scans per table), validate the schema twice, and share no Lance `Session`. That was an O(commits) cost that never warmed up. Fix 1 (warm coordinator reuse behind a `latest_version_id` probe), Fix 2 (open @@ -343,10 +328,11 @@ them explicit. the manifest e_tag is carried into synthetic snapshot ids when available, and a detected same-branch manifest refresh clears read caches as the fallback for e_tag-less table locations/topology. Remaining: `optimize` now compacts the - internal metadata tables (`__manifest`, `_graph_commits`) too (RFC-013 step 2), - so a *periodically-optimized* graph keeps the probe/refresh/per-write scan flat - in history; but they are not yet brought into `cleanup` (version GC), so the - `_versions/` chain still grows until an explicit cleanup (the cleanup half is + internal metadata table (`__manifest`, which carries the lineage rows) too + (RFC-013 step 2), so a *periodically-optimized* graph keeps the + probe/refresh/per-write scan flat in history; but it is not yet brought into + `cleanup` (version GC), so the `_versions/` chain still grows until an explicit + cleanup (the cleanup half is deferred — it needs the Q8 cleanup-resurrection watermark first). The commit graph IS now reconcilable from the manifest (RFC-013 Phase 7 — it is a pure projection of the `graph_commit`/`graph_head` rows); the traversal id-map is diff --git a/docs/dev/lance.md b/docs/dev/lance.md index ad3e11db..ca8c514a 100644 --- a/docs/dev/lance.md +++ b/docs/dev/lance.md @@ -166,11 +166,17 @@ Migration from Lance 6.0.1 → 7.0.0 landed in this cycle. **Arrow stayed 58, Da - **BTREE range-query bound inclusiveness fixed** (PR #6796, issue #6792): `x <= hi AND x > lo` returned the wrong boundary row on 6.0.1. omnigraph today builds BTREE only on string `@key` columns (`id`/`src`/`dst`) and queries them by equality/IN, not range, so its *current* query patterns almost certainly never hit this bug — but the corrected boundary semantics are a contract we rely on the moment a BTREE-range path appears (BTREE-on-properties via the index-type tickets, or a range-on-key query). Pinned by `lance_surface_guards.rs::btree_range_query_boundary_is_correct` (reproduces #6792's 5-row + BTREE shape). - **`WriteParams::auto_cleanup` default flipped from on (every-20-commits) to `None`** (PR #6755). On 6.0.1 the on-by-default hook could GC versions the `__manifest` pins for snapshots/time-travel. omnigraph owns cleanup explicitly (`optimize.rs::cleanup_all_tables`). Two parts to the fix, because `auto_cleanup` is **create-time config only and has no effect on existing datasets** (Lance `write.rs` docs): (1) `auto_cleanup: None` at all 11 `WriteParams` sites so *new* datasets store no cleanup config; (2) — the load-bearing half — `skip_auto_cleanup: true` on every commit path, because graphs created **before** the bump still carry the on-config in their datasets, and Lance's hook fires off the *dataset's stored* config at commit time (`io/commit.rs`: `if !commit_config.skip_auto_cleanup`). So the staged commit path (`commit_staged` → `CommitBuilder::with_skip_auto_cleanup(true)`), the `__manifest` publisher (`MergeInsertBuilder::skip_auto_cleanup(true)`), and the direct `WriteParams` paths all skip the hook. Without this, an upgraded graph would still auto-cleanup and delete `__manifest`-pinned versions. Pinned by `lance_surface_guards.rs::skip_auto_cleanup_suppresses_version_gc` (negative control + with-skip survival). - **Lance #6658 SHIPPED in 7.0.0** (`DeleteBuilder::execute_uncommitted`, exposed via PR #6781) → MR-A (migrate `delete` to the staged two-phase API) **has since landed** (dev-graph `iss-950`): `delete_where` is retired, deletes stage via `TableStorage::stage_delete`, and the guard was flipped to `_compile_uncommitted_delete_field_shape` (pins `execute_uncommitted` / `UncommittedDelete`). `StagedWrite` must carry `UncommittedDelete.affected_rows` through `commit_staged` so Lance's row-level rebase metadata is preserved. The parse-time D2 rule is retained as a deliberate boundary (constructive XOR destructive per query), not as scaffolding awaiting further work. -- **The unenforced primary key is now immutable once set** (`lance::dataset::transaction`, ~L2472–2480: `if !primary_key_before.is_empty() && (writes_primary_key || primary_key_after != primary_key_before) → "the unenforced primary key is a reserved key and cannot be changed once set"`). omnigraph marks `__manifest.object_id` as the unenforced PK (`lance-schema:unenforced-primary-key`) for merge-insert row-level CAS — baked into `manifest_schema()` at init, and added by the `migrate_v1_to_v2` internal-schema migration for pre-v0.4.0 graphs. The migration relied on Lance 6's idempotent re-apply for crash-recovery (a crash after the field-set but before the stamp bump re-enters the migration with the PK already present); under v7 that re-apply errors, so a real v1 graph could never finish migrating. Fixed by guarding the set on the manifest's unenforced-PK field (`db/manifest/migrations.rs::migrate_v1_to_v2`): `["object_id"]` → no-op, `[]` → set, any other PK field → loud refusal (the wrong CAS key, unchangeable under v7). Pinned by `lance_surface_guards.rs::unenforced_primary_key_is_immutable_once_set` (red if Lance relaxes immutability); regression: `db::manifest::tests::test_publish_migrates_pre_stamp_manifest_to_current_version` (was red under v7). +- **The unenforced primary key is now immutable once set** (`lance::dataset::transaction`, ~L2472–2480: `if !primary_key_before.is_empty() && (writes_primary_key || primary_key_after != primary_key_before) → "the unenforced primary key is a reserved key and cannot be changed once set"`). omnigraph marks `__manifest.object_id` as the unenforced PK (`lance-schema:unenforced-primary-key`) for merge-insert row-level CAS — baked into the manifest schema at init (`db/manifest/state.rs`). With the strand model there is no in-place migration, so the PK is only ever set at init: a graph that predates the annotation is refused on open (`refuse_if_stamp_unsupported`) and rebuilt via export/import, never re-keyed — which is also what Lance's immutability rule would require, since the wrong PK could not be changed once set. Pinned by `lance_surface_guards.rs::unenforced_primary_key_is_immutable_once_set` (red if Lance relaxes immutability). - **Native `DirectoryNamespace` no longer recognizes omnigraph's manifest-tracked tables** (`lance-namespace-impls` dir.rs ~L1310): `list/describe/create_table_version` route through `check_table_status`, which reports an omnigraph table absent → `TableNotFound`. The decoupling is *contingent on omnigraph's legacy boolean PK key*, not an unconditional v7 property: v7's namespace eagerly adds the new `lance-schema:unenforced-primary-key:position` key to any `__manifest` lacking it; that write hits the immutable-PK rule above (the boolean key already set the PK), so `ensure_manifest_table_up_to_date` errors and the namespace silently falls back to directory listing. omnigraph keeps the boolean key deliberately — Lance honors it permanently (maps to PK position 0), and one uniform on-disk format beats a new-vs-old split (existing graphs can't be re-keyed to the position key under that same immutability rule). omnigraph production never uses Lance's native namespace (its publisher writes `__manifest` directly via merge_insert; its own `namespace.rs` impls are custom), so this is test-only — the `test_directory_namespace_direct_publish_cannot_replace_native_omnigraph_write_path` surface guard was realigned to the v7 behavior (it now asserts the native namespace is fully decoupled, which only strengthens the guard's thesis). - **Still NOT fixed in 7.0.0:** vector-index two-phase (Lance #6666 open) — `create_vector_index` inline residual retained; blob-column compaction — `compact_files_still_fails_on_blob_columns` guard still red on a fix, `optimize` still skips blob tables behind `LANCE_SUPPORTS_BLOB_COMPACTION`. - **No Lance API surface omnigraph uses changed at *compile* time** (the only compile break was object_store) — but **two runtime behaviors did** (the unenforced-PK immutability and the native-namespace `TableNotFound`, above), each caught by the full engine test suite rather than the build. `CleanupPolicy`, `WriteParams` (apart from the `auto_cleanup` default), `CompactionOptions`, the namespace models (resolved via `lance-namespace-reqwest-client` 0.7.7, unchanged across the bump), `Operation`, `ManifestLocation`, and `MergeInsertBuilder` shapes are all stable. Lesson: a clean build is not a clean alignment — run `cargo test --workspace` before declaring a Lance bump done. -- **Two surface guards added by the v3→v4 migration-robustness follow-up** (not a Lance bump, but they pin Lance error surfaces the migration now classifies on): `dataset_open_missing_returns_not_found_variant` (a missing `Dataset::open` returns `DatasetNotFound`/`NotFound` — the legacy-open read in `db/commit_graph.rs::read_legacy_commit_cache` treats only those as "no legacy data" and propagates everything else) and `lance_error_incompatible_transaction_variant_exists` (a concurrent `UpdateConfig` stamp-bump loses with `IncompatibleTransaction` — `db/manifest/migrations.rs::commit_v4_stamp_idempotently` matches it to retry the benign same-value race). Re-run on a Lance bump like the others. +- **The v3→v4 migration-robustness surface guards were removed with the strand.** + An earlier cycle added `dataset_open_missing_returns_not_found_variant` and + `lance_error_incompatible_transaction_variant_exists` to pin Lance error surfaces + the `migrate_v3_to_v4` backfill classified on. The strand retirement deleted that + migration (storage is now strict-single-version — see [invariants.md](invariants.md)), + so those guards and the legacy-read/stamp-bump code they pinned are gone. No + current omnigraph code path classifies on those Lance variants. Bump this date stanza on the next alignment pass. diff --git a/docs/dev/rfc-013-write-path-latency.md b/docs/dev/rfc-013-write-path-latency.md index 53f6430a..7ea4d3e0 100644 --- a/docs/dev/rfc-013-write-path-latency.md +++ b/docs/dev/rfc-013-write-path-latency.md @@ -1,5 +1,13 @@ # RFC-013: Write-path latency — capture-once `WriteTxn`, manifest-authoritative publish, bounded history, and a measured cost contract +> **Status update (storage-versioning strand-and-retire work):** the +> `_graph_commits.lance` / `_graph_commit_actors.lance` datasets are now **retired** +> (Phase B — lineage lives in `__manifest` as `graph_commit` / `graph_head` rows; +> the `CommitGraph` is a pure `__manifest` projection). References to those tables +> below are historical: the remaining `optimize` / `cleanup` internal-table scope is +> **`__manifest`-only**, and the per-write `_graph_commits` scan term is gone. See +> [invariants.md](invariants.md) and [versioning.md](versioning.md). + **Status:** Proposed **Author(s):** write-path latency investigation (handoff + multi-agent validation) **Date:** 2026-06-19 diff --git a/docs/dev/testing.md b/docs/dev/testing.md index 20388048..eb8323ef 100644 --- a/docs/dev/testing.md +++ b/docs/dev/testing.md @@ -26,7 +26,7 @@ The engine's `tests/` is the principal coverage surface; most graph-shaped behav | `forbidden_apis.rs` | Defense-in-depth source-walk guard: engine code (`exec/`, `db/omnigraph/`, `loader/`, `changes/`) must not reach around the sealed storage trait to Lance inline-commit APIs, nor open datasets directly (`Dataset::open` / `DatasetBuilder::from_uri`/`from_namespace`) — reads route through `Snapshot::open` and the held-handle cache; `// forbidden-api-allow: ` sentinel exempts reviewed lines | | `lance_surface_guards.rs` | Pins the Lance API surfaces omnigraph depends on (named runtime + compile-only guards; see [lance.md](lance.md)) — the first smoke check on any Lance version bump; e.g. `compact_files_still_fails_on_blob_columns` turns red when the upstream blob-compaction fix lands | | `warm_read_cost.rs` | Cost-budget tests for the warm read path (query-latency work), measured at the object-store boundary with Lance `IOTracker` (the LanceDB IO-counted pattern): a warm same-branch read does 0 manifest opens, 0 commit-graph opens, 1 version probe, validates the schema once (Fix 1 / finding A / Fix 2 at commit-history depth); stale same-branch reads perform exactly 2 probes and refresh manifest-only; recreated non-main branches with the same Lance version refresh by incarnation; recreated branch-owned table handles are distinguished by table e_tag or refresh-time cache clearing; recreated traversal topology is protected by synthetic snapshot-id incarnation or refresh-time cache clearing; a warm *repeat* read does 0 table opens via the held-handle cache and a write re-opens only the changed table at its new version/e_tag (Fix 3/6A). See "Cost-budget tests" below | -| `write_cost.rs` | Cost-budget tests for the WRITE path (RFC-013), the latency twin of `warm_read_cost.rs` on the **shared `helpers::cost` harness** (`measure`/`IoCounts`/`assert_flat`/`local_graph`). Runs on **local FS**; gates the **internal-table** term (`__manifest`/`_graph_commits` scans flat in commit-history depth — `internal_table_scans_are_flat_in_history`, now **green every-PR** since RFC-013 step 2 brought the internal tables into `optimize`; the test compacts at each depth before measuring) plus green every-PR guards (single-insert `data_writes` bounded, a per-write read-op ceiling that fails the moment a round-trip is added, and a `measure_with_staged` fitness assert that a keyed insert routes through `stage_merge_insert` once with no `stage_append`/vector-index build). The **data-table opener** term is S3-only — see `write_cost_s3.rs` and the backend-split note in "Cost-budget tests" below | +| `write_cost.rs` | Cost-budget tests for the WRITE path (RFC-013), the latency twin of `warm_read_cost.rs` on the **shared `helpers::cost` harness** (`measure`/`IoCounts`/`assert_flat`/`local_graph`). Runs on **local FS**; gates the **internal-table** term (`__manifest` scans flat in commit-history depth, lineage rows included — `internal_table_scans_are_flat_in_history`, now **green every-PR** since RFC-013 step 2 brought the internal tables into `optimize`; the test compacts at each depth before measuring) plus green every-PR guards (single-insert `data_writes` bounded, a per-write read-op ceiling that fails the moment a round-trip is added, and a `measure_with_staged` fitness assert that a keyed insert routes through `stage_merge_insert` once with no `stage_append`/vector-index build). The **data-table opener** term is S3-only — see `write_cost_s3.rs` and the backend-split note in "Cost-budget tests" below | | `helpers/cost.rs` | The shared cost-budget harness (not a test): `IoCounts`/`StagedCounts` (counts by table class), `measure`/`measure_with_staged` (the one place the `with_query_io_probes` + `MergeWriteProbes` task-local + `IOTracker` wiring lives; reads per-op deltas via lance's `incremental_stats()`, the upstream per-request idiom from `rust/lance/src/dataset/tests/dataset_io.rs`), `cost_harness`/`GraphIoMeter` (installs ONE `__manifest` `IOTracker` for a whole test body so the graph opens **under** it and `manifest_reads` is **ground truth** — every read regardless of handle age, the warm-coordinator freshness probe included — closing the blind spot where a per-op tracker installed at measure time cannot see a long-lived handle's reads; outside `cost_harness`, `measure` falls back to fresh per-op tracking, so `write_cost_s3.rs` is unaffected), `last_manifest_reads()` (the manifest read log for `assert_io_eq!`-style failure diagnostics), `assert_flat(curve, select, slack, what)`, and store-agnostic `local_graph`/`s3_graph` fixtures. `warm_read_cost.rs`, `write_cost.rs`, and `write_cost_s3.rs` all consume it so a cost test body is written once and reads in one vocabulary | | `lifecycle.rs` | Graph lifecycle, schema state | | `point_in_time.rs` | Snapshots, time travel (`snapshot_at_version`, `entity_at`) | @@ -46,7 +46,7 @@ The engine's `tests/` is the principal coverage surface; most graph-shaped behav | `validators.rs` | Schema constraint enforcement (enum, range, unique, cardinality) across JSONL, insert, update paths | | `policy_engine_chassis.rs` | Engine-layer Cedar enforcement (MR-722): allow + deny through every `_as` writer via the SDK directly — no HTTP — proving embedded and CLI callers hit the same gate as the server, with action × scope shapes matching `authorize_request` | | `maintenance.rs` | `optimize` (compaction), `repair` (explicit uncovered-drift publish), and `cleanup` (version GC): empty/idempotent/no-op edges, policy validation, head preservation; `optimize` publishes its own compaction (`optimize_publishes_compaction_to_manifest_so_schema_apply_succeeds`), skips pre-existing uncovered drift (`optimize_skips_preexisting_manifest_head_drift`), and refuses to run while a `__recovery` sidecar is pending (`optimize_defers_when_recovery_sidecar_is_pending`); `repair` previews/heals verified maintenance drift, refuses raw semantic drift without `--force`, and forced repair publishes only by explicit operator choice; the index reconciler (iss-848): `index_build_tolerates_null_vector_rows` (an untrainable Vector column defers instead of aborting the build, sibling indexes still build) and `optimize_materializes_index_declared_but_unbuilt` (optimize creates a declared-but-deferred index) | -| `failpoints.rs` | Failure-injection coverage (gated on `failpoints` feature). Includes the five per-writer Phase B → recovery integration tests (`recovery_rolls_forward_after_finalize_publisher_failure`, `schema_apply_phase_b_failure_recovered_on_next_open`, `branch_merge_phase_b_failure_recovered_on_next_open`, `ensure_indices_phase_b_failure_recovered_on_next_open`, `optimize_phase_b_failure_recovered_on_next_open`) and the write-entry in-process heal contract (the four `*_after_finalize_publisher_failure_heals_without_reopen` tests — load, mutation, schema apply, branch merge: a follow-up write on the same handle rolls a sidecar-covered residual forward without reopen/refresh) and the storage-fault matrix for the sidecar lifecycle (`recovery.sidecar_{write,delete,list}` / `recovery.record_audit` failpoints: Phase A put failure aborts with zero drift, Phase D delete failure is swallowed and healed by the next write, list failures are loud at heal and open, audit-append failures are retried to exactly one audit row; plus the bucket-gated `s3_load_recovers_after_publisher_failure_without_reopen`). Also the v3→v4 migration fault-injection test (`transient_legacy_open_failure_aborts_migration_without_stamping_v4`, `migration.v3_to_v4.legacy_open` failpoint): a transient legacy-open failure aborts the migration loudly and leaves it retryable (stamp stays v3, no partial backfill), never stamping v4 over an empty backfill. Also the v4 stamp-bump exhaustion regression (`v4_stamp_exhaustion_returns_retryable_contention`, `migration.v4_stamp.force_incompatible` failpoint): the stamp retry loop surfaces a retryable `RowLevelCasContention` on exhaustion, not a stringified `Lance`. And the convergence-idempotent roll-forward regression (`open_sweep_roll_forward_converges_when_manifest_advances_concurrently`: two concurrent open-sweeps race one sidecar at the `recovery.before_roll_forward_publish` rendezvous; the CAS loser must converge, not fail the open — iss-schema-apply-reopen-recovery-race). | +| `failpoints.rs` | Failure-injection coverage (gated on `failpoints` feature). Includes the five per-writer Phase B → recovery integration tests (`recovery_rolls_forward_after_finalize_publisher_failure`, `schema_apply_phase_b_failure_recovered_on_next_open`, `branch_merge_phase_b_failure_recovered_on_next_open`, `ensure_indices_phase_b_failure_recovered_on_next_open`, `optimize_phase_b_failure_recovered_on_next_open`) and the write-entry in-process heal contract (the four `*_after_finalize_publisher_failure_heals_without_reopen` tests — load, mutation, schema apply, branch merge: a follow-up write on the same handle rolls a sidecar-covered residual forward without reopen/refresh) and the storage-fault matrix for the sidecar lifecycle (`recovery.sidecar_{write,delete,list}` / `recovery.record_audit` failpoints: Phase A put failure aborts with zero drift, Phase D delete failure is swallowed and healed by the next write, list failures are loud at heal and open, audit-append failures are retried to exactly one audit row; plus the bucket-gated `s3_load_recovers_after_publisher_failure_without_reopen`). And the convergence-idempotent roll-forward regression (`open_sweep_roll_forward_converges_when_manifest_advances_concurrently`: two concurrent open-sweeps race one sidecar at the `recovery.before_roll_forward_publish` rendezvous; the CAS loser must converge, not fail the open — iss-schema-apply-reopen-recovery-race). | | `recovery.rs` | Open-time recovery sweep — sidecar I/O, classifier dispatch (NoMovement / RolledPastExpected / UnexpectedAtP1 / UnexpectedMultistep / InvariantViolation), all-or-nothing decision, roll-forward via `ManifestBatchPublisher::publish`, roll-back via `Dataset::restore`, audit row in `_graph_commit_recoveries.lance`, `OpenMode::ReadOnly` skip path | | `composite_flow.rs` | Compositional/narrative end-to-end stories — multi-step flows that compose mechanics covered by other test files. Catches integration regressions where individual operations all pass their unit tests but their composition breaks (sequential merges, post-merge main writes, time-travel through merge DAG, reopen consistency over multi-merge histories, post-optimize and post-cleanup strict writes). | @@ -139,7 +139,7 @@ Correctness bugs fail loudly in tests; cost-scaling bugs pass every test and deg - **Assert a cost budget, not just a result.** For a read/open path, assert the number of `Dataset::open` calls (or object-store ops) a warm query performs, and that it does not grow with commit count. The reference is LanceDB's IO-counted tests, which assert a cached read costs 0-1 IO and carry a named regression test against "a list call on every subsequent query." - **Test at history depth.** Build a fixture with many *commits* (not many rows) and assert warm-read cost is flat across depths. A shallow fixture cannot catch an O(commits) cost. -- **Use the shared harness, and gate each term on the backend where it manifests.** `helpers::cost` (`measure`/`IoCounts`/`assert_flat`/`local_graph`/`s3_graph`) is the one place the `IOTracker`/task-local plumbing lives — consume it, don't duplicate it. The write path has *two distinct* depth terms that split cleanly across backends, and conflating them is a real trap (the local data-table read count grows with depth too, but for a different reason — the merge-insert/RI scan reading O(depth) *fragments*, reduced by compaction, not by the opener): (1) the **internal-table** scan term (`__manifest`/`_graph_commits` fragment scans) reproduces on **any** backend including local FS, so `write_cost.rs` gates it on local every-PR; (2) the **data-table opener** term (latest-version resolution) is a per-object-store-RPC phenomenon — local-FS resolves latest with one cheap `read_dir` regardless of the opener used, so the namespace-vs-direct difference is **invisible on local** and only shows on a real object store (per-version GETs), gated by the bucket-gated `write_cost_s3.rs`. Same harness, different fixture; each term asserted where it actually appears. +- **Use the shared harness, and gate each term on the backend where it manifests.** `helpers::cost` (`measure`/`IoCounts`/`assert_flat`/`local_graph`/`s3_graph`) is the one place the `IOTracker`/task-local plumbing lives — consume it, don't duplicate it. The write path has *two distinct* depth terms that split cleanly across backends, and conflating them is a real trap (the local data-table read count grows with depth too, but for a different reason — the merge-insert/RI scan reading O(depth) *fragments*, reduced by compaction, not by the opener): (1) the **internal-table** scan term (`__manifest` fragment scans, lineage rows included) reproduces on **any** backend including local FS, so `write_cost.rs` gates it on local every-PR; (2) the **data-table opener** term (latest-version resolution) is a per-object-store-RPC phenomenon — local-FS resolves latest with one cheap `read_dir` regardless of the opener used, so the namespace-vs-direct difference is **invisible on local** and only shows on a real object store (per-version GETs), gated by the bucket-gated `write_cost_s3.rs`. Same harness, different fixture; each term asserted where it actually appears. - **Count on the handle that does the reads, not just the one a measured op opens.** Lance's IO-counted tests attach the `IOTracker` to the (warm, cached) dataset and read `incremental_stats()` per request — the tracker MUST be on the handle performing the reads, or warm-handle reads escape. A per-op tracker installed at measure time cannot see reads on a long-lived handle opened earlier (the warm coordinator's `__manifest` handle, reused across writes), so such reads were silently undercounted. Wrap a depth-swept body in `cost_harness` so the manifest tracker is installed before the graph opens and `manifest_reads` is **ground truth** (handle-age-irrelevant). The `version_probes` counter is the freshness-probe *call* count; ground truth additionally reveals that a write's probe does ~3 object-store RPCs (a read's probe is a 0-IO cache hit). `manifest_reads_capture_warm_probe` is the guard that this stays true. - This is the testing companion to invariant 15 in [docs/dev/invariants.md](invariants.md) (hot-path cost is bounded by work, not history). diff --git a/docs/dev/versioning.md b/docs/dev/versioning.md new file mode 100644 index 00000000..7c46c3ad --- /dev/null +++ b/docs/dev/versioning.md @@ -0,0 +1,78 @@ +# Versioning & compatibility policy + +**Audience:** engine / storage / release maintainers +**Status:** living document + +Omnigraph has four independent version axes. They have different compatibility +contracts because they fail in different ways and at different costs. Conflating +them (for example, treating a storage-format change like a wire change) is how you +either ship an unsafe silent-misread or carry migration code you do not need. + +| Axis | Policy | Mechanism | +|---|---|---| +| **Release (semver)** | All published crates move in lockstep. | Maintenance-contract rule 4 in [AGENTS.md](../../AGENTS.md): a release bump updates every crate manifest, `Cargo.lock`, `openapi.json`, and the surveyed version line together. | +| **CLI ↔ server wire** | Additive and rolling-safe; **no version gate**. New fields are optional; old clients ignore unknown fields and omit new ones. | Additive JSON DTOs in `omnigraph-api-types`; the OpenAPI-drift test (`crates/omnigraph-server/tests/openapi.rs`) catches an unintended wire change. | +| **Storage (internal manifest schema)** | **Strict single version**; upgrade is a cutover via export/import, never an in-place migration. | A stamp (`omnigraph:internal_schema_version`) in `__manifest`'s schema metadata + `refuse_if_stamp_unsupported`, with `MIN_SUPPORTED == CURRENT`. | +| **Lance on-disk format** | Pinned to one Lance version; bumped deliberately with the engine. | `data_storage_version: V2_2` at every write site + the surface guards in [lance.md](lance.md), re-run on every Lance bump. | + +## Why storage is strict-single-version (the strand model) + +The internal-schema stamp gates the on-disk shape of `__manifest`. The contract is: +**this binary reads exactly one internal-schema version.** `Omnigraph::open` (both +read-write and read-only) reads main's stamp before any data and refuses anything +it cannot serve: + +- a stamp **below** CURRENT → refused with a rebuild-via-export/import message (see + [the upgrade guide](../user/operations/upgrade.md)); +- a stamp **above** CURRENT → refused with "upgrade omnigraph", so an old binary + cannot silently misread a newer format. + +There is no in-place migration dispatcher. The single source file +`db/manifest/migrations.rs` holds only the version constant, the stamp read/write, +and `refuse_if_stamp_unsupported`. + +This is a liability decision, not a limitation we have not gotten around to. In-place +migration code is permanent surface: every future format change has to write, +test, and keep working a `vN → vN+1` step, plus the legacy readers and crash-recovery +paths each step needs, for a storage format that is still pre-release and changing. +The strand model trades that ongoing cost for a one-time operator action (export + +import) when a format changes. Per "engineering is programming integrated over time" +(see [AGENTS.md](../../AGENTS.md)), the lower-liability option is to **not** carry +the machinery until a concrete graph demands it. + +The stamp + `refuse_if_stamp_unsupported` floor is exactly the seam a future in-place +migration would re-introduce: re-add a dispatcher and lower `MIN_SUPPORTED` below +CURRENT for the versions it can actually walk forward. Until then that machinery is +deliberately absent. + +### Gating altitude + +The stamp is validated at the **graph (main) level**: `Omnigraph::open` checks main +once, and branch reads trust it. The stamp is a graph-wide storage-format property +(the upgrade path is a whole-graph export/import), so with one binary version every +branch is always CURRENT — init stamps main, `create_branch` forks the stamp, and the +publisher writes rows without re-stamping. A branch stamped out of range while main +stays in range is only reachable with concurrent multi-version writers, an +unsupported topology; the residual is recorded as a known gap in +[invariants.md](invariants.md). + +## Why the wire is additive-rolling-safe instead + +The CLI↔server boundary is the opposite case: clients and servers are deployed +independently and a hard gate there would force lockstep redeploys for every field +addition. So that axis is additive — old and new coexist — and the OpenAPI-drift test +is the guard that a change stayed additive rather than breaking the shape. + +## When you change each axis + +- **Storage format**: bump `INTERNAL_MANIFEST_SCHEMA_VERSION`, keep + `MIN_SUPPORTED == CURRENT` (unless you are re-introducing migration), update the + stamp history on the constant's doc-comment, and add a release note pointing at + the upgrade guide. The change is breaking by construction — pre-bump graphs are + refused. +- **Wire**: keep it additive; regenerate `openapi.json` + (`OMNIGRAPH_UPDATE_OPENAPI=1`); do not add a version gate. +- **Lance**: follow the Lance-bump checklist in [lance.md](lance.md) — re-run the + surface guards first, then `cargo test --workspace` (a clean build is not a clean + alignment). +- **Release**: lockstep per the maintenance contract. diff --git a/docs/dev/write-latency-roadmap.md b/docs/dev/write-latency-roadmap.md index d6287d1a..3581f066 100644 --- a/docs/dev/write-latency-roadmap.md +++ b/docs/dev/write-latency-roadmap.md @@ -70,7 +70,7 @@ The publisher loop runs cold `load_publish_state` (open + full scan) on every at ## Sequencing -0. **Prereq (separate plan): retire `_graph_commits.lance` + `_graph_commit_actors.lance`.** Simplifies Layer 1 to `__manifest`-only and removes 2 cold-open LISTs. (Strand-and-rebuild storage-versioning plan.) +0. **Prereq (separate plan): retire `_graph_commits.lance` + `_graph_commit_actors.lance`. — DONE.** Phase B of the strand-and-retire storage-versioning work landed this: the tables are gone, Layer 1's GC scope is `__manifest`-only, and the 2 cold-open LISTs are removed. 1. **Operational now:** one-shot quiesced `__manifest` cleanup on the production graph (~7s → ~1s, no code). 2. **Layer 1:** wire `__manifest` into the `cleanup` command (manual/quiesced + sidecar-refuse) — the 80/20. 3. **Layers 3 + 4:** open-at-pinned-version + optimistic warm publish — remove the redundant round-trips; bring a warm write to ~2 cheap Lance-commit lists + writes. diff --git a/docs/dev/writes.md b/docs/dev/writes.md index a8d16696..e22e85b1 100644 --- a/docs/dev/writes.md +++ b/docs/dev/writes.md @@ -355,36 +355,32 @@ actual }`. The HTTP server maps this to **409 Conflict** with body `__manifest`, written in the publish CAS (RFC-013 Phase 7; previously `_graph_commits.lance`). Audit history is queried via `omnigraph commit list`. -## Migration code - -`db/manifest/migrations.rs` is the single place on-disk `__manifest` shape is -reconciled with what the binary expects, stepping the -`omnigraph:internal_schema_version` stamp forward one `match`-arm at a time. It -runs in `Omnigraph::open(ReadWrite)` (via `manifest::migrate_on_open`, before the -coordinator reads branch state) and again on the publisher's write path, so each -branch migrates on its first write; every step is idempotent under crash-retry -(work first, stamp bump last). - -- **v2→v3** (MR-770): a one-time sweep that deletes legacy `__run__*` staging - branches off `__manifest`. Deleting the inert `_graph_runs.lance` / - `_graph_run_actors.lance` dataset *bytes* is still deferred — it needs a - `StorageAdapter::delete_prefix` primitive — but those bytes are invisible to - graph-level state. -- **v3→v4** (RFC-013 Phase 7, `migrate_v3_to_v4`): backfills the graph lineage - from `_graph_commits.lance` into `__manifest` as `graph_commit` / `graph_head` - rows. A graph created before Phase 7 has its lineage only in - `_graph_commits.lance`; the new binary reads lineage from the `__manifest` - projection, so without this backfill it would see an empty commit DAG. The - backfill is per-branch (each branch migrates on its first write), idempotent - (keyed on `object_id`; a fast-path guard skips when `__manifest` already - carries `graph_commit` rows), and writes exactly one `graph_head:` row - for the actual head. `_graph_commits.lance` is left in place as the branch-ref - carrier — no commit row is written to it again. While a graph is below v4, a - **read-only** open (which never writes, so never migrates) sources the commit - DAG from `_graph_commits.lance` via the stamp-gated transitional fallback in - `CommitGraph::open*`, so reads see correct history before the first write - migrates the graph. An old binary opening a v4-stamped graph is refused with an - "upgrade omnigraph" error in both read-write and read-only modes. +## Storage versioning (no in-place migration) + +`db/manifest/migrations.rs` is the single place the on-disk `__manifest` shape is +reconciled with what the binary expects. Storage is **strict-single-version** (the +strand model): this binary reads exactly ONE internal-schema version +(`MIN_SUPPORTED == CURRENT == 4`), so there is no in-place migration. + +- **Graph creation** stamps `omnigraph:internal_schema_version` at CURRENT, so a + fresh graph always opens. +- **`Omnigraph::open`** (both read-write and read-only) reads main's stamp before + the coordinator reads any branch state and calls `refuse_if_stamp_unsupported`: + a stamp *below* CURRENT is refused with a rebuild-via-export/import message; a + stamp *above* CURRENT is refused with "upgrade omnigraph". The publisher + re-checks the stamp on its write path against the branch it targets, with no + object-store writes, so the check is safe under a read-only open. +- The stamp + `refuse_if_stamp_unsupported` floor is the only seam a future + in-place migration would re-introduce (re-add a dispatcher and lower + `MIN_SUPPORTED`). Until a concrete graph demands it, that machinery is + deliberately absent — see [versioning.md](versioning.md) (the compatibility + policy) and [the upgrade guide](../user/operations/upgrade.md) (the rebuild + recipe). + +The stamp history (v1 PK-less, v2 unenforced-PK, v3 `__run__*` sweep, v4 lineage +in `__manifest` with the commit-graph tables retired) is recorded on the +`INTERNAL_MANIFEST_SCHEMA_VERSION` doc-comment; only v4 is served. An earlier-stamped +graph is rebuilt via export/import, not migrated in place. ## Mid-query partial failure: closed by MR-794 diff --git a/docs/releases/v0.8.0.md b/docs/releases/v0.8.0.md index f85acdc9..5e0796ff 100644 --- a/docs/releases/v0.8.0.md +++ b/docs/releases/v0.8.0.md @@ -2,60 +2,72 @@ > Draft release notes for the next minor. The version line in `AGENTS.md` and the > crate manifests are bumped when this release is cut — these notes track the -> user-visible delta as the RFC-013 work lands. +> user-visible delta as the work lands. -This release moves the graph commit lineage into `__manifest` (RFC-013 Phase 7) -and ships a **one-time on-disk migration** for existing graphs. It is the first -release with an internal-schema change since v0.4.0, so it has an upgrade-order -requirement — read the upgrade notes before rolling it out. +This release moves the graph commit lineage into `__manifest` (RFC-013 Phase 7), +retires the two legacy commit-graph datasets, and surfaces the storage-format +version to operators. It is the first release with an internal-schema change since +v0.4.0, and storage is **strict-single-version**: there is no in-place migration — +a graph from an older release is rebuilt via export/import. Read the upgrade notes +before rolling it out. ## Graph lineage now lives in `__manifest` (internal schema v4) The graph commit DAG (commits, parents, merge parents, per-branch heads, and the authoring actor) is now stored in `__manifest` as `graph_commit` / `graph_head` rows, written in the **same commit (CAS)** as the table-version rows of a graph -publish. Previously the lineage lived in a separate `_graph_commits.lance` -dataset written after the manifest commit, leaving a narrow window where a crash -could land a manifest version with no matching lineage row. Folding the lineage -into the publish closes that gap by construction: a graph commit and its lineage -now land atomically at one manifest version. The in-memory commit graph is a -projection of those manifest rows; `_graph_commits.lance` is retained only as a -carrier for Lance branch refs and no longer receives commit rows. - -This bumps the `__manifest` internal schema stamp from **v3 to v4**. - -## Existing graphs migrate seamlessly on first write - -A graph created by an earlier binary (internal schema v3) keeps its lineage in -`_graph_commits.lance` with none in `__manifest`. On the **first read-write -open**, Omnigraph backfills that lineage into `__manifest` (the `migrate_v3_to_v4` -internal-schema step) and bumps the stamp to v4. The migration: - -- is **per-branch** — each branch backfills on its first write; -- is **idempotent and crash-safe** — the stamp bump is the last step, and the - backfill is keyed on the commit id, so a crash mid-migration re-runs harmlessly - on the next open; -- **preserves all data** — every commit, parent, merge parent, actor, and head is - carried over; commit ids are stable, so existing references still resolve. - -No data is lost and no operator action is required beyond upgrading the binary. - -Before its first write migrates the graph, a **read-only** open of a v3 graph -(e.g. `omnigraph commit list`, NDJSON export) still reads correct history via a -transitional fallback that sources the commit DAG from `_graph_commits.lance` — -read-only opens never write, so they never migrate, but they never show an empty -history either. - -## Breaking: upgrade writer binaries first - -Internal schema v4 is a hard version gate. Once a graph has been opened for write -by a v0.8.0 binary, its `__manifest` is stamped v4, and an **older binary will -refuse to open it** — read-write *and* read-only — with an -`upgrade omnigraph before opening this graph` error rather than silently -misreading the new lineage. This is the standard forward-version protection -(same shape as the v1→v2 / v2→v3 steps), now enforced on the read-only path too. - -**Upgrade order:** upgrade every writer (and reader) binary that touches a graph -to v0.8.0 before, or together with, the first write under the new version. A -mixed fleet where an old binary still writes the same graph is unsupported, as -with any internal-schema bump. +publish. Previously the lineage lived in a separate `_graph_commits.lance` dataset +written after the manifest commit, leaving a narrow window where a crash could land +a manifest version with no matching lineage row. Folding the lineage into the +publish closes that gap by construction: a graph commit and its lineage now land +atomically at one manifest version. The in-memory commit graph is a pure projection +of those manifest rows. + +This bumps the `__manifest` internal schema stamp to **v4**. + +## The `_graph_commits.lance` / `_graph_commit_actors.lance` tables are retired + +With lineage in `__manifest` and branch authority already on the manifest, the two +legacy commit-graph datasets are no longer created, read, or written. A graph this +release creates has neither. This removes two cold-open directory listings per +graph open and simplifies maintenance (`optimize` compacts only `__manifest` among +the internal tables now). + +## Strict-single-version storage: rebuild via export/import, no in-place migration + +Internal schema v4 is a hard version gate, enforced in **both** directions on every +open (read-write and read-only): + +- A graph stamped **below v4** (created by an older release whose storage format + this binary does not read) is refused with a **rebuild-via-export/import** + message. There is no in-place upgrade: export the graph with the older binary, + then `omnigraph init` + `omnigraph load` with this one. Data, vectors, and blobs + are preserved; commit history and branches are not carried over. See the + [upgrade guide](../user/operations/upgrade.md). +- A graph stamped **above v4** (created by a newer release) is refused with an + **`upgrade omnigraph before opening this graph`** error, so an old binary cannot + silently misread a newer format. + +This replaces the speculative one-time on-disk migration that earlier drafts of +this release described. The rationale (lower long-term liability than carrying +in-place migration code for a pre-release format) is in +[docs/dev/versioning.md](../dev/versioning.md). + +## See the storage-format version + +Operators can now read the internal-schema version directly instead of discovering +it through a refusal: + +- `omnigraph version` prints an `internal-schema ` line (the version this binary + serves). +- `omnigraph snapshot` reports the opened graph's on-disk `internal_schema_version`. +- The server `GET /healthz` response includes `internal_schema_version` (the + binary's served version). + +## Upgrade order + +Upgrade every binary that touches a graph to v0.8.0 together. A mixed fleet where an +older binary still writes a graph another has stamped v4 is unsupported, as with any +internal-schema bump. To move a pre-v4 graph forward, follow the +[upgrade guide](../user/operations/upgrade.md): export with the old binary, then +`init` + `load` with v0.8.0. diff --git a/docs/user/concepts/storage.md b/docs/user/concepts/storage.md index e3d9ef15..bad7726c 100644 --- a/docs/user/concepts/storage.md +++ b/docs/user/concepts/storage.md @@ -20,9 +20,8 @@ OmniGraph is **not** a single Lance dataset; it is a *graph* of datasets coordin - **Layout**: - `nodes/{fnv1a64-hex(type_name)}` — one Lance dataset per node type - `edges/{fnv1a64-hex(edge_type_name)}` — one Lance dataset per edge type - - `__manifest/` — the catalog of all sub-tables and their published versions, **and** the graph commit lineage (RFC-013 Phase 7) - - `_graph_commits.lance` / `_graph_commit_actors.lance` — legacy / branch-ref carriers. Since RFC-013 Phase 7 the graph lineage lives in `__manifest` (`graph_commit` / `graph_head` rows, written in the publish CAS); `_graph_commits.lance` no longer receives commit rows, but is retained to carry the Lance branch refs that `create_branch` / `list_branches` / the `cleanup` orphan reconciler operate on. A graph created before Phase 7 (internal schema v3) keeps its lineage here until its first read-write open, which migrates it into `__manifest` via `migrate_v3_to_v4`. - - (legacy `_graph_runs.lance` / `_graph_run_actors.lance` from pre-v0.4.0 graphs are inert; the run state machine was removed. The internal schema migration sweeps stale `__run__*` branches on first write-open; the inert dataset bytes themselves remain until a prefix-delete storage primitive lands) + - `__manifest/` — the catalog of all sub-tables and their published versions, **and** the graph commit lineage (RFC-013 Phase 7: `graph_commit` / `graph_head` rows). Graph-level branches are Lance branches on these datasets. + - `_graph_commit_recoveries.lance` — the crash-recovery audit log (one row per recovery action; see below). The former `_graph_commits.lance` / `_graph_commit_actors.lance` lineage tables are **retired**: lineage lives in `__manifest`, so a graph this binary creates has neither. - **Manifest row schema** (`object_id, object_type, location, metadata, base_objects, table_key, table_version, table_branch, row_count`): - `object_type` ∈ `table | table_version | table_tombstone | graph_commit | graph_head` - `table_key` ∈ `node: | edge:` (empty for `graph_commit` / `graph_head` lineage rows) @@ -35,18 +34,22 @@ OmniGraph is **not** a single Lance dataset; it is a *graph* of datasets coordin ### Internal schema versioning -The on-disk shape of `__manifest` is reconciled with the binary via a single version stamp held in the manifest dataset's schema-level metadata. +The on-disk shape of `__manifest` is reconciled with the binary via a single version stamp (`omnigraph:internal_schema_version`) held in the manifest dataset's schema-level metadata. Storage is **strict-single-version** (the strand model): this binary reads exactly ONE internal-schema version, and there is no in-place migration. -- **Graph creation** stamps the current version, so newly initialized graphs never need migration. -- **The open-for-write path** migrates the on-disk stamp before reading state. When the stamp matches the binary, this is a single metadata read with no writes; otherwise the migration walks steps forward (1→2, 2→3, …) until the stamp matches, then proceeds with the publish. Reads stay side-effect-free. -- **Forward-version protection**: a stamp *higher* than the binary's known version triggers a clear "upgrade omnigraph first" error. An old binary cannot clobber a newer schema by silently treating "unknown stamp" as "missing stamp". -- **Idempotency**: each migration step is safe to re-run. A crash between two metadata updates inside a single step leaves the partial state; the next open re-runs the step and the second update lands. +- **Graph creation** stamps the current version, so newly initialized graphs always open. +- **Both open paths** (read-write and read-only) read main's stamp before reading any data and refuse a graph the binary cannot serve: + - a stamp *below* CURRENT — a graph from an older release whose storage format this binary does not read — is refused with a **rebuild-via-export/import** message (there is no in-place upgrade; see the [upgrade guide](../operations/upgrade.md)). + - a stamp *above* CURRENT — a graph written by a newer release — is refused with an **"upgrade omnigraph first"** message, so an old binary cannot misread a newer format. +- The stamp is read with no object-store writes, so the check is safe under a read-only open. Operators can see a graph's stamp with `omnigraph snapshot` and the binary's served version with `omnigraph version` (the `internal-schema` line). -| Stamp | Shape change | +The stamp values below are historical; this binary serves only the current one (`v4`). An earlier-stamped graph is rebuilt via export/import, not migrated in place. + +| Stamp | Shape | |---|---| | v1 (implicit, pre-stamp) | `__manifest.object_id` had no PK annotation; no row-level CAS protection. | | v2 | `__manifest.object_id` carries an unenforced-primary-key annotation; row-level CAS engaged. | -| v3 | One-time sweep of legacy `__run__*` staging branches (pre-v0.4.0 Run state machine, removed) off `__manifest`. Runs at read-write open and on publish. | +| v3 | Legacy `__run__*` staging branches (pre-v0.4.0 Run state machine) swept off `__manifest`. | +| v4 | Graph lineage folded into `__manifest` as `graph_commit` / `graph_head` rows (RFC-013 Phase 7); the `_graph_commits.lance` / `_graph_commit_actors.lance` tables retired. **The only version this binary serves.** | ## On-disk layout @@ -62,7 +65,7 @@ flowchart TB manifest["__manifest/
L2 catalog of sub-tables"]:::l2 nodes["nodes/{fnv1a64-hex}/
one dataset per node type"]:::l2 edges["edges/{fnv1a64-hex}/
one dataset per edge type"]:::l2 - cgraph["_graph_commits.lance/
_graph_commit_actors.lance/
_graph_commit_recoveries.lance/"]:::l2 + cgraph["_graph_commit_recoveries.lance/
crash-recovery audit log"]:::l2 recovery["__recovery/{ulid}.json
recovery sidecars (transient)"]:::l2 refs["_refs/branches/{name}.json
graph-level branches"]:::l2 @@ -91,7 +94,7 @@ flowchart TB - **Graph root** is one directory (or S3 prefix). Everything below is part of one OmniGraph graph. - **`__manifest/`** is a Lance dataset whose rows describe which sub-table version is published at which graph-branch. Reading a snapshot starts here. - **`nodes/`** and **`edges/`** are sibling directories holding one Lance dataset per declared type. Names are `fnv1a64-hex` of the type name to keep paths fixed-length and case-safe. -- **`_graph_commits.lance`** is an L2 dataset retained only as a branch-ref carrier (and, on a pre-Phase-7 graph, the migration source). Since RFC-013 Phase 7 the graph commit DAG lives in `__manifest` as `graph_commit` / `graph_head` rows written in the publish CAS — `_graph_commits.lance` and its paired `_graph_commit_actors.lance` no longer receive commit rows. A graph created before Phase 7 (internal schema v3) backfills its lineage into `__manifest` on its first read-write open (`migrate_v3_to_v4`). (Pre-v0.4.0 graphs also have inert `_graph_runs.lance` / `_graph_run_actors.lance` from the removed Run state machine; the internal schema migration sweeps their stale `__run__*` branches, and the dataset bytes are reclaimed once a prefix-delete primitive lands.) +- The graph commit DAG lives in **`__manifest`** as `graph_commit` / `graph_head` rows written in the publish CAS (RFC-013 Phase 7). The former `_graph_commits.lance` / `_graph_commit_actors.lance` lineage tables are retired — a graph this binary creates has neither. - **`_graph_commit_recoveries.lance`** — one row per crash-recovery action. Joined by `graph_commit_id` to the graph commit lineage (the `graph_commit` rows in `__manifest` since RFC-013 Phase 7); the linked commit carries `actor_id=omnigraph:recovery`. Operators correlate recoveries with the original mutations they rolled forward / back via this join. - **`__recovery/{ulid}.json`** — transient sidecar files written by a writer before it advances the underlying dataset, deleted once the matching manifest publish succeeds. A sidecar persisting after process exit means the writer crashed mid-commit; the next read-write open processes it. Steady-state directory is empty. - **`_refs/branches/{name}.json`** is graph-level branch metadata — pointers from a branch name to the manifest version it heads. diff --git a/docs/user/index.md b/docs/user/index.md index cabd98a0..ce6712d0 100644 --- a/docs/user/index.md +++ b/docs/user/index.md @@ -46,6 +46,7 @@ start with install, then follow the section that matches your task. | Deploy the binary or container | [deployment.md](deployment.md) | | Use HTTP endpoints | [operations/server.md](operations/server.md) | | Compact, repair, and clean old versions | [operations/maintenance.md](operations/maintenance.md) | +| Upgrade across a storage-format change (export/import) | [operations/upgrade.md](operations/upgrade.md) | | Configure Cedar authorization | [operations/policy.md](operations/policy.md) | | Track actors and audit behavior | [operations/audit.md](operations/audit.md) | | Interpret errors and output formats | [operations/errors.md](operations/errors.md) | diff --git a/docs/user/operations/maintenance.md b/docs/user/operations/maintenance.md index d8df9506..4f901ff5 100644 --- a/docs/user/operations/maintenance.md +++ b/docs/user/operations/maintenance.md @@ -6,7 +6,7 @@ - Compacts every node + edge table on `main`, then reindexes them, then **publishes the resulting version to the `__manifest`** so the manifest's recorded version tracks the compacted-and-reindexed state. Reads pin the manifest version, so without this publish the work would be invisible to readers *and* would break the version precondition of the next schema apply / strict update/delete ("stale view … refresh and retry"). The publish advances the graph version (a system-attributed commit) only for tables that actually changed. - Rewrites small fragments into fewer large ones; old fragments remain reachable via older versions until `cleanup` runs. -- **Also compacts the internal system tables** `__manifest`, `_graph_commits`, and `_graph_commit_actors` (RFC-013 step 2), which accumulate one fragment per commit (the actor table only on the authenticated write path, where every commit carries an actor) and otherwise make every write's metadata scan grow with history. These take a simpler path than data tables: they are not `__manifest`-tracked (readers open them at their latest version), so compaction just advances their version in place — **no manifest publish and no recovery sidecar**. (The sidecar-free property is not because it is one commit — `compact_files` can emit a `ReserveFragments` commit before the `Rewrite`, and the auto-cleanup strip below is a further commit — but because every one of those commits is content-preserving and the table is read at its latest version, so a crash at any point leaves it readable and content-identical and the next `optimize` re-plans.) They appear in the returned stats under `table_key` `"__manifest"` / `"_graph_commits"` / `"_graph_commit_actors"` (the latter two only when present). They are **not yet covered by `cleanup`**, so their version chain still grows until the cleanup half lands (it requires a cleanup-resurrection safeguard first); run `optimize` on a cadence to keep per-write metadata scans flat. +- **Also compacts the internal `__manifest` table** (RFC-013 step 2), which accumulates one fragment per commit — it now carries the graph lineage and actor rows inline (RFC-013 Phase 7: `graph_commit` / `graph_head` rows), so on the authenticated write path every commit's actor lands here too — and otherwise makes every write's metadata scan grow with history. (The `_graph_commits.lance` / `_graph_commit_actors.lance` tables are retired, so there is no separate lineage table to compact.) It takes a simpler path than data tables: `__manifest` is read at its latest version, so compaction just advances its version in place — **no manifest publish and no recovery sidecar**. (The sidecar-free property is not because it is one commit — `compact_files` can emit a `ReserveFragments` commit before the `Rewrite`, and the auto-cleanup strip below is a further commit — but because every one of those commits is content-preserving and the table is read at its latest version, so a crash at any point leaves it readable and content-identical and the next `optimize` re-plans.) It appears in the returned stats under `table_key` `"__manifest"`. It is **not yet covered by `cleanup`**, so its version chain still grows until the cleanup half lands (it requires a cleanup-resurrection safeguard first); run `optimize` on a cadence to keep per-write metadata scans flat. - **`optimize` is non-destructive by construction — it never garbage-collects versions, on any table (data or internal).** Compaction rewrites fragments and advances the version; old versions stay reachable until you run `cleanup`. This holds even for a graph created by an older binary that stored an on-by-default Lance `auto_cleanup` hook: `compact_files` / `optimize_indices` commit with the hook enabled and expose no skip override, so before compacting **any** table `optimize` strips its stale `lance.auto_cleanup.*` config first, so Lance's commit-time GC hook cannot fire and silently prune `__manifest`-pinned versions. (Graphs created by current binaries store no such config; the strip is the upgrade-path safety net.) The internal-table path additionally tolerates a concurrent live writer: it runs a **bounded** rebase-and-retry, so transient contention does not fail the operator's `optimize` or the live write — but sustained contention past the retry budget surfaces a loud conflict error rather than looping forever (bounded and observable, not a silent give-up). The data-table path holds the per-table write queue while it compacts, so it does not contend with mutations on that table in the first place. - **Reindex (index coverage maintenance).** A scalar/FTS/vector index only covers the fragments it was built over. Rows appended after the index was built (e.g. by `load --mode merge`, whose commit does not rebuild an already-existing index) are scanned unindexed, and compaction itself rewrites fragments out of an index's coverage. `optimize` runs Lance's incremental `optimize_indices` after compaction to fold those fragments back in (a delta merge, not a full retrain), restoring full coverage so equality/range/traversal predicates stay index-accelerated. This is why a table with **no compaction work but stale index coverage still commits** a new version under `optimize`. Run `optimize` on a cadence at least as frequent as your freshness window so recently-loaded rows do not linger in the unindexed flat-scan tail. - **Create declared-but-missing indexes (the index reconciler).** `@index`/`@key` declares intent; `schema apply` records it but builds nothing, and `load`/`mutate` defer a column that cannot be built yet (a `Vector` column with no trainable vectors). `optimize` materializes any such declared-but-unbuilt index over the compacted layout — so it is the convergence path for an `@index` added after data exists, or a vector index whose embeddings arrived via a later `embed`. A column still not buildable (no vectors yet) is reported on the table's stat as `pending_indexes` (visible in `--json`), not treated as a failure; the next `optimize` retries. So `optimize` is the single operator-facing index reconciler: it compacts, restores coverage, **and** builds declared-but-missing indexes. diff --git a/docs/user/operations/upgrade.md b/docs/user/operations/upgrade.md new file mode 100644 index 00000000..a6bfd4aa --- /dev/null +++ b/docs/user/operations/upgrade.md @@ -0,0 +1,84 @@ +# Upgrading across a storage-format change (export / import) + +Omnigraph storage is **strict-single-version**: a binary reads exactly one +internal-schema (storage-format) version. There is no in-place migration. When a +release changes the internal schema, a graph created by an older release is +**refused on open** with a message that points here, and you move it forward by +rebuilding it: export with the old binary, then `init` + `load` with the new one. + +This is a deliberate pre-release design choice. The rationale (lower long-term +liability than carrying in-place migration code for a format that is still +changing) is in [docs/dev/versioning.md](../../dev/versioning.md). + +## How you know you need this + +Opening a graph whose stamp is below the binary's version fails with: + +``` +__manifest is stamped at internal schema vN, but this omnigraph reads only vM. +This graph was created by an older omnigraph release; rebuild it: run `omnigraph +export` with the older omnigraph binary that created it, then `omnigraph init` + +`omnigraph load` with this one. (Data, vectors, and blobs are preserved; commit +history and branches are not.) +``` + +You can also check versions before you hit a refusal: + +- `omnigraph version` — the binary's served version (the `internal-schema ` line). +- `omnigraph snapshot ` — the graph's on-disk `internal_schema_version`. + +If the graph's stamp is **higher** than the binary's, the binary is too old — +upgrade omnigraph rather than rebuilding the graph. + +## What is preserved (and what is not) + +| Preserved | Not preserved | +|---|---| +| All node and edge rows | Commit history (the graph DAG starts fresh) | +| Vector columns (embeddings round-trip verbatim) | Branches (export is a single-branch snapshot) | +| Blob columns | Snapshot/time-travel history of the old graph | +| The schema (re-applied at `init`) | | + +The rebuilt graph is a faithful copy of the exported branch's **current state**. +If you need history or multiple branches carried forward, there is no supported +path today — export each branch you care about separately. + +## The recipe + +Use the **old** binary for the export steps and the **new** binary for init/load. +Keep them as separate executables (for example a downloaded release archive) so you +can run both. + +```bash +# 1. With the OLD binary — capture the schema and the data. +old-omnigraph schema show s3://bucket/graph.omni > schema.pg +old-omnigraph export s3://bucket/graph.omni > graph.jsonl + +# 2. With the NEW binary — create a fresh graph and load the data. +omnigraph init --schema schema.pg s3://bucket/graph-v2.omni +omnigraph load --mode overwrite --data graph.jsonl s3://bucket/graph-v2.omni + +# 3. With the NEW binary — verify. +omnigraph snapshot s3://bucket/graph-v2.omni # internal_schema_version is current +omnigraph version # confirms the binary's served version +``` + +`omnigraph export` writes a full JSONL snapshot (one row per node/edge, all +columns including vectors and blobs) of the chosen branch (default `main`; pass +`--branch` for another) to stdout. `omnigraph load --mode overwrite` replaces the +target graph's contents with that snapshot. + +Once you have verified the rebuilt graph, retire the old one. If you rebuilt +in place (same URI), export to a side location first and only overwrite after the +new graph verifies. + +## Notes + +- **Upgrade the whole fleet together.** A mixed fleet where an old binary still + writes a graph a newer binary has stamped is unsupported, as with any + internal-schema bump. +- **Embeddings are not recomputed.** Export carries the stored vectors verbatim, so + a load does not re-run the embedding pipeline. If you changed the embedding model, + re-embed after loading. +- **Server deployments**: take the graph out of the serving set, rebuild it offline + with the CLI, then point the cluster at the rebuilt graph (`cluster apply`). diff --git a/docs/user/reference/constants.md b/docs/user/reference/constants.md index 0e9ee22f..e952a377 100644 --- a/docs/user/reference/constants.md +++ b/docs/user/reference/constants.md @@ -3,9 +3,9 @@ | Name | Value | Area | |---|---|---| | `MANIFEST_DIR` | `__manifest` | manifest layout | -| Commit graph dir | `_graph_commits.lance` | branch-ref carrier + pre-v4 lineage source (lineage lives in `__manifest` since RFC-013 Phase 7) | -| Run registry dir (legacy, removed) | `_graph_runs.lance` | inert post-v0.4.0; bytes remain until a prefix-delete primitive lands | -| Run branch prefix (legacy, removed) | `__run__` | swept off `__manifest` by the internal schema migration; no longer a reserved name | +| Commit graph dirs (retired) | `_graph_commits.lance` / `_graph_commit_actors.lance` | retired in Phase B; lineage lives in `__manifest` (`graph_commit` / `graph_head` rows) since RFC-013 Phase 7. A graph this binary creates has neither. | +| Recovery audit dir | `_graph_commit_recoveries.lance` | one row per crash-recovery action (`omnigraph commit list --filter actor=omnigraph:recovery`) | +| Run branch prefix (legacy, removed) | `__run__` | pre-v0.4.0 Run state machine; no longer a reserved name. A graph still carrying `__run__*` branches is sub-v4 and refused on open (rebuild via export/import). | | Schema apply lock | `__schema_apply_lock__` | schema apply | | Manifest publisher retry budget | `PUBLISHER_RETRY_BUDGET = 5` | manifest publish | | Internal manifest schema version | `INTERNAL_MANIFEST_SCHEMA_VERSION = 4` | manifest migrations (v4 = graph lineage in `__manifest`, RFC-013 Phase 7) | From 22036f4261397cacf8f0b15040cb272408a5193d Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Sun, 28 Jun 2026 16:06:16 +0200 Subject: [PATCH 8/8] =?UTF-8?q?test(manifest):=20cover=20the=20v3-refusal?= =?UTF-8?q?=E2=86=92export/import=20rebuild=20cycle=20and=20branch=20stamp?= =?UTF-8?q?=20inheritance?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two coverage additions from PR review (P1): (a) sub_current_graph_is_refused_then_rebuilt_via_export_import — the full operator narrative in one flow: load → export → a sub-CURRENT graph (stamp rewound below CURRENT) is refused with the export nudge → fresh init + load(export) → data present and the rebuilt graph opens. The refusal is stamp-only (read before any data), so a stamp-rewound graph is a faithful stand-in for a real older-release graph without a second binary; vector/blob fidelity stays covered by tests/export.rs. (b) branch_inherits_main_internal_schema_stamp — proves a branch cannot diverge from main's stamp under single-binary operation (create_branch forks main's __manifest, the publisher does not re-stamp), which is why the graph-level (main-only) stamp gate is sufficient for supported inputs. A divergent branch stamp needs concurrent multi-version writers, the unsupported topology recorded as a known gap. --- crates/omnigraph/src/db/manifest/tests.rs | 101 ++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/crates/omnigraph/src/db/manifest/tests.rs b/crates/omnigraph/src/db/manifest/tests.rs index 9b677fb4..39206c22 100644 --- a/crates/omnigraph/src/db/manifest/tests.rs +++ b/crates/omnigraph/src/db/manifest/tests.rs @@ -1592,6 +1592,36 @@ async fn test_init_stamps_internal_schema_version() { ); } +// The internal-schema stamp is gated at the graph (main) level. That is sufficient +// for supported inputs precisely because a branch cannot diverge from main's stamp +// under single-binary operation: a fresh graph stamps main at CURRENT, `create_branch` +// forks main's `__manifest` (carrying its schema metadata, stamp included), and the +// publisher writes rows without re-stamping. So every branch is always at main's +// stamp. (A divergent branch stamp needs concurrent *multi-version* writers — an +// unsupported topology, recorded as a known gap in docs/dev/invariants.md.) This is +// the "if mixed branch stamps are impossible for supported inputs, prove it" test. +#[tokio::test] +async fn branch_inherits_main_internal_schema_stamp() { + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let catalog = build_test_catalog(); + let mut mc = ManifestCoordinator::init(uri, &catalog).await.unwrap(); + mc.create_branch("feature").await.unwrap(); + + let main_ds = open_manifest_dataset(uri, None).await.unwrap(); + let feature_ds = open_manifest_dataset(uri, Some("feature")).await.unwrap(); + assert_eq!( + super::migrations::read_stamp(&main_ds), + super::migrations::INTERNAL_MANIFEST_SCHEMA_VERSION, + "fresh graph stamps main at CURRENT", + ); + assert_eq!( + super::migrations::read_stamp(&feature_ds), + super::migrations::read_stamp(&main_ds), + "create_branch forks main's stamp — a branch never diverges under single-binary operation", + ); +} + #[tokio::test] async fn test_publish_rejects_manifest_stamped_at_future_version() { let dir = tempfile::tempdir().unwrap(); @@ -1727,6 +1757,77 @@ async fn sub_current_graph_is_refused_on_open_with_rebuild_hint() { ); } +// The full operator upgrade narrative in one flow: load data → export → a graph from +// an older release (simulated by rewinding the stamp below CURRENT) is refused with +// the export/import nudge → rebuild via fresh `init` + `load` → the data is present +// and the rebuilt graph opens. The refusal is **stamp-only** (read before any data), +// so a stamp-rewound graph is a faithful stand-in for a real older-release graph +// without needing a second binary — the on-disk layout is never reached. Data +// fidelity for vector / blob columns is covered by the export round-trip tests in +// `tests/export.rs`; this test composes the refusal with the rebuild so the operator +// path proven in the docs (`docs/user/operations/upgrade.md`) is exercised end to end. +#[tokio::test] +async fn sub_current_graph_is_refused_then_rebuilt_via_export_import() { + use crate::db::Omnigraph; + use crate::loader::{LoadMode, load_jsonl}; + + let schema = "node Person {\n name: String @key\n age: I32?\n}\n"; + let seed = "{\"type\":\"Person\",\"data\":{\"name\":\"alice\",\"age\":30}}\n\ + {\"type\":\"Person\",\"data\":{\"name\":\"bob\",\"age\":41}}\n"; + + // The operator's existing graph; export it with the (here, current) binary + // before upgrading. + let dir_old = tempfile::tempdir().unwrap(); + let uri_old = dir_old.path().to_str().unwrap(); + let mut db_old = Omnigraph::init(uri_old, schema).await.unwrap(); + load_jsonl(&mut db_old, seed, LoadMode::Overwrite) + .await + .unwrap(); + let exported = db_old.export_jsonl("main", &[], &[]).await.unwrap(); + assert!( + exported.contains("alice") && exported.contains("bob"), + "export must carry the loaded rows", + ); + drop(db_old); + + // Make it look like a graph from an older release: rewind the stamp below CURRENT. + { + let mut ds = open_manifest_dataset(uri_old, None).await.unwrap(); + super::migrations::set_stamp_for_test(&mut ds, 3).await.unwrap(); + } + let err = match Omnigraph::open(uri_old).await { + Ok(_) => panic!("a sub-CURRENT graph must be refused on open"), + Err(err) => err, + }; + assert!( + err.to_string().contains("export"), + "the refusal must nudge the operator to `omnigraph export`, got: {err}", + ); + + // Rebuild with this binary: fresh init + load the export. + let dir_new = tempfile::tempdir().unwrap(); + let uri_new = dir_new.path().to_str().unwrap(); + let mut db_new = Omnigraph::init(uri_new, schema).await.unwrap(); + load_jsonl(&mut db_new, &exported, LoadMode::Overwrite) + .await + .unwrap(); + + // The rebuilt graph preserves the data and is at CURRENT (opens without refusal). + let rebuilt = db_new.export_jsonl("main", &[], &[]).await.unwrap(); + assert!( + rebuilt.contains("alice") && rebuilt.contains("bob"), + "the rebuilt graph must preserve every node", + ); + assert_eq!( + rebuilt.lines().count(), + exported.lines().count(), + "export → init → load round-trips every row", + ); + Omnigraph::open(uri_new) + .await + .expect("the rebuilt graph is at CURRENT and opens"); +} + // ── RFC-013 Phase 7 / step 5: the `graph_head` concurrency gate ────────────── // // Two (or N) writers committing DISJOINT tables on the same branch still share