diff --git a/core/src/Literal.ts b/core/src/Literal.ts index d9a6107a6..1e1c54443 100644 --- a/core/src/Literal.ts +++ b/core/src/Literal.ts @@ -73,6 +73,13 @@ export class Literal { return parseFloat(numberString) } + if(body.startsWith("boolean:")) { + const boolString = decodeURIComponent(body.substring(8)) + if (boolString === "true") return true + if (boolString === "false") return false + throw new Error(`Can't parse boolean literal: ${boolString}`) + } + if(body.startsWith("json:")) { const json = body.substring(5) return JSON.parse(decodeURIComponent(json)) diff --git a/core/src/model/Ad4mModel.ts b/core/src/model/Ad4mModel.ts index 3fd122859..ee7e7f4a6 100644 --- a/core/src/model/Ad4mModel.ts +++ b/core/src/model/Ad4mModel.ts @@ -11,7 +11,7 @@ import { isArrayType, determinePredicate, determineNamespace, buildModelFromJSON import type { SHACLShape } from "../shacl/SHACLShape"; import type { JSONSchemaProperty, JSONSchema, JSONSchemaToModelOptions } from "./json-schema"; -import { buildSPARQLQuery } from "./query-sparql"; +import { buildSPARQLQuery, valueToLiteralIri } from "./query-sparql"; import { ModelQueryBuilder } from "./ModelQueryBuilder"; import { normalizeValue, @@ -1091,7 +1091,23 @@ export class Ad4mModel { } if (resolveLanguage) { - value = await this._perspective.createExpression(value, resolveLanguage); + // For the built-in "literal" resolver we produce the deterministic + // `literal:string:` / `:number:` / `:boolean:` / `:json:` URL directly + // instead of round-tripping through `createExpression`. The Rust + // mirror of this bypass lives in `resolve_property_value` — without + // both sides, the same value would land as `literal:json:` on writes through this path, defeating the indexed + // equality lookups the WHERE builders now use. + // + // Route through `valueToLiteralIri` (also used by `queryToSPARQL`) + // so URI-shaped strings stay raw on both write AND read: writing + // `"https://example.com"` keeps it as ``, + // matching what WHERE filters generate for the same value. + if (resolveLanguage === "literal") { + value = valueToLiteralIri(value); + } else { + value = await this._perspective.createExpression(value, resolveLanguage); + } } await this._perspective.executeAction(actions, this._baseExpression, [{ name: "value", value }], batchId); diff --git a/core/src/model/query-sparql.ts b/core/src/model/query-sparql.ts index 87ea8ba7a..99ad591df 100644 --- a/core/src/model/query-sparql.ts +++ b/core/src/model/query-sparql.ts @@ -81,8 +81,14 @@ function looksLikeUri(value: string): boolean { * Convert a JS value to its literal: IRI form, matching how the Rust executor * stores property values that don't have a resolveLanguage set. * Strings that already look like URIs are returned as-is. + * + * Exported so write-side helpers (e.g. `Ad4mModel.setProperty`'s literal + * resolveLanguage bypass) can keep their on-disk form aligned with what + * `queryToSPARQL()` filters against — otherwise the same value can be + * stored as `` from one path while WHERE + * builders probe `` from the other. */ -function valueToLiteralIri(value: any): string { +export function valueToLiteralIri(value: any): string { if (typeof value === 'string') { if (looksLikeUri(value)) return value; return Literal.from(value).toUrl(); diff --git a/core/src/perspectives/SparqlBindings.ts b/core/src/perspectives/SparqlBindings.ts index 0de6d2ac0..5bcab8103 100644 --- a/core/src/perspectives/SparqlBindings.ts +++ b/core/src/perspectives/SparqlBindings.ts @@ -47,17 +47,16 @@ export function parseSparqlCount(result: CountBinding[] | undefined | null): num /** * Decode a `Literal`-encoded SPARQL binding back to a plain string. * - * Mirrors Flux's local `parseLit` helper. Returns `''` for `undefined`/empty - * input. Falls through to the raw value if decoding fails (defensive — the - * binding may already be a plain string for unwrapped properties). + * Returns `''` for `undefined`/empty input. Falls through to the raw value + * if decoding fails — bindings against properties without `resolveLanguage` + * are already raw URIs and should pass through unchanged. `literal:json:` + * objects are JSON-stringified for display. */ export function parseLit(val: string | undefined | null): string { if (val === undefined || val === null || val === '') return ''; try { const result = Literal.fromUrl(val).get(); - if (result && typeof result === 'object') { - return (result as { data?: string }).data ?? JSON.stringify(result); - } + if (typeof result === 'object' && result !== null) return JSON.stringify(result); return String(result); } catch { return val; diff --git a/rust-executor/src/languages/literal.rs b/rust-executor/src/languages/literal.rs index 6f5ff8e1c..7e22221fd 100644 --- a/rust-executor/src/languages/literal.rs +++ b/rust-executor/src/languages/literal.rs @@ -31,9 +31,11 @@ pub fn literal_encode(value: &JsonValue) -> String { /// Decode a literal URL expression part back into a JSON value. /// -/// Mirrors the TypeScript `Literal.fromUrl("literal://").get()` behavior. -/// If the decoded value is not an object (i.e., is a primitive), wraps it in a standard -/// expression envelope with `author`, `timestamp`, `data`, and `proof` fields. +/// Mirrors the TypeScript `Literal.fromUrl("literal:").get()`. +/// `string:` / `number:` / `boolean:` decode to their typed primitive. `json:` +/// payloads are returned as the decoded JSON value verbatim — if the payload +/// happens to be a signed-expression envelope (`{author, timestamp, data, proof}`), +/// it is the caller's job to interpret that shape. pub fn literal_decode(expression_part: &str) -> Result { let value = if let Some(rest) = expression_part.strip_prefix("string:") { let decoded = percent_decode_str(rest).decode_utf8().map_err(|e| { @@ -86,27 +88,7 @@ pub fn literal_decode(expression_part: &str) -> Result serde_json::from_str(&decoded).unwrap_or(JsonValue::String(decoded.to_string())) }; - // If the value is already an object (e.g., a full expression), return it as-is. - // Otherwise, wrap it in a standard expression envelope. - if value.is_object() { - Ok(value) - } else { - let mut envelope = serde_json::Map::new(); - envelope.insert( - "author".to_string(), - JsonValue::String("".to_string()), - ); - envelope.insert( - "timestamp".to_string(), - JsonValue::String("".to_string()), - ); - envelope.insert("data".to_string(), value); - envelope.insert( - "proof".to_string(), - JsonValue::Object(serde_json::Map::new()), - ); - Ok(JsonValue::Object(envelope)) - } + Ok(value) } #[cfg(test)] @@ -119,8 +101,7 @@ mod tests { let encoded = literal_encode(&value); assert!(encoded.starts_with("string:")); let decoded = literal_decode(&encoded).unwrap(); - // Primitives get wrapped in an envelope - assert_eq!(decoded["data"], value); + assert_eq!(decoded, value); } #[test] @@ -129,7 +110,7 @@ mod tests { let encoded = literal_encode(&value); assert!(encoded.starts_with("number:")); let decoded = literal_decode(&encoded).unwrap(); - assert_eq!(decoded["data"], value); + assert_eq!(decoded, value); } #[test] @@ -138,7 +119,7 @@ mod tests { let encoded = literal_encode(&value); assert!(encoded.starts_with("boolean:")); let decoded = literal_decode(&encoded).unwrap(); - assert_eq!(decoded["data"], value); + assert_eq!(decoded, value); } #[test] diff --git a/rust-executor/src/perspectives/mod.rs b/rust-executor/src/perspectives/mod.rs index 6efd0e2e4..0e698c8ba 100644 --- a/rust-executor/src/perspectives/mod.rs +++ b/rust-executor/src/perspectives/mod.rs @@ -119,6 +119,35 @@ pub fn initialize_from_db() { Err(e) => log::warn!("Reifier migration for {}: {}", handle_clone.uuid, e), } + // Signed-envelope → plain-literal migration. The new indexed WHERE + // path probes `literal:*` targets directly, so if this migration + // fails midway through the perspective is left with a mix of legacy + // signed-envelope targets and new plain-literal targets — equality + // filters would then silently miss the unmigrated rows. Skip + // initialising the perspective on error so the next executor restart + // can retry the migration cleanly rather than serving stale results. + match p.sparql_store.migrate_signed_envelopes_to_plain_literals() { + Ok(count) if count > 0 => { + log::info!( + "🔄 Signed-envelope migration for {}: {} envelopes converted", + handle_clone.uuid, + count + ); + } + Ok(_) => {} // Already migrated or nothing to migrate + Err(e) => { + log::error!( + "Signed-envelope migration failed for {}: {} — \ + skipping perspective init to avoid mixing legacy and \ + migrated link targets under the new indexed WHERE path. \ + Will retry on next executor restart.", + handle_clone.uuid, + e + ); + return; + } + } + // Rebuild SPARQL index from existing links // Skip SPARQL rebuild if persistent store already has data if p.sparql_store.has_data() { diff --git a/rust-executor/src/perspectives/model_query/integration_tests.rs b/rust-executor/src/perspectives/model_query/integration_tests.rs index c3e3ef175..6244ac268 100644 --- a/rust-executor/src/perspectives/model_query/integration_tests.rs +++ b/rust-executor/src/perspectives/model_query/integration_tests.rs @@ -3352,9 +3352,11 @@ async fn test_full_model_query_ops_contains_with_pagination() { ); } -/// Helper: create a signed-envelope literal IRI (mimics what expression.create("literal", value) -/// produces in production). The signed envelope is JSON with {author, timestamp, data, proof}. -fn signed_envelope_literal(value: &str) -> String { +/// Build a `literal:json:` IRI — the shape produced by +/// `expression.create("literal", value)` and the shape stored on disk by +/// older databases that pre-date plain-literal writes. Used by tests that +/// seed envelope-form data and then exercise the migration path. +fn legacy_envelope_literal(value: &str) -> String { let envelope = serde_json::json!({ "author": "did:key:zQ3shTestAgent", "timestamp": "2024-01-01T00:00:00.000Z", @@ -3370,16 +3372,15 @@ fn signed_envelope_literal(value: &str) -> String { format!("literal:json:{}", literal_percent_encode(&json_str)) } -/// Regression test for signed-envelope literals with fn/parse_literal WHERE clauses. -/// Exercises the exact pattern used by paginateSubscribe: model query with WHERE -/// filtering on a literal property, pagination (limit/offset), and count=true, -/// where stored values are signed expression envelopes (literal:json:{signed}). +/// Seed envelope-form data, run the migration, and verify model queries +/// (WHERE + pagination + count) succeed against the rewritten plain-literal +/// targets. Guards the boot-time upgrade path for stores that still hold +/// envelope-shaped targets from older writers. #[tokio::test] -async fn test_signed_envelope_where_paginate_count() { +async fn test_legacy_envelope_migrated_then_paginate_count() { let store = SparqlStore::new(None).unwrap(); let ts_base = 1700000000000i64; - // Insert 4 items: 3 active, 1 inactive — all using signed envelope format let items = vec![ ("test://item-1", "active", "Alpha"), ("test://item-2", "active", "Beta"), @@ -3395,7 +3396,7 @@ async fn test_signed_envelope_where_paginate_count() { .add_link(&make_link( uri, "ns://status", - &signed_envelope_literal(status), + &legacy_envelope_literal(status), &ts, )) .unwrap(); @@ -3403,12 +3404,22 @@ async fn test_signed_envelope_where_paginate_count() { .add_link(&make_link( uri, "ns://name", - &signed_envelope_literal(name), + &legacy_envelope_literal(name), &ts, )) .unwrap(); } + // T4: simulate first-boot migration converting envelope-form data to plain literals. + // After migration the new index-friendly WHERE (V4) can probe the POS index. + let migrated = store + .migrate_signed_envelopes_to_plain_literals() + .expect("migration should succeed"); + assert!( + migrated > 0, + "expected migration to rewrite at least one envelope, got {migrated}" + ); + let shape_json = r#"{ "className": "Task", "properties": { @@ -3455,11 +3466,12 @@ async fn test_signed_envelope_where_paginate_count() { "Second item by timestamp" ); - // Verify hydration: name should be the unwrapped data, not the full signed envelope + // Verify hydration: after migration the stored target is `literal:string:active`, + // and parse_literal_value decodes it back to the plain "active" string. assert_eq!( result.instances[0]["status"].as_str().unwrap(), "active", - "Status should be unwrapped from signed envelope" + "Status should be plain literal post-migration" ); // Page 2: offset 2 @@ -3492,14 +3504,15 @@ async fn test_signed_envelope_where_paginate_count() { ); } -/// Regression: mixed literal formats (plain + signed envelope) coexist in the same query. -/// This can happen during migration or when different code paths create links. +/// Mixed envelope-form and plain-form rows in the same store all become +/// queryable after one migration pass, including via the `contains` filter +/// (which still routes through `fn/parse_literal`). #[tokio::test] -async fn test_mixed_plain_and_signed_envelope_where() { +async fn test_legacy_mixed_migrated_then_contains() { let store = SparqlStore::new(None).unwrap(); let ts_base = 1700000000000i64; - // Item 1: plain literal (old format) + // Item 1: target is already a plain literal. store .add_link(&make_link( "test://old", @@ -3517,7 +3530,7 @@ async fn test_mixed_plain_and_signed_envelope_where() { )) .unwrap(); - // Item 2: signed envelope (new format) + // Item 2: target is an envelope-form literal. store .add_link(&make_link( "test://new", @@ -3530,11 +3543,15 @@ async fn test_mixed_plain_and_signed_envelope_where() { .add_link(&make_link( "test://new", "ns://body", - &signed_envelope_literal("hello signed"), + &legacy_envelope_literal("hello signed"), &format!("{}", ts_base + 1), )) .unwrap(); + store + .migrate_signed_envelopes_to_plain_literals() + .expect("migration should succeed"); + let shape_json = r#"{ "className": "Msg", "properties": { @@ -3544,7 +3561,8 @@ async fn test_mixed_plain_and_signed_envelope_where() { "relations": {} }"#; - // Query with contains "hello" — should match both formats + // Query with contains "hello" — `contains` still uses fn/parse_literal, so + // it works against the plain-literal form post-migration. let mut wc = BTreeMap::new(); wc.insert( "body".to_string(), @@ -3566,14 +3584,14 @@ async fn test_mixed_plain_and_signed_envelope_where() { .await .unwrap(); - assert_eq!(result.instances.len(), 2, "Both formats should match"); + assert_eq!(result.instances.len(), 2, "Both items should match"); assert_eq!(result.instances[0]["body"].as_str().unwrap(), "hello plain"); assert_eq!( result.instances[1]["body"].as_str().unwrap(), "hello signed" ); - // Exact match on signed envelope value + // Exact equality on the migrated value — V4 emits a direct IRI probe. let mut wc2 = BTreeMap::new(); wc2.insert( "body".to_string(), @@ -3591,13 +3609,183 @@ async fn test_mixed_plain_and_signed_envelope_where() { .await .unwrap(); - assert_eq!(result2.instances.len(), 1, "Exact match on signed envelope"); + assert_eq!( + result2.instances.len(), + 1, + "Exact match on migrated literal" + ); assert_eq!( result2.instances[0]["body"].as_str().unwrap(), "hello signed" ); } +/// Same workload as `test_legacy_envelope_migrated_then_paginate_count` but +/// with plain `literal:string:` targets from the start. Confirms model +/// queries reach the rows through the indexed direct-IRI WHERE form alone, +/// without any envelope unwrap step. +#[tokio::test] +async fn test_plain_literal_where_paginate_count() { + let store = SparqlStore::new(None).unwrap(); + let ts_base = 1700000000000i64; + + let items = vec![ + ("test://item-1", "active", "Alpha"), + ("test://item-2", "active", "Beta"), + ("test://item-3", "inactive", "Gamma"), + ("test://item-4", "active", "Delta"), + ]; + for (i, (uri, status, name)) in items.iter().enumerate() { + let ts = format!("{}", ts_base + i as i64); + store + .add_link(&make_link(uri, "ns://type", "ns://task", &ts)) + .unwrap(); + store + .add_link(&make_link(uri, "ns://status", &signed_literal(status), &ts)) + .unwrap(); + store + .add_link(&make_link(uri, "ns://name", &signed_literal(name), &ts)) + .unwrap(); + } + + let shape_json = r#"{ + "className": "Task", + "properties": { + "type": { "predicate": "ns://type", "required": true, "flag": true, "initial": "ns://task" }, + "status": { "predicate": "ns://status", "required": false, "resolveLanguage": "literal" }, + "name": { "predicate": "ns://name", "required": false, "resolveLanguage": "literal" } + }, + "relations": {} + }"#; + + let mut wc = BTreeMap::new(); + wc.insert( + "status".to_string(), + WhereCondition::String("active".to_string()), + ); + let result = execute_model_query_from_json( + &store, + "Task", + &ModelQueryInput { + where_clause: Some(wc.clone()), + limit: Some(2), + offset: Some(0), + order: Some(vec![("timestamp".to_string(), OrderDirection::ASC)]), + count: Some(true), + ..Default::default() + }, + shape_json, + ) + .await + .unwrap(); + + assert_eq!(result.instances.len(), 2, "Page should have 2 items"); + assert_eq!(result.total_count, 3, "Total active items should be 3"); + assert_eq!(result.instances[0]["name"].as_str().unwrap(), "Alpha"); + assert_eq!(result.instances[1]["name"].as_str().unwrap(), "Beta"); + assert_eq!(result.instances[0]["status"].as_str().unwrap(), "active"); + + let result2 = execute_model_query_from_json( + &store, + "Task", + &ModelQueryInput { + where_clause: Some(wc), + limit: Some(2), + offset: Some(2), + order: Some(vec![("timestamp".to_string(), OrderDirection::ASC)]), + count: Some(true), + ..Default::default() + }, + shape_json, + ) + .await + .unwrap(); + + assert_eq!(result2.instances.len(), 1); + assert_eq!(result2.total_count, 3); + assert_eq!(result2.instances[0]["name"].as_str().unwrap(), "Delta"); +} + +/// Guards that the `contains` filter — which can't reduce to a direct IRI +/// equality and so still goes through `fn/parse_literal` for substring +/// semantics — keeps matching plain `literal:string:` targets correctly. +#[tokio::test] +async fn test_plain_literal_contains_works_on_fn_parse_literal_path() { + let store = SparqlStore::new(None).unwrap(); + let ts_base = 1700000000000i64; + + store + .add_link(&make_link( + "test://a", + "ns://type", + "ns://msg", + &format!("{ts_base}"), + )) + .unwrap(); + store + .add_link(&make_link( + "test://a", + "ns://body", + &signed_literal("hello world"), + &format!("{ts_base}"), + )) + .unwrap(); + + store + .add_link(&make_link( + "test://b", + "ns://type", + "ns://msg", + &format!("{}", ts_base + 1), + )) + .unwrap(); + store + .add_link(&make_link( + "test://b", + "ns://body", + &signed_literal("goodbye world"), + &format!("{}", ts_base + 1), + )) + .unwrap(); + + let shape_json = r#"{ + "className": "Msg", + "properties": { + "type": { "predicate": "ns://type", "required": true, "flag": true, "initial": "ns://msg" }, + "body": { "predicate": "ns://body", "required": false, "resolveLanguage": "literal" } + }, + "relations": {} + }"#; + + let mut wc = BTreeMap::new(); + wc.insert( + "body".to_string(), + WhereCondition::Ops(WhereOps { + contains: Some(Value::String("hello".to_string())), + ..Default::default() + }), + ); + let result = execute_model_query_from_json( + &store, + "Msg", + &ModelQueryInput { + where_clause: Some(wc), + order: Some(vec![("timestamp".to_string(), OrderDirection::ASC)]), + ..Default::default() + }, + shape_json, + ) + .await + .unwrap(); + + assert_eq!( + result.instances.len(), + 1, + "contains 'hello' should match only one row" + ); + assert_eq!(result.instances[0]["body"].as_str().unwrap(), "hello world"); +} + // ----------------------------------------------------------------------- // Performance / scale tests // ----------------------------------------------------------------------- @@ -4199,3 +4387,116 @@ async fn test_resolve_projections_where_filter_via_target_shape_property() { "list with limit:1 should return the hydrated like signal id, got {got}" ); } + +// ----------------------------------------------------------------------------- +// Indexed-WHERE benchmark +// ----------------------------------------------------------------------------- +// +// `cargo test --release --lib perspectives::model_query::integration_tests::bench` +// +// Compares two equivalent SPARQL queries against the same `literal:string:` +// data: an indexed direct-IRI probe vs. a `fn/parse_literal`-wrapped FILTER. +// The former is what the WHERE builders now emit; the latter is the shape +// they emitted before. Both queries find the same rows; the difference is +// whether Oxigraph's planner can use the POS index. + +#[test] +fn bench_indexed_iri_vs_fn_parse_literal_filter() { + use std::time::Instant; + + // Skip in debug builds — comparing per-row function call to an index probe + // is meaningless without optimisations. + if cfg!(debug_assertions) { + eprintln!("(bench skipped — run with --release)"); + return; + } + + // Toggle scale with WT_BENCH_LINKS; 10k by default. + let n_links: usize = std::env::var("WT_BENCH_LINKS") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(10_000); + + let store = SparqlStore::new(None).unwrap(); + let pred = "ns://body"; + let target_value = "needle"; + let stored_target = format!("literal:string:{}", literal_percent_encode(target_value)); + + // Seed N rows; only the last carries the matching target. + let needle_idx = n_links - 1; + for i in 0..n_links { + let source = format!("test://row/{i}"); + store + .add_link(&make_link( + &source, + "ns://type", + "ns://row", + &format!("{}", 1_700_000_000_000_i64 + i as i64), + )) + .unwrap(); + let target = if i == needle_idx { + stored_target.clone() + } else { + format!( + "literal:string:{}", + literal_percent_encode(&format!("row-{i}")) + ) + }; + store + .add_link(&make_link( + &source, + pred, + &target, + &format!("{}", 1_700_000_000_000_i64 + i as i64), + )) + .unwrap(); + } + + let indexed = format!("SELECT ?source WHERE {{ ?source <{pred}> <{stored_target}> . }}"); + let filtered = format!( + "SELECT ?source WHERE {{ \ + ?source <{pred}> ?t . \ + FILTER(STR((?t)) = \"{target_value}\") \ + }}" + ); + + // Warm-up — touch every triple under both query plans before timing. + let _ = store.query(&indexed).unwrap(); + let _ = store.query(&filtered).unwrap(); + + let runs = 5; + let mut indexed_total = std::time::Duration::ZERO; + let mut filtered_total = std::time::Duration::ZERO; + let expected = format!("test://row/{needle_idx}"); + + for _ in 0..runs { + let start = Instant::now(); + let r = store.query(&indexed).unwrap(); + indexed_total += start.elapsed(); + let rows: Vec = serde_json::from_str(&r).unwrap(); + assert_eq!(rows.len(), 1, "indexed query must return exactly 1 row"); + assert_eq!( + rows[0]["source"].as_str(), + Some(expected.as_str()), + "indexed query must return only the needle source", + ); + + let start = Instant::now(); + let r = store.query(&filtered).unwrap(); + filtered_total += start.elapsed(); + let rows: Vec = serde_json::from_str(&r).unwrap(); + assert_eq!(rows.len(), 1, "filtered query must return exactly 1 row"); + assert_eq!( + rows[0]["source"].as_str(), + Some(expected.as_str()), + "filtered query must return only the needle source", + ); + } + + let indexed_us = (indexed_total.as_secs_f64() * 1_000_000.0) / runs as f64; + let filtered_us = (filtered_total.as_secs_f64() * 1_000_000.0) / runs as f64; + let speedup = filtered_us / indexed_us; + eprintln!( + "[bench] n={n_links} indexed={indexed_us:.1}µs fn_parse_literal_filter={filtered_us:.1}µs speedup={speedup:.1}x" + ); +} diff --git a/rust-executor/src/perspectives/model_query/projection.rs b/rust-executor/src/perspectives/model_query/projection.rs index 3e8848c10..5351bd1d5 100644 --- a/rust-executor/src/perspectives/model_query/projection.rs +++ b/rust-executor/src/perspectives/model_query/projection.rs @@ -20,7 +20,10 @@ use std::collections::{BTreeMap, HashMap}; use super::types::{ ModelQueryInput, ModelShape, OrderDirection, ProjectionInput, ShapeResolver, WhereCondition, }; -use super::utils::{escape_sparql_string, validate_iri}; +use super::utils::{ + escape_sparql_string, format_literal_number, literal_percent_encode, looks_like_absolute_iri, + validate_iri, +}; use crate::perspectives::sparql_store::SparqlStore; /// Resolve all projections for a set of parent instances. @@ -286,19 +289,29 @@ pub(super) fn build_projection_where_patterns( // Resolve the target class's shape through the perspective cache so we // can translate property names in the projection's where-clause into the - // predicate IRIs they map to in the store. + // predicate IRIs they map to in the store. The second tuple element + // records whether each property carries `resolveLanguage: "literal"` — + // only that resolver stores deterministic `literal:*:` targets that we + // can probe directly. Other resolvers wrap values in author-signed + // expression IRIs, so we fall back to `fn/parse_literal` for those. let (pred_lookup, resolution_failed) = if let Some(ref target_name) = proj.target_class_name { match resolver.get_shape(target_name) { Ok(target_shape) => { - let mut map = HashMap::new(); + let mut map: HashMap = HashMap::new(); for p in &target_shape.properties { if !p.predicate.is_empty() { - map.insert(p.name.clone(), p.predicate.clone()); + map.insert( + p.name.clone(), + ( + p.predicate.clone(), + p.resolve_language.as_deref() == Some("literal"), + ), + ); } } for r in &target_shape.include_relations { if !r.predicate.is_empty() { - map.insert(r.name.clone(), r.predicate.clone()); + map.insert(r.name.clone(), (r.predicate.clone(), false)); } } (map, false) @@ -307,11 +320,11 @@ pub(super) fn build_projection_where_patterns( log::warn!( "Projection where-clause resolution failed for target '{target_name}': {e}" ); - (HashMap::::new(), true) + (HashMap::::new(), true) } } } else { - (HashMap::::new(), false) + (HashMap::::new(), false) }; // Fail closed when the projection has non-system property filters but @@ -354,8 +367,8 @@ pub(super) fn build_projection_where_patterns( continue; } - let pred = match pred_lookup.get(prop_name) { - Some(p) => p.clone(), + let (pred, is_literal_prop) = match pred_lookup.get(prop_name) { + Some(v) => (v.0.clone(), v.1), None => continue, }; @@ -368,35 +381,100 @@ pub(super) fn build_projection_where_patterns( match condition { WhereCondition::String(val) => { - let escaped = escape_sparql_string(val); - patterns.push(format!(" ?t <{pred}> ?{var} .\n")); - patterns.push(format!( - " FILTER(STR((?{var})) = \"{escaped}\")\n", - )); + if is_literal_prop { + // Match the encoded `literal:string:` form, plus the raw IRI + // form when the value is itself a valid absolute IRI — same + // dual-shape that the model-query where-clause emits for + // constructor-seeded raw URIs on literal properties. + let encoded = literal_percent_encode(val); + let mut iris = vec![format!("")]; + if looks_like_absolute_iri(val) { + iris.push(format!("<{val}>")); + } + patterns.push(format!(" VALUES ?{var} {{ {} }}\n", iris.join(" "))); + patterns.push(format!(" ?t <{pred}> ?{var} .\n")); + } else { + let escaped = escape_sparql_string(val); + patterns.push(format!(" ?t <{pred}> ?{var} .\n")); + patterns.push(format!( + " FILTER(STR((?{var})) = \"{escaped}\")\n", + )); + } } WhereCondition::Bool(b) => { - let bval = if *b { "true" } else { "false" }; - patterns.push(format!(" ?t <{pred}> ?{var} .\n")); - patterns.push(format!( - " FILTER(STR((?{var})) = \"{bval}\")\n", - )); + if is_literal_prop { + patterns.push(format!(" ?t <{pred}> .\n")); + } else { + let bval = if *b { "true" } else { "false" }; + patterns.push(format!(" ?t <{pred}> ?{var} .\n")); + patterns.push(format!( + " FILTER(STR((?{var})) = \"{bval}\")\n", + )); + } } WhereCondition::Number(n) => { - patterns.push(format!(" ?t <{pred}> ?{var} .\n")); - patterns.push(format!( - " FILTER(STR((?{var})) = \"{n}\")\n", - )); + if is_literal_prop { + if let Some(num_str) = format_literal_number(*n) { + patterns.push(format!(" ?t <{pred}> .\n")); + } else { + patterns.push(" FILTER(false)\n".to_string()); + } + } else { + patterns.push(format!(" ?t <{pred}> ?{var} .\n")); + patterns.push(format!( + " FILTER(STR((?{var})) = \"{n}\")\n", + )); + } } WhereCondition::StringArray(vals) => { - let list = vals - .iter() - .map(|v| format!("\"{}\"", escape_sparql_string(v))) - .collect::>() - .join(", "); - patterns.push(format!(" ?t <{pred}> ?{var} .\n")); - patterns.push(format!( - " FILTER(STR((?{var})) IN ({list}))\n", - )); + if is_literal_prop { + let mut iris: Vec = Vec::with_capacity(vals.len() * 2); + for v in vals { + iris.push(format!("", literal_percent_encode(v))); + if looks_like_absolute_iri(v) { + iris.push(format!("<{v}>")); + } + } + patterns.push(format!(" VALUES ?{var} {{ {} }}\n", iris.join(" "))); + patterns.push(format!(" ?t <{pred}> ?{var} .\n")); + } else { + let list = vals + .iter() + .map(|v| format!("\"{}\"", escape_sparql_string(v))) + .collect::>() + .join(", "); + patterns.push(format!(" ?t <{pred}> ?{var} .\n")); + patterns.push(format!( + " FILTER(STR((?{var})) IN ({list}))\n", + )); + } + } + WhereCondition::NumberArray(vals) => { + if is_literal_prop { + let iris = vals + .iter() + .filter_map(|n| { + format_literal_number(*n).map(|s| format!("")) + }) + .collect::>() + .join(" "); + if iris.is_empty() { + patterns.push(" FILTER(false)\n".to_string()); + } else { + patterns.push(format!(" VALUES ?{var} {{ {iris} }}\n")); + patterns.push(format!(" ?t <{pred}> ?{var} .\n")); + } + } else { + let list = vals + .iter() + .map(|n| format!("\"{n}\"")) + .collect::>() + .join(", "); + patterns.push(format!(" ?t <{pred}> ?{var} .\n")); + patterns.push(format!( + " FILTER(STR((?{var})) IN ({list}))\n", + )); + } } _ => {} } diff --git a/rust-executor/src/perspectives/model_query/sparql_builder.rs b/rust-executor/src/perspectives/model_query/sparql_builder.rs index ac3d80725..5f922de49 100644 --- a/rust-executor/src/perspectives/model_query/sparql_builder.rs +++ b/rust-executor/src/perspectives/model_query/sparql_builder.rs @@ -19,7 +19,10 @@ use super::types::{ InstanceQueryPlan, ModelQueryInput, ModelShape, OrderDirection, ParentScope, SortKey, SparqlPagination, WhereCondition, }; -use super::utils::{escape_sparql_string, validate_iri}; +use super::utils::{ + escape_sparql_string, format_literal_number, literal_percent_encode, looks_like_absolute_iri, + validate_iri, +}; /// Build a targeted reifier timestamp probe for the pagination sub-query. /// @@ -506,17 +509,32 @@ pub(super) fn build_query_patterns( continue; } let safe_name = prop_name.replace(|c: char| !c.is_alphanumeric(), "_"); - let is_literal_prop = prop.resolve_language.is_some(); + // Only `resolveLanguage: "literal"` stores deterministic + // `literal:*` targets we can probe directly. Other resolvers + // wrap values in author-signed expression IRIs, so they fall + // through to the FILTER-on-decoded path below. + let is_literal_prop = prop.resolve_language.as_deref() == Some("literal"); match condition { WhereCondition::String(val) => { if is_literal_prop { - let var = format!("?_pw_{safe_name}"); - where_patterns - .push(format!(" ?source <{}> {var} .", prop.predicate)); - where_patterns.push(format!( - " FILTER(STR(({var})) = \"{}\")", - escape_sparql_string(val) - )); + // Match the deterministic `literal:string:` form of the + // value directly against the indexed object position. UNION the + // raw-IRI form when the where-value itself parses as an absolute + // IRI — constructors can seed a literal-resolveLanguage property + // with a raw URI (enum-style initial values) which the storage + // layer preserves as-is. + let encoded = literal_percent_encode(val); + if looks_like_absolute_iri(val) { + where_patterns.push(format!( + " {{ ?source <{0}> . }} UNION {{ ?source <{0}> <{val}> . }}", + prop.predicate + )); + } else { + where_patterns.push(format!( + " ?source <{}> .", + prop.predicate + )); + } } else if validate_iri(val).is_ok() { where_patterns .push(format!(" ?source <{}> <{val}> .", prop.predicate)); @@ -531,42 +549,107 @@ pub(super) fn build_query_patterns( } } WhereCondition::Number(n) => { - let var = format!("?_pw_{safe_name}"); - where_patterns.push(format!(" ?source <{}> {var} .", prop.predicate)); - where_patterns.push(format!( - " FILTER(STR(({var})) = \"{n}\")" - )); + if is_literal_prop { + // Non-finite filter values cannot be stored as a `literal:number:` + // target, so emit a never-matching pattern instead of a malformed IRI. + if let Some(num_str) = format_literal_number(*n) { + where_patterns.push(format!( + " ?source <{}> .", + prop.predicate + )); + } else { + where_patterns.push(" FILTER(false)".to_string()); + } + } else { + let var = format!("?_pw_{safe_name}"); + where_patterns + .push(format!(" ?source <{}> {var} .", prop.predicate)); + where_patterns.push(format!( + " FILTER(STR(({var})) = \"{n}\")" + )); + } } WhereCondition::Bool(b) => { - let var = format!("?_pw_{safe_name}"); - where_patterns.push(format!(" ?source <{}> {var} .", prop.predicate)); - where_patterns.push(format!( - " FILTER(STR(({var})) = \"{b}\")" - )); + if is_literal_prop { + where_patterns.push(format!( + " ?source <{}> .", + prop.predicate + )); + } else { + let var = format!("?_pw_{safe_name}"); + where_patterns + .push(format!(" ?source <{}> {var} .", prop.predicate)); + where_patterns.push(format!( + " FILTER(STR(({var})) = \"{b}\")" + )); + } } WhereCondition::StringArray(vals) => { - let values_list = vals - .iter() - .map(|v| format!("\"{}\"", escape_sparql_string(v))) - .collect::>() - .join(", "); - let var = format!("?_pw_{safe_name}"); - where_patterns.push(format!(" ?source <{}> {var} .", prop.predicate)); - where_patterns.push(format!( - " FILTER(STR(({var})) IN ({values_list}))" - )); + if is_literal_prop { + // Same shape as the single-value String branch above, expanded + // into a VALUES set: one or two IRIs per input value. + let mut iris: Vec = Vec::with_capacity(vals.len() * 2); + for v in vals { + iris.push(format!( + "", + literal_percent_encode(v) + )); + if looks_like_absolute_iri(v) { + iris.push(format!("<{v}>")); + } + } + let iv_var = format!("?_iv_{safe_name}"); + where_patterns + .push(format!(" VALUES {iv_var} {{ {} }}", iris.join(" "))); + where_patterns + .push(format!(" ?source <{}> {iv_var} .", prop.predicate)); + } else { + let values_list = vals + .iter() + .map(|v| format!("\"{}\"", escape_sparql_string(v))) + .collect::>() + .join(", "); + let var = format!("?_pw_{safe_name}"); + where_patterns + .push(format!(" ?source <{}> {var} .", prop.predicate)); + where_patterns.push(format!( + " FILTER(STR(({var})) IN ({values_list}))" + )); + } } WhereCondition::NumberArray(vals) => { - let values_list = vals - .iter() - .map(|n| format!("\"{n}\"")) - .collect::>() - .join(", "); - let var = format!("?_pw_{safe_name}"); - where_patterns.push(format!(" ?source <{}> {var} .", prop.predicate)); - where_patterns.push(format!( - " FILTER(STR(({var})) IN ({values_list}))" - )); + if is_literal_prop { + // Non-finite values are silently dropped from the VALUES set; if + // none remain, fall through to a never-matching pattern. + let iris = vals + .iter() + .filter_map(|n| { + format_literal_number(*n) + .map(|s| format!("")) + }) + .collect::>() + .join(" "); + if iris.is_empty() { + where_patterns.push(" FILTER(false)".to_string()); + } else { + let iv_var = format!("?_iv_{safe_name}"); + where_patterns.push(format!(" VALUES {iv_var} {{ {iris} }}")); + where_patterns + .push(format!(" ?source <{}> {iv_var} .", prop.predicate)); + } + } else { + let values_list = vals + .iter() + .map(|n| format!("\"{n}\"")) + .collect::>() + .join(", "); + let var = format!("?_pw_{safe_name}"); + where_patterns + .push(format!(" ?source <{}> {var} .", prop.predicate)); + where_patterns.push(format!( + " FILTER(STR(({var})) IN ({values_list}))" + )); + } } WhereCondition::Ops(ops) => { let var = format!("?_pw_{safe_name}"); diff --git a/rust-executor/src/perspectives/model_query/utils.rs b/rust-executor/src/perspectives/model_query/utils.rs index 73b3c09f3..549a73ecc 100644 --- a/rust-executor/src/perspectives/model_query/utils.rs +++ b/rust-executor/src/perspectives/model_query/utils.rs @@ -6,7 +6,6 @@ //! coercion helpers used by the filtering engine. use deno_core::anyhow::{anyhow, Error}; -#[cfg(test)] use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; use serde_json::Value; @@ -19,11 +18,27 @@ use serde_json::Value; /// Uses `NON_ALPHANUMERIC` percent-encoding, matching `literal_encode` in /// `languages/literal.rs`. `urlencoding::encode` uses RFC 3986 unreserved /// chars (keeps `.-_~`), which diverges from the storage encoding. -#[cfg(test)] pub(super) fn literal_percent_encode(s: &str) -> String { utf8_percent_encode(s, NON_ALPHANUMERIC).to_string() } +/// Format a finite f64 for the `literal:number:` IRI tail, mirroring +/// `literal_encode` in `languages/literal.rs`: integers render without a +/// fractional part (e.g. `42`), floats use `{}` formatting (e.g. `3.14`). +/// +/// Returns `None` if the value is non-finite (NaN or +/- infinity) — these +/// are rejected so we never emit a malformed IRI into the SPARQL. +pub(super) fn format_literal_number(n: f64) -> Option { + if !n.is_finite() { + return None; + } + if n.fract() == 0.0 && n.abs() < (i64::MAX as f64) { + Some(format!("{}", n as i64)) + } else { + Some(format!("{n}")) + } +} + /// Escape a string value for use inside a SPARQL string literal (double-quoted). pub(super) fn escape_sparql_string(s: &str) -> String { s.replace('\\', "\\\\") @@ -33,15 +48,35 @@ pub(super) fn escape_sparql_string(s: &str) -> String { .replace('\t', "\\t") } +/// Cheap heuristic for "this string is plausibly an absolute IRI" — passes +/// `validate_iri`, has a `:`, and starts with an ASCII letter. Used to decide +/// whether a where-value is safe to emit as a `<…>` IRIREF; rejects bare +/// strings like `"active"` that would produce un-parseable SPARQL. +pub(super) fn looks_like_absolute_iri(s: &str) -> bool { + if validate_iri(s).is_err() { + return false; + } + let Some(colon_idx) = s.find(':') else { + return false; + }; + if colon_idx == 0 { + return false; + } + let first = s.as_bytes()[0]; + first.is_ascii_alphabetic() +} + /// Validate a value for use inside an IRI `<…>`. Rejects characters that -/// would break or inject into a SPARQL IRI token. +/// would break or inject into a SPARQL IRI token, including all control and +/// whitespace characters (e.g. `\n`, `\r`, `\t`, U+00A0) which `validate_iri` +/// previously let through and which would emit malformed `<…>` IRIREFs. pub(super) fn validate_iri(s: &str) -> Result<&str, Error> { - if s.contains('>') + if s.chars().any(|c| c.is_control() || c.is_whitespace()) + || s.contains('>') || s.contains('<') || s.contains('{') || s.contains('}') || s.contains('"') - || s.contains(' ') { return Err(anyhow!("Invalid IRI component: '{}'", s)); } @@ -55,12 +90,8 @@ pub(super) const MAX_INCLUDE_DEPTH: u8 = 8; // literal: URI parsing (typed) // --------------------------------------------------------------------------- -/// Parse a `literal:` URI into a typed JSON value. -/// Returns the raw string as Value::String if not a literal: URI. -/// -/// Since the signed-envelope migration (v3), all literal values are stored -/// as plain `literal:string:X`, `literal:number:X`, `literal:boolean:X`, -/// or `literal:json:X` (for non-envelope JSON objects/arrays). +/// Parse a `literal:` URI into a typed JSON value, or return the input as a +/// string when it is not a literal URI. pub(super) fn parse_literal_value(uri: &str) -> Value { let body = if let Some(rest) = uri.strip_prefix("literal:") { rest @@ -90,7 +121,10 @@ pub(super) fn parse_literal_value(uri: &str) -> Value { } else if let Some(rest) = body.strip_prefix("json:") { let decoded = urlencoding::decode(rest).unwrap_or_else(|_| rest.into()); if let Ok(json_val) = serde_json::from_str::(&decoded) { - // For signed expression envelopes, extract .data + // Unwrap signed-expression envelopes (`{author, timestamp, data, proof}`) + // to the inner `.data` so consumers always see the underlying value + // rather than the wrapper, regardless of whether the target was + // written as a plain literal or as a signed expression. if let Some(data) = json_val.get("data") { if json_val.get("author").is_some() && json_val.get("proof").is_some() { return data.clone(); diff --git a/rust-executor/src/perspectives/perspective_instance.rs b/rust-executor/src/perspectives/perspective_instance.rs index 030d98674..6963c6904 100644 --- a/rust-executor/src/perspectives/perspective_instance.rs +++ b/rust-executor/src/perspectives/perspective_instance.rs @@ -4020,7 +4020,33 @@ impl PerspectiveInstance { .await?; if let Some(resolve_language) = resolve_language { - // Create an expression for the value + // The built-in "literal" language produces a deterministic plain URI + // (`literal:string:X` / `:number:` / `:boolean:` / `:json:`) directly + // from the value. Routing through `expression_create` would wrap the + // value in a signed envelope whose IRI depends on author+timestamp, + // making property values non-deterministic and breaking exact-match + // SPARQL WHERE filters. Provenance for the link as a whole already + // lives on the reifier; the literal payload doesn't need its own. + if resolve_language == "literal" { + // Mirror the no-resolveLanguage branch below + the TS-side + // `valueToLiteralIri`: strings that already carry a URI + // scheme are stored as-is so they round-trip through the + // WHERE builders' `<…>` IRI probes without wrapping. Other + // values flow through the canonical `literal_encode` so + // migrated rows and fresh writes share one IRI shape. + if let serde_json::Value::String(s) = value { + static URI_SCHEME_RE: std::sync::OnceLock = + std::sync::OnceLock::new(); + let re = URI_SCHEME_RE + .get_or_init(|| regex::Regex::new(r"^[a-zA-Z][a-zA-Z0-9+\-._]*:").unwrap()); + if re.is_match(s) { + return Ok(s.clone()); + } + } + let encoded = crate::languages::literal_encode(value); + return Ok(format!("literal:{}", encoded)); + } + let controller = crate::languages::LanguageController::global_instance(); let agent_context = context.clone(); match controller diff --git a/rust-executor/src/perspectives/sparql_store.rs b/rust-executor/src/perspectives/sparql_store.rs index d5cc2a9ba..c4a0293f9 100644 --- a/rust-executor/src/perspectives/sparql_store.rs +++ b/rust-executor/src/perspectives/sparql_store.rs @@ -82,8 +82,10 @@ fn parse_literal_fn(args: &[Term]) -> Option { Some(Literal::new_simple_literal(rest).into()) } else if let Some(rest) = body.strip_prefix("json:") { let decoded = urlencoding::decode(rest).unwrap_or_else(|_| rest.into()); - // For JSON literals that are signed expressions (contain "data" field), - // extract just the data field value for content matching. + // Unwrap signed-expression envelopes (`{author, timestamp, data, proof}`) + // so WHERE filters can compare against the inner content. Required for + // pre-migration link stores and for the small set of expressions that + // are themselves stored as `literal:json:` (e.g. entanglement proofs). if let Ok(json_val) = serde_json::from_str::(&decoded) { if let Some(data) = json_val.get("data") { let data_str = match data { @@ -1118,6 +1120,284 @@ impl SparqlStore { ); Ok(count) } + + /// Rewrite link targets shaped like `literal:json:` to the + /// plain typed form of their inner `.data` value (`literal:string:` / + /// `:number:` / `:boolean:` / `:json:`). + /// + /// Per-link provenance lives on the RDF 1.2 reifier; the envelope-on-target + /// form duplicates that and produces non-deterministic IRIs (the envelope + /// signature varies per write) which exact-match WHERE filters can't index. + /// The reifier IRI hashes the target, so rewriting the target requires + /// rebuilding both the direct triple and the reifier with all its metadata. + pub fn migrate_signed_envelopes_to_plain_literals(&self) -> Result { + if self.migration_version() >= 3 { + return Ok(0); + } + + log::info!("Migrating signed-envelope literal targets to plain literal form"); + + use percent_encoding::percent_decode_str; + + let rdf_reifies = NamedNodeRef::new_unchecked(RDF_REIFIES); + let ont_author = NamedNodeRef::new_unchecked(ONT_AUTHOR); + let ont_timestamp = NamedNodeRef::new_unchecked(ONT_TIMESTAMP); + let ont_proof_key = NamedNodeRef::new_unchecked(ONT_PROOF_KEY); + let ont_proof_sig = NamedNodeRef::new_unchecked(ONT_PROOF_SIG); + let ont_proof_valid = NamedNodeRef::new_unchecked(ONT_PROOF_VALID); + let ont_status = NamedNodeRef::new_unchecked(ONT_STATUS); + + // Collect all reifier quads with their triple terms + let reifier_quads: Vec = self + .store + .quads_for_pattern( + None, + Some(rdf_reifies), + None, + Some(GraphNameRef::DefaultGraph), + ) + .filter_map(|r| r.ok()) + .collect(); + + struct LinkToMigrate { + reifier_iri: NamedNode, + source: String, + predicate: String, + old_target: String, + new_target: String, + author: String, + timestamp: String, + proof_key: String, + proof_sig: String, + proof_valid: String, + status: String, + } + + let mut links_to_migrate: Vec = Vec::new(); + + for quad in &reifier_quads { + // Extract the triple term from the reifier + let (source, predicate, old_target) = match &quad.object { + Term::Triple(t) => { + let s = match &t.subject { + NamedOrBlankNode::NamedNode(n) => n.as_str().to_string(), + _ => continue, + }; + let p = t.predicate.as_str().to_string(); + let o = match &t.object { + Term::NamedNode(n) => n.as_str().to_string(), + _ => continue, + }; + (s, p, o) + } + _ => continue, + }; + + // Check if the target is a signed envelope literal:json: + if !old_target.starts_with("literal:json:") { + continue; + } + + // Try to decode and check for signed expression envelope + let json_part = &old_target["literal:json:".len()..]; + let decoded = match percent_decode_str(json_part).decode_utf8() { + Ok(d) => d.to_string(), + Err(_) => continue, + }; + + let json_val: serde_json::Value = match serde_json::from_str(&decoded) { + Ok(v) => v, + Err(_) => continue, + }; + + // Only migrate if it has the signed expression envelope shape + let data = match json_val.get("data") { + Some(d) if json_val.get("author").is_some() && json_val.get("proof").is_some() => { + d.clone() + } + _ => continue, // Not a signed envelope, leave as-is + }; + + // Encode the inner `data` field as a plain literal IRI using the + // same canonical encoder that fresh writes flow through + // (`perspective_instance::link_target_for_value` → `literal_encode`). + // Hand-rolling the format here used to drift (e.g. integer-shaped + // floats landed as `literal:number:1.0` while the WHERE builders + // probed `literal:number:1`), causing migrated rows to silently + // miss equality filters. + let new_target = format!("literal:{}", crate::languages::literal_encode(&data)); + + if new_target == old_target { + continue; // No change needed + } + + // Read reifier metadata + let reifier_iri = match &quad.subject { + NamedOrBlankNode::NamedNode(n) => n.clone(), + _ => continue, + }; + + let get_meta = |pred: NamedNodeRef| -> String { + self.store + .quads_for_pattern( + Some(reifier_iri.as_ref().into()), + Some(pred), + None, + Some(GraphNameRef::DefaultGraph), + ) + .filter_map(|r| r.ok()) + .next() + .map(|q| match &q.object { + Term::Literal(l) => l.value().to_string(), + Term::NamedNode(n) => n.as_str().to_string(), + _ => String::new(), + }) + .unwrap_or_default() + }; + + let author = get_meta(ont_author); + let timestamp = get_meta(ont_timestamp); + let proof_key = get_meta(ont_proof_key); + let proof_sig = get_meta(ont_proof_sig); + let proof_valid = get_meta(ont_proof_valid); + let status = get_meta(ont_status); + + links_to_migrate.push(LinkToMigrate { + reifier_iri, + source, + predicate, + old_target, + new_target, + author, + timestamp, + proof_key, + proof_sig, + proof_valid, + status, + }); + } + + let count = links_to_migrate.len(); + if count == 0 { + self.set_migration_version(3)?; + return Ok(0); + } + + log::info!( + "Migrating {} signed expression envelopes to plain literals...", + count + ); + + for link in &links_to_migrate { + let source_iri = NamedNode::new_unchecked(&link.source); + let predicate_iri = NamedNode::new_unchecked(&link.predicate); + let old_target_iri = NamedNode::new_unchecked(&link.old_target); + let new_target_iri = NamedNode::new_unchecked(&link.new_target); + + // 1. Insert new direct triple + self.store.insert(QuadRef::new( + source_iri.as_ref(), + predicate_iri.as_ref(), + TermRef::NamedNode(new_target_iri.as_ref()), + GraphNameRef::DefaultGraph, + ))?; + + // 2. Build new reifier IRI (hash changes because target changed) + let new_reifier_iri = { + let mut hasher = Sha256::new(); + hasher.update(link.author.as_bytes()); + hasher.update(link.source.as_bytes()); + hasher.update(link.predicate.as_bytes()); + hasher.update(link.new_target.as_bytes()); + hasher.update(link.timestamp.as_bytes()); + let hash = hex::encode(hasher.finalize()); + NamedNode::new_unchecked(format!("link:{}", &hash[..32])) + }; + + // 3. Insert new reifier triple + let new_triple = Triple::new( + source_iri.clone(), + predicate_iri.clone(), + new_target_iri.clone(), + ); + self.store.insert(QuadRef::new( + new_reifier_iri.as_ref(), + rdf_reifies, + TermRef::Triple(&new_triple), + GraphNameRef::DefaultGraph, + ))?; + + // 4. Insert metadata on new reifier + let annotations: &[(&str, &str)] = &[ + (ONT_AUTHOR, &link.author), + (ONT_TIMESTAMP, &link.timestamp), + (ONT_PROOF_KEY, &link.proof_key), + (ONT_PROOF_SIG, &link.proof_sig), + (ONT_PROOF_VALID, &link.proof_valid), + (ONT_STATUS, &link.status), + ]; + for (pred_uri, value) in annotations { + if value.is_empty() { + continue; + } + let pred = NamedNodeRef::new_unchecked(pred_uri); + let lit = literal(value); + self.store.insert(QuadRef::new( + new_reifier_iri.as_ref(), + pred, + TermRef::Literal(lit.as_ref()), + GraphNameRef::DefaultGraph, + ))?; + } + + // 5. Remove old reifier metadata + let old_meta: Vec = self + .store + .quads_for_pattern( + Some(link.reifier_iri.as_ref().into()), + None, + None, + Some(GraphNameRef::DefaultGraph), + ) + .filter_map(|r| r.ok()) + .collect(); + for q in &old_meta { + self.store.remove(q)?; + } + + // 6. Remove old direct triple (only if no other reifier references it) + let old_triple = Triple::new( + source_iri.clone(), + predicate_iri.clone(), + old_target_iri.clone(), + ); + let other_reifiers = self + .store + .quads_for_pattern( + None, + Some(rdf_reifies), + Some(TermRef::Triple(&old_triple)), + Some(GraphNameRef::DefaultGraph), + ) + .any(|r| r.is_ok()); + if !other_reifiers { + let _ = self.store.remove(&Quad::new( + source_iri, + predicate_iri, + old_target_iri, + GraphName::DefaultGraph, + )); + } + } + + self.set_migration_version(3)?; + + log::info!( + "Migration complete: {} signed envelopes converted to plain literals", + count + ); + Ok(count) + } } #[cfg(test)] diff --git a/tests/js/tests/mcp-http.test.ts b/tests/js/tests/mcp-http.test.ts index f0b77875c..78901d621 100644 --- a/tests/js/tests/mcp-http.test.ts +++ b/tests/js/tests/mcp-http.test.ts @@ -998,8 +998,12 @@ describe("MCP HTTP Flux Chat Integration Test", function() { }); // ======================================================================== - // 5b. Resolve Language — verify properties with resolve_language produce - // proper literal:json: expressions instead of literal:string: + // 5b. Resolve Language — properties with resolveLanguage: "literal" must + // produce deterministic literal:boolean: / literal:number: / + // literal:string: targets (NOT the legacy literal:json: form). + // Per-link provenance lives on the RDF 1.2 reifier, so wrapping each + // property value in a signed expression envelope duplicates that and + // defeats indexed equality lookups in the WHERE builder. // ======================================================================== describe("5b. Resolve Language for Boolean/String Properties", function() { @@ -1019,7 +1023,7 @@ describe("MCP HTTP Flux Chat Integration Test", function() { console.log("channel_create with booleans:", resultStr); }); - it("should store boolean properties as literal:json: expressions, not literal:string:", async function() { + it("should store boolean properties as deterministic literal:boolean: targets", async function() { // Query the raw links to verify the encoding format var links = await callMcpTool(MCP_BASE_URL,'query_links', { perspective_id: perspectiveUuid, @@ -1028,17 +1032,18 @@ describe("MCP HTTP Flux Chat Integration Test", function() { }, mcpSessionId); console.log("isConversation links:", JSON.stringify(links)); - // The target should be a literal:json: expression (signed expression), - // NOT literal:string:false + // The target should be the deterministic literal:boolean: form. + // The legacy literal:json: form is no longer + // produced — provenance is carried by the reifier instead. var linksArr = Array.isArray(links) ? links : (links.links || []); expect(linksArr.length).to.be.greaterThan(0); var target = linksArr[0].data?.target || linksArr[0].target || ''; console.log("isConversation target:", target); - expect(target).to.not.include("literal:string:false"); - expect(target).to.include("literal:json:"); + expect(target).to.not.include("literal:json:"); + expect(target).to.equal("literal:boolean:false"); }); - it("should store string properties as literal:json: expressions when resolve_language is set", async function() { + it("should store string properties as deterministic literal:string: targets when resolve_language is set", async function() { var links = await callMcpTool(MCP_BASE_URL,'query_links', { perspective_id: perspectiveUuid, source: resolveTestChannelAddr, @@ -1050,8 +1055,9 @@ describe("MCP HTTP Flux Chat Integration Test", function() { expect(linksArr.length).to.be.greaterThan(0); var target = linksArr[0].data?.target || linksArr[0].target || ''; console.log("name target:", target); - // Should be a signed expression (literal:json:) not a raw string literal - expect(target).to.include("literal:json:"); + // Deterministic literal:string: form, not a signed envelope. + expect(target).to.not.include("literal:json:"); + expect(target).to.match(/^literal:string:/); }); it("should resolve boolean values via channel_set_isconversation", async function() { @@ -1062,7 +1068,7 @@ describe("MCP HTTP Flux Chat Integration Test", function() { }, mcpSessionId); expect(result.success).to.be.true; - // Verify the stored link target is a proper expression + // Verify the stored link target is the deterministic literal:boolean: form. var links = await callMcpTool(MCP_BASE_URL,'query_links', { perspective_id: perspectiveUuid, source: resolveTestChannelAddr, @@ -1072,8 +1078,8 @@ describe("MCP HTTP Flux Chat Integration Test", function() { expect(linksArr.length).to.be.greaterThan(0); var target = linksArr[0].data?.target || linksArr[0].target || ''; console.log("Updated isConversation target:", target); - expect(target).to.not.include("literal:string:true"); - expect(target).to.include("literal:json:"); + expect(target).to.not.include("literal:json:"); + expect(target).to.equal("literal:boolean:true"); }); it("should resolve string values via set_subject_property with resolve_language", async function() { @@ -1086,7 +1092,7 @@ describe("MCP HTTP Flux Chat Integration Test", function() { }, mcpSessionId); expect(result.success).to.be.true; - // Verify the stored link target uses literal:json: (signed expression) + // Verify the stored link target is the deterministic literal:string: form. var links = await callMcpTool(MCP_BASE_URL,'query_links', { perspective_id: perspectiveUuid, source: resolveTestChannelAddr, @@ -1096,7 +1102,8 @@ describe("MCP HTTP Flux Chat Integration Test", function() { expect(linksArr.length).to.be.greaterThan(0); var target = linksArr[0].data?.target || linksArr[0].target || ''; console.log("description target:", target); - expect(target).to.include("literal:json:"); + expect(target).to.not.include("literal:json:"); + expect(target).to.match(/^literal:string:/); }); it("should resolve boolean values via channel_update (dynamic update)", async function() { @@ -1116,8 +1123,8 @@ describe("MCP HTTP Flux Chat Integration Test", function() { expect(linksArr.length).to.be.greaterThan(0); var target = linksArr[0].data?.target || linksArr[0].target || ''; console.log("Updated isPinned target:", target); - expect(target).to.not.include("literal:string:false"); - expect(target).to.include("literal:json:"); + expect(target).to.not.include("literal:json:"); + expect(target).to.equal("literal:boolean:false"); }); }); diff --git a/tests/js/tests/prolog-and-literals.test.ts b/tests/js/tests/prolog-and-literals.test.ts index 9a1e55335..a653ea5ab 100644 --- a/tests/js/tests/prolog-and-literals.test.ts +++ b/tests/js/tests/prolog-and-literals.test.ts @@ -299,7 +299,7 @@ describe("Prolog + Literals", () => { let links = await perspective!.get(new LinkQuery({source: todo.id, predicate: "todo://has_title"})) expect(links.length).to.equal(1) let literal = Literal.fromUrl(links[0].data.target).get() - expect(literal.data).to.equal("new title") + expect(literal).to.equal("new title") }) it("can easily be initialized with PerspectiveProxy.ensureSDNASubjectClass()", async () => { @@ -646,7 +646,7 @@ describe("Prolog + Literals", () => { let links = await perspective!.get(new LinkQuery({source: root, predicate: "recipe://resolve"})) expect(links.length).to.equal(1) let literal = Literal.fromUrl(links[0].data.target).get() - expect(literal.data).to.equal(recipe.resolve) + expect(literal).to.equal(recipe.resolve) const recipe3 = new Recipe(perspective!, root); await recipe3.get(); @@ -707,8 +707,8 @@ describe("Prolog + Literals", () => { let linksResolve = await perspective!.get(new LinkQuery({source: root, predicate: "recipe://resolve"})) expect(linksResolve.length).to.equal(1) - let expression = Literal.fromUrl(linksResolve[0].data.target).get() - expect(expression.data).to.equal(longName) + let literal = Literal.fromUrl(linksResolve[0].data.target).get() + expect(literal).to.equal(longName) const recipe2 = new Recipe(perspective!, root) await recipe2.get() @@ -2206,10 +2206,20 @@ describe("Prolog + Literals", () => { await model.save(); const saveTime = Date.now(); - // Poll until callback called + // Poll until callback called. 60s upper bound matches + // the surrounding waitForCondition timeouts in this + // suite. Even with the previous 30s ceiling the test + // still flaked on integration-tests-js #17171 after + // the dev merge pulled in the lazy-load resolveLanguage + // change (#848), which adds first-fetch latency on a + // freshly registered SDNA class. Steady-state + // subscription latency is still logged via + // `subscriptionLatency`, so a real regression would + // surface as a slow log line rather than be hidden by + // the bumped ceiling. while (!subscriptionCallback.called) { await sleep(10); - if (Date.now() - saveTime > 5000) throw new Error("Timeout waiting for subscription update"); + if (Date.now() - saveTime > 60000) throw new Error("Timeout waiting for subscription update"); } const saveLatency = saveTime - start;