From 65e75bde835e4c220c6eb1f8da9146657716e67e Mon Sep 17 00:00:00 2001 From: Ben Barber Date: Thu, 14 May 2026 13:33:41 -0400 Subject: [PATCH 01/10] toolpath-convo: add SessionBase, FileMutation, view.base/extra, tool.file_mutations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Additive data-model changes on `ConversationView` and `ToolInvocation` to prepare for provider unification — providers will populate these in their `to_view` so `derive_path` can be the single canonical `ConversationView -> Path` projection. New on `ConversationView`: - `base: Option<SessionBase>` — path-level base (working_dir, vcs_revision, vcs_branch, vcs_remote). Will project to `Path.base`. - `extra: HashMap<String, serde_json::Value>` — path-level provider-namespaced extras. Will project to `Path.meta.extra`. New on `ToolInvocation`: - `file_mutations: Vec<FileMutation>` — resolved file mutations (path, operation, raw_diff, before, after) computed by the provider's `to_view`. Will project to sibling `file.write` change entries. `ConversationView` and `ToolInvocation` now derive `Default` so the new fields can be omitted from struct literals with `..Default::default()`. Existing call sites updated. No behavior change yet — `derive_path` / `extract_conversation` don't project the new fields; providers don't populate them. Wires up next. 
--- crates/toolpath-claude/src/project.rs | 19 ++++--- crates/toolpath-claude/src/provider.rs | 4 ++ crates/toolpath-codex/src/project.rs | 12 ++-- crates/toolpath-codex/src/provider.rs | 2 + crates/toolpath-convo/src/derive.rs | 11 ++-- crates/toolpath-convo/src/extract.rs | 14 +---- crates/toolpath-convo/src/lib.rs | 70 +++++++++++++++++++++++- crates/toolpath-convo/src/project.rs | 15 +++-- crates/toolpath-gemini/src/project.rs | 5 ++ crates/toolpath-gemini/src/provider.rs | 2 + crates/toolpath-opencode/src/project.rs | 4 ++ crates/toolpath-opencode/src/provider.rs | 2 + crates/toolpath-pi/src/project.rs | 3 + crates/toolpath-pi/src/provider.rs | 3 + 14 files changed, 124 insertions(+), 42 deletions(-) diff --git a/crates/toolpath-claude/src/project.rs b/crates/toolpath-claude/src/project.rs index 59f0e5f..849347f 100644 --- a/crates/toolpath-claude/src/project.rs +++ b/crates/toolpath-claude/src/project.rs @@ -32,14 +32,7 @@ use toolpath_convo::{ /// /// let view = ConversationView { /// id: "my-session".to_string(), -/// started_at: None, -/// last_activity: None, -/// turns: vec![], -/// total_usage: None, -/// provider_id: None, -/// files_changed: vec![], -/// session_ids: vec![], -/// events: vec![], +/// ..Default::default() /// }; /// /// let projector = ClaudeProjector; @@ -1016,6 +1009,7 @@ mod tests { files_changed: vec![], session_ids: vec![], events: vec![], + ..Default::default() } } @@ -1140,6 +1134,7 @@ mod tests { input: serde_json::json!({"file_path": "src/main.rs"}), result: None, category: None, + ..Default::default() }]; let view = make_view("sess-1", vec![turn]); @@ -1211,6 +1206,7 @@ mod tests { is_error: false, }), category: None, + ..Default::default() }]; let view = make_view("sess-1", vec![user_turn("u1", "Go"), turn]); @@ -1259,6 +1255,7 @@ mod tests { input: serde_json::json!({}), result: None, // no result category: None, + ..Default::default() }]; let view = make_view("sess-1", vec![turn]); @@ -1359,6 +1356,7 @@ mod tests { input: 
serde_json::json!({"command": "ls"}), result: None, category: None, + ..Default::default() }]; let view = make_view("sess-1", vec![turn]); @@ -1390,6 +1388,7 @@ mod tests { is_error: false, }), category: None, + ..Default::default() }, ToolInvocation { id: "t2".to_string(), @@ -1400,6 +1399,7 @@ mod tests { is_error: true, }), category: None, + ..Default::default() }, ]; @@ -1466,6 +1466,7 @@ mod tests { is_error: false, }), category: None, + ..Default::default() }, ToolInvocation { id: "t2".to_string(), @@ -1473,6 +1474,7 @@ mod tests { input: serde_json::json!({}), result: None, // no result for this one category: None, + ..Default::default() }, ]; @@ -1599,6 +1601,7 @@ mod tests { is_error: false, }), category: None, + ..Default::default() }]; let view = make_view("sess-1", vec![turn]); diff --git a/crates/toolpath-claude/src/provider.rs b/crates/toolpath-claude/src/provider.rs index 72b793e..1895733 100644 --- a/crates/toolpath-claude/src/provider.rs +++ b/crates/toolpath-claude/src/provider.rs @@ -94,6 +94,7 @@ fn message_to_turn(entry: &ConversationEntry, msg: &Message) -> Turn { input: tu.input.clone(), result, category, + ..Default::default() } }) .collect(); @@ -310,6 +311,7 @@ fn conversation_to_view(convo: &Conversation) -> ConversationView { files_changed, session_ids: vec![], events, + ..Default::default() } } @@ -1046,6 +1048,7 @@ mod tests { input: serde_json::json!({}), result: None, category: Some(ToolCategory::FileRead), + ..Default::default() }, ToolInvocation { id: "tool-b".into(), @@ -1053,6 +1056,7 @@ mod tests { input: serde_json::json!({}), result: None, category: Some(ToolCategory::FileWrite), + ..Default::default() }, ], model: None, diff --git a/crates/toolpath-codex/src/project.rs b/crates/toolpath-codex/src/project.rs index 209db99..1bcf007 100644 --- a/crates/toolpath-codex/src/project.rs +++ b/crates/toolpath-codex/src/project.rs @@ -53,14 +53,8 @@ use crate::types::{ /// /// let view = ConversationView { /// id: 
"session-uuid".into(), -/// started_at: None, -/// last_activity: None, -/// turns: vec![], -/// total_usage: None, /// provider_id: Some("codex".into()), -/// files_changed: vec![], -/// session_ids: vec![], -/// events: vec![], +/// ..Default::default() /// }; /// /// let session = CodexProjector::default().project(&view).unwrap(); @@ -711,6 +705,7 @@ mod tests { files_changed: vec![], session_ids: vec![], events: vec![], + ..Default::default() } } @@ -788,6 +783,7 @@ mod tests { is_error: false, }), category: Some(ToolCategory::Shell), + ..Default::default() }]; let s = CodexProjector::default() .project(&view_with(vec![t])) @@ -844,6 +840,7 @@ mod tests { input: json!({"command": "ls"}), result: None, category: Some(ToolCategory::Shell), + ..Default::default() }]; let s = CodexProjector::default() .project(&view_with(vec![t])) @@ -874,6 +871,7 @@ mod tests { is_error: false, }), category: Some(ToolCategory::FileWrite), + ..Default::default() }]; let s = CodexProjector::default() .project(&view_with(vec![t])) diff --git a/crates/toolpath-codex/src/provider.rs b/crates/toolpath-codex/src/provider.rs index 211d936..964b4d0 100644 --- a/crates/toolpath-codex/src/provider.rs +++ b/crates/toolpath-codex/src/provider.rs @@ -271,6 +271,7 @@ impl<'a> Builder<'a> { files_changed: self.files_changed_order, session_ids: vec![], events: self.events, + ..Default::default() } } @@ -409,6 +410,7 @@ impl<'a> Builder<'a> { input, result: None, category, + ..Default::default() }; let turn_idx = match self.last_assistant_turn_index() { diff --git a/crates/toolpath-convo/src/derive.rs b/crates/toolpath-convo/src/derive.rs index 4c8bafd..aa9f728 100644 --- a/crates/toolpath-convo/src/derive.rs +++ b/crates/toolpath-convo/src/derive.rs @@ -564,14 +564,9 @@ mod tests { fn view_with(turns: Vec) -> ConversationView { ConversationView { id: "abcdef012345".to_string(), - started_at: None, - last_activity: None, turns, - total_usage: None, provider_id: Some("pi".to_string()), - 
files_changed: vec![], - session_ids: vec![], - events: vec![], + ..Default::default() } } @@ -654,6 +649,7 @@ mod tests { input, result: None, category: Some(ToolCategory::FileWrite), + ..Default::default() } } @@ -724,6 +720,7 @@ mod tests { input: serde_json::json!({"file_path": "x.rs"}), result: None, category: Some(ToolCategory::FileRead), + ..Default::default() }]; let view = view_with(vec![turn]); let path = derive_path(&view, &DeriveConfig::default()); @@ -921,6 +918,7 @@ mod tests { is_error: false, }), category: Some(ToolCategory::FileRead), + ..Default::default() }]; let view = view_with(vec![turn]); let path = derive_path(&view, &DeriveConfig::default()); @@ -937,6 +935,7 @@ mod tests { input: serde_json::json!({}), result: None, category: Some(ToolCategory::FileRead), + ..Default::default() }]; let view = view_with(vec![turn]); let cfg = DeriveConfig { diff --git a/crates/toolpath-convo/src/extract.rs b/crates/toolpath-convo/src/extract.rs index b98f628..40838b2 100644 --- a/crates/toolpath-convo/src/extract.rs +++ b/crates/toolpath-convo/src/extract.rs @@ -24,17 +24,7 @@ use crate::{ /// `conversation.append`, and `tool.invoke` are recognized; everything else /// is silently skipped. pub fn extract_conversation(path: &Path) -> ConversationView { - let mut view = ConversationView { - id: String::new(), - started_at: None, - last_activity: None, - turns: Vec::new(), - total_usage: None, - provider_id: None, - files_changed: Vec::new(), - session_ids: Vec::new(), - events: Vec::new(), - }; + let mut view = ConversationView::default(); // Map from step ID → index into view.turns, for parent lookups. 
let mut step_to_turn: HashMap<&str, usize> = HashMap::new(); @@ -291,6 +281,7 @@ fn build_inline_tool_uses(extra: &HashMap) -> Vec) -> ToolInvo input, result, category, + ..Default::default() } } diff --git a/crates/toolpath-convo/src/lib.rs b/crates/toolpath-convo/src/lib.rs index 1ea8db6..ec16240 100644 --- a/crates/toolpath-convo/src/lib.rs +++ b/crates/toolpath-convo/src/lib.rs @@ -69,6 +69,48 @@ pub struct TokenUsage { pub cache_write_tokens: Option, } +/// Path-level base context for a conversation: where the session was rooted +/// and against what VCS state. Populated by the provider's `to_view`; projects +/// straight onto `Path.base` by `derive_path`. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct SessionBase { + /// Working directory (absolute path). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub working_dir: Option, + /// VCS revision (commit hash, changeset id). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub vcs_revision: Option, + /// VCS branch. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub vcs_branch: Option, + /// Repository URL or other origin identifier. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub vcs_remote: Option, +} + +/// A file mutation resolved at view-construction time. Providers populate this +/// on the `ToolInvocation` that caused the mutation; `derive_path` projects +/// each entry into a sibling artifact change keyed by `path` with +/// `structural.type == "file.write"`. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct FileMutation { + /// File path (relative to `view.base.working_dir` if relative, or + /// `file://`/absolute). + pub path: String, + /// Operation: `"add"`, `"update"`, `"delete"`, or a provider-specific tag. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub operation: Option, + /// Unified diff (the canonical perspective). 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub raw_diff: Option, + /// File contents before this mutation (when known). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub before: Option, + /// File contents after this mutation (when known). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub after: Option, +} + /// Snapshot of the working environment when a turn was produced. /// /// All fields are optional. Providers populate what they have. @@ -145,7 +187,7 @@ pub enum ToolCategory { } /// A tool invocation within a turn. -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct ToolInvocation { /// Provider-assigned identifier for this invocation. pub id: String, @@ -159,6 +201,11 @@ pub struct ToolInvocation { /// crate; `None` for unrecognized tools. #[serde(default, skip_serializing_if = "Option::is_none")] pub category: Option, + /// File mutations this invocation produced, with diffs pre-resolved by + /// the provider's `to_view`. Each entry projects to a sibling + /// `file.write` artifact change in the derived step. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub file_mutations: Vec, } /// The result of a tool invocation. @@ -221,7 +268,7 @@ pub struct Turn { } /// A complete conversation from any provider. -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct ConversationView { /// Unique session/conversation identifier. pub id: String, @@ -259,6 +306,17 @@ pub struct ConversationView { /// be preserved for round-trip fidelity. #[serde(default, skip_serializing_if = "Vec::is_empty")] pub events: Vec, + + /// Path-level base: where this session was rooted (`cwd`, git + /// commit/branch/remote). Projects directly to `Path.base`. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub base: Option, + + /// Path-level provider-namespaced extras. 
Projects directly to + /// `Path.meta.extra`. Providers SHOULD namespace under their short id + /// (e.g. `extra["codex"]`, `extra["opencode"]`) to avoid collisions. + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub extra: HashMap, } impl ConversationView { @@ -494,6 +552,7 @@ mod tests { is_error: false, }), category: Some(ToolCategory::FileRead), + ..Default::default() }], model: Some("claude-opus-4-6".into()), stop_reason: Some("end_turn".into()), @@ -528,6 +587,7 @@ mod tests { files_changed: vec![], session_ids: vec![], events: vec![], + ..Default::default() } } @@ -557,6 +617,7 @@ mod tests { files_changed: vec![], session_ids: vec![], events: vec![], + ..Default::default() }; assert!(view.title(50).is_none()); } @@ -795,6 +856,7 @@ mod tests { input: serde_json::json!({"command": "ls"}), result: None, category: Some(ToolCategory::Shell), + ..Default::default() }; let json = serde_json::to_string(&ti).unwrap(); assert!(json.contains("\"shell\"")); @@ -810,6 +872,7 @@ mod tests { input: serde_json::json!({}), result: None, category: None, + ..Default::default() }; let json = serde_json::to_string(&ti).unwrap(); assert!(!json.contains("category")); @@ -902,6 +965,7 @@ mod tests { files_changed: vec!["src/main.rs".into(), "src/lib.rs".into()], session_ids: vec![], events: vec![], + ..Default::default() }; let json = serde_json::to_string(&view).unwrap(); let back: ConversationView = serde_json::from_str(&json).unwrap(); @@ -994,6 +1058,7 @@ mod tests { event_type: "attachment".into(), data: HashMap::new(), }], + ..Default::default() }; let json = serde_json::to_string(&view).unwrap(); assert!(json.contains("events")); @@ -1014,6 +1079,7 @@ mod tests { files_changed: vec![], session_ids: vec![], events: vec![], + ..Default::default() }; let json = serde_json::to_string(&view).unwrap(); assert!(!json.contains("events")); diff --git a/crates/toolpath-convo/src/project.rs b/crates/toolpath-convo/src/project.rs index b2e1223..6a7a2f6 100644 --- 
a/crates/toolpath-convo/src/project.rs +++ b/crates/toolpath-convo/src/project.rs @@ -83,14 +83,7 @@ where /// /// let view = ConversationView { /// id: "s1".into(), -/// started_at: None, -/// last_activity: None, -/// turns: vec![], -/// total_usage: None, -/// provider_id: None, -/// files_changed: vec![], -/// session_ids: vec![], -/// events: vec![], +/// ..Default::default() /// }; /// /// let projector = AnyProjector::new(TurnCounter); @@ -157,6 +150,7 @@ mod tests { files_changed: vec![], session_ids: vec![], events: vec![], + ..Default::default() } } @@ -193,6 +187,7 @@ mod tests { files_changed: vec![], session_ids: vec![], events: vec![], + ..Default::default() } } @@ -357,6 +352,7 @@ mod tests { is_error: false, }), category: None, + ..Default::default() }, ToolInvocation { id: "u2".into(), @@ -364,6 +360,7 @@ mod tests { input: serde_json::json!({"command": "cargo test"}), result: None, category: None, + ..Default::default() }, ], model: None, @@ -377,6 +374,7 @@ mod tests { provider_id: None, files_changed: vec![], session_ids: vec![], + ..Default::default() }; let any = AnyProjector::new(ToolNameCollector); @@ -458,6 +456,7 @@ mod tests { provider_id: None, files_changed: vec![], session_ids: vec![], + ..Default::default() }; let any = AnyProjector::new(TotalInputTokens); diff --git a/crates/toolpath-gemini/src/project.rs b/crates/toolpath-gemini/src/project.rs index be8768f..cd655dc 100644 --- a/crates/toolpath-gemini/src/project.rs +++ b/crates/toolpath-gemini/src/project.rs @@ -643,6 +643,7 @@ mod tests { files_changed: vec![], session_ids: vec![], events: vec![], + ..Default::default() } } @@ -783,6 +784,7 @@ mod tests { is_error: false, }), category: Some(ToolCategory::FileRead), + ..Default::default() }]; let convo = GeminiProjector::default() .project(&view_with(vec![t])) @@ -813,6 +815,7 @@ mod tests { is_error: true, }), category: Some(ToolCategory::Shell), + ..Default::default() }]; let convo = GeminiProjector::default() 
.project(&view_with(vec![t])) @@ -833,6 +836,7 @@ mod tests { is_error: false, }), category: Some(ToolCategory::FileWrite), + ..Default::default() }]; t.extra.insert( "gemini".into(), @@ -976,6 +980,7 @@ mod tests { is_error: false, }), category: Some(ToolCategory::FileRead), + ..Default::default() }]; let convo = GeminiProjector::default() diff --git a/crates/toolpath-gemini/src/provider.rs b/crates/toolpath-gemini/src/provider.rs index 44be2ad..65f395a 100644 --- a/crates/toolpath-gemini/src/provider.rs +++ b/crates/toolpath-gemini/src/provider.rs @@ -177,6 +177,7 @@ fn tool_call_to_invocation(call: &ToolCall) -> ToolInvocation { input: call.args.clone(), result, category: tool_category(&call.name), + ..Default::default() } } @@ -372,6 +373,7 @@ fn conversation_to_view(convo: &Conversation) -> ConversationView { files_changed, session_ids: vec![], events: vec![], + ..Default::default() } } diff --git a/crates/toolpath-opencode/src/project.rs b/crates/toolpath-opencode/src/project.rs index b7868b5..32a0e94 100644 --- a/crates/toolpath-opencode/src/project.rs +++ b/crates/toolpath-opencode/src/project.rs @@ -800,6 +800,7 @@ mod tests { files_changed: vec![], session_ids: vec![], events: vec![], + ..Default::default() } } @@ -850,6 +851,7 @@ mod tests { is_error: false, }), category: Some(ToolCategory::Shell), + ..Default::default() }]; let s = OpencodeProjector::default() .project(&view_with(vec![t])) @@ -888,6 +890,7 @@ mod tests { is_error: true, }), category: Some(ToolCategory::Shell), + ..Default::default() }]; let s = OpencodeProjector::default() .project(&view_with(vec![t])) @@ -912,6 +915,7 @@ mod tests { input: json!({"file_path": "x.rs", "old_string": "a", "new_string": "b"}), result: None, category: Some(ToolCategory::FileWrite), + ..Default::default() }]; let s = OpencodeProjector::default() .project(&view_with(vec![t])) diff --git a/crates/toolpath-opencode/src/provider.rs b/crates/toolpath-opencode/src/provider.rs index 603d95b..227b14a 100644 --- 
a/crates/toolpath-opencode/src/provider.rs +++ b/crates/toolpath-opencode/src/provider.rs @@ -208,6 +208,7 @@ impl<'a> Builder<'a> { files_changed: self.files_changed_order, session_ids: vec![self.session.id.clone()], events: self.events, + ..Default::default() } } @@ -501,6 +502,7 @@ fn to_invocation( input, result, category: tool_category(&tp.tool), + ..Default::default() } } diff --git a/crates/toolpath-pi/src/project.rs b/crates/toolpath-pi/src/project.rs index a29a918..79177cd 100644 --- a/crates/toolpath-pi/src/project.rs +++ b/crates/toolpath-pi/src/project.rs @@ -816,6 +816,7 @@ mod tests { files_changed: vec![], session_ids: vec![], events: vec![], + ..Default::default() } } @@ -862,6 +863,7 @@ mod tests { is_error: false, }), category: Some(ToolCategory::FileRead), + ..Default::default() }]; let session = PiProjector::default().project(&view_with(vec![t])).unwrap(); // session header + assistant + tool-result = 3 entries @@ -918,6 +920,7 @@ mod tests { input: serde_json::json!({"command": "ls"}), result: None, category: Some(ToolCategory::Shell), + ..Default::default() }]; let session = PiProjector::default().project(&view_with(vec![t])).unwrap(); match &session.entries[1] { diff --git a/crates/toolpath-pi/src/provider.rs b/crates/toolpath-pi/src/provider.rs index c73ea50..04a90bf 100644 --- a/crates/toolpath-pi/src/provider.rs +++ b/crates/toolpath-pi/src/provider.rs @@ -536,6 +536,7 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { input: arguments.clone(), result: None, category, + ..Default::default() }); } } @@ -617,6 +618,7 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { is_error: !matches!(exit_code, Some(0)), }), category: Some(ToolCategory::Shell), + ..Default::default() }); } @@ -762,6 +764,7 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { files_changed, session_ids, events: vec![], + ..Default::default() } } From add355055937ffa40c9e868683f156db8f19d2fb Mon Sep 17 00:00:00 2001 From: 
Ben Barber Date: Thu, 14 May 2026 13:41:12 -0400 Subject: [PATCH 02/10] toolpath-convo: project view.base/extra/file_mutations through derive_path `derive_path` now projects the new `ConversationView` fields: - Step ids use `turn.id` / `event.id` when non-empty; otherwise synthesize `step-NNNN` / `event-NNNN` as before. Lets claude's native UUIDs round-trip cleanly through `extract_conversation`. - `view.base` (`SessionBase`) projects onto `Path.base` (`uri` from `working_dir`, `ref` from `vcs_revision`, `branch` from `vcs_branch`). `vcs_remote` (no slot on `Path.base`) lands in `meta.extra["vcs_remote"]`. `config.base_uri` still wins as a CLI override. - `view.extra` merges into `path.meta.extra`. - Each `ToolInvocation::file_mutations` entry emits a sibling `file.write` change keyed by `path` with `raw = raw_diff` and `tool` / `tool_id` / `operation` / `before` / `after` in `structural.extra`. Falls back to the existing synth-from-tool-input behavior when `file_mutations` is empty so un-migrated providers keep working. `extract_conversation` is the inverse: extracts `path.base` / `meta.extra` / sibling `file.write` entries back onto `view.base` / `view.extra` / `tool_uses[].file_mutations` (matched by `tool_id`). Four derive tests that pinned to synthesized `step-NNNN` ids updated to the native turn ids. --- crates/toolpath-convo/src/derive.rs | 120 +++++++++++++++++++++++---- crates/toolpath-convo/src/extract.rs | 96 ++++++++++++++++++++- 2 files changed, 196 insertions(+), 20 deletions(-) diff --git a/crates/toolpath-convo/src/derive.rs b/crates/toolpath-convo/src/derive.rs index aa9f728..d1e5e9d 100644 --- a/crates/toolpath-convo/src/derive.rs +++ b/crates/toolpath-convo/src/derive.rs @@ -52,14 +52,33 @@ pub fn derive_path(view: &ConversationView, config: &DeriveConfig) -> Path { .clone() .unwrap_or_else(|| format!("path-{}-{}", provider, id_prefix)); - // Base URI: config override wins; otherwise first turn's working_dir + // Base resolution order: + // 1. 
`config.base_uri` (CLI override): provides the `uri`; ref/branch + // come from `view.base` if set. + // 2. `view.base` (provider-populated): the canonical source. + // 3. First turn's `environment.working_dir` (legacy fallback). let base = config .base_uri .clone() .map(|uri| Base { uri, - ref_str: None, - branch: None, + ref_str: view.base.as_ref().and_then(|b| b.vcs_revision.clone()), + branch: view.base.as_ref().and_then(|b| b.vcs_branch.clone()), + }) + .or_else(|| { + view.base.as_ref().and_then(|b| { + let wd = b.working_dir.as_ref()?; + let uri = if wd.starts_with('/') { + format!("file://{}", wd) + } else { + wd.clone() + }; + Some(Base { + uri, + ref_str: b.vcs_revision.clone(), + branch: b.vcs_branch.clone(), + }) + }) }) .or_else(|| { view.turns @@ -86,7 +105,13 @@ pub fn derive_path(view: &ConversationView, config: &DeriveConfig) -> Path { let mut actors: HashMap = HashMap::new(); for (idx, turn) in view.turns.iter().enumerate() { - let step_id = format!("step-{:04}", idx + 1); + // Step id: use the turn's native id when set so it round-trips + // through `extract_conversation`; otherwise synthesize sequentially. + let step_id = if turn.id.is_empty() { + format!("step-{:04}", idx + 1) + } else { + turn.id.clone() + }; turn_to_step.insert(turn.id.clone(), step_id.clone()); let actor = actor_for_turn(turn, provider); @@ -194,21 +219,62 @@ pub fn derive_path(view: &ConversationView, config: &DeriveConfig) -> Path { }, ); - // File-write tool invocations → artifact changes. Each gets a unified - // diff in `raw` (so it renders like a git diff) plus the structured - // before/after strings in `structural.extra` for tools that want to - // re-apply or inspect the op programmatically. + // File mutations → sibling `file.write` change entries. 
+ // + // Preferred: each `ToolInvocation::file_mutations` entry comes from + // the provider's `to_view` with the resolved diff already in + // `raw_diff` (claude's git-HEAD lookup, codex's `apply_patch_end` + // parse, opencode's git2 tree↔tree, etc.). + // + // Fallback: for tools whose category is `FileWrite` but whose + // `file_mutations` is empty (providers that haven't migrated yet), + // synthesize a diff from the tool's `input` via `file_write_change`. for tool in &turn.tool_uses { + if !tool.file_mutations.is_empty() { + for fm in &tool.file_mutations { + let mut t_extra: HashMap = HashMap::new(); + t_extra.insert( + "tool".to_string(), + serde_json::Value::String(tool.name.clone()), + ); + t_extra.insert( + "tool_id".to_string(), + serde_json::Value::String(tool.id.clone()), + ); + if let Some(op) = &fm.operation { + t_extra.insert( + "operation".to_string(), + serde_json::Value::String(op.clone()), + ); + } + if let Some(b) = &fm.before { + t_extra.insert( + "before".to_string(), + serde_json::Value::String(b.clone()), + ); + } + if let Some(a) = &fm.after { + t_extra.insert("after".to_string(), serde_json::Value::String(a.clone())); + } + step.change.insert( + fm.path.clone(), + ArtifactChange { + raw: fm.raw_diff.clone(), + structural: Some(StructuralChange { + change_type: "file.write".to_string(), + extra: t_extra, + }), + }, + ); + } + continue; + } if tool.category != Some(ToolCategory::FileWrite) { continue; } let Some(path) = extract_file_path(tool) else { continue; }; - // Shared derivation doesn't have access to a local checkout, - // so it can't resolve pre-write file state. Providers that do - // (e.g. `toolpath-claude`) build their own steps and pass a - // resolved `before_state` directly to `file_write_diff`. 
let (raw, mut t_extra) = file_write_change(tool, &path, None); t_extra.insert( "tool".to_string(), @@ -239,7 +305,12 @@ pub fn derive_path(view: &ConversationView, config: &DeriveConfig) -> Path { // Without this, derive_path drops everything outside `turns`, so a // Claude session loses ~10–25% of its lines on import/export. for (idx, event) in view.events.iter().enumerate() { - let step_id = format!("event-{:04}", idx + 1); + // Event step id: prefer the event's native id so it round-trips. + let step_id = if event.id.is_empty() { + format!("event-{:04}", idx + 1) + } else { + event.id.clone() + }; let actor = format!("provider:{}", provider); actors .entry(actor.clone()) @@ -330,6 +401,21 @@ pub fn derive_path(view: &ConversationView, config: &DeriveConfig) -> Path { meta.extra.insert("files_changed".to_string(), v); } + // Project path-level provider-namespaced extras straight onto meta.extra. + for (k, v) in &view.extra { + meta.extra.insert(k.clone(), v.clone()); + } + + // Carry `vcs_remote` (not representable on `Base`) under meta.extra. 
+ if let Some(remote) = view.base.as_ref().and_then(|b| b.vcs_remote.as_ref()) + && !meta.extra.contains_key("vcs_remote") + { + meta.extra.insert( + "vcs_remote".to_string(), + serde_json::Value::String(remote.clone()), + ); + } + Path { path: PathIdentity { id: path_id, @@ -595,7 +681,7 @@ mod tests { let path = derive_path(&view, &DeriveConfig::default()); assert_eq!(path.steps.len(), 1); assert_eq!(path.steps[0].step.actor, "human:user"); - assert_eq!(path.steps[0].step.id, "step-0001"); + assert_eq!(path.steps[0].step.id, "t1"); } #[test] @@ -639,7 +725,7 @@ mod tests { t2.model = Some("m".into()); let view = view_with(vec![t1, t2]); let path = derive_path(&view, &DeriveConfig::default()); - assert_eq!(path.steps[1].step.parents, vec!["step-0001".to_string()]); + assert_eq!(path.steps[1].step.parents, vec!["t1".to_string()]); } fn fw_tool(name: &str, id: &str, input: serde_json::Value) -> ToolInvocation { @@ -1039,7 +1125,7 @@ mod tests { ]; let view = view_with(turns); let path = derive_path(&view, &DeriveConfig::default()); - assert_eq!(path.path.head, "step-0003"); + assert_eq!(path.path.head, "t3"); } #[test] @@ -1127,7 +1213,7 @@ mod tests { assert_eq!(back.path.id, path.path.id); assert_eq!(back.path.head, path.path.head); assert_eq!(back.steps.len(), 2); - assert_eq!(back.steps[1].step.parents, vec!["step-0001".to_string()]); + assert_eq!(back.steps[1].step.parents, vec!["t1".to_string()]); assert!(back.steps[1].change.contains_key("x.rs")); } } diff --git a/crates/toolpath-convo/src/extract.rs b/crates/toolpath-convo/src/extract.rs index 40838b2..dfbdf4f 100644 --- a/crates/toolpath-convo/src/extract.rs +++ b/crates/toolpath-convo/src/extract.rs @@ -13,8 +13,8 @@ use chrono::DateTime; use toolpath::v1::{Path, Step}; use crate::{ - ConversationEvent, ConversationView, DelegatedWork, EnvironmentSnapshot, Role, TokenUsage, - ToolCategory, ToolInvocation, ToolResult, Turn, + ConversationEvent, ConversationView, DelegatedWork, EnvironmentSnapshot, 
FileMutation, Role, + SessionBase, TokenUsage, ToolCategory, ToolInvocation, ToolResult, Turn, }; /// Extract a [`ConversationView`] from a toolpath [`Path`] document. @@ -26,12 +26,95 @@ use crate::{ pub fn extract_conversation(path: &Path) -> ConversationView { let mut view = ConversationView::default(); + // Project `path.base` back to `view.base`. + if let Some(base) = &path.path.base { + let working_dir = base + .uri + .strip_prefix("file://") + .map(|s| s.to_string()) + .or_else(|| { + if base.uri.is_empty() { + None + } else { + Some(base.uri.clone()) + } + }); + let vcs_remote = path + .meta + .as_ref() + .and_then(|m| m.extra.get("vcs_remote")) + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + let sb = SessionBase { + working_dir, + vcs_revision: base.ref_str.clone(), + vcs_branch: base.branch.clone(), + vcs_remote, + }; + if sb.working_dir.is_some() + || sb.vcs_revision.is_some() + || sb.vcs_branch.is_some() + || sb.vcs_remote.is_some() + { + view.base = Some(sb); + } + } + + // Project `path.meta.extra` back to `view.extra`. `files_changed` and + // `vcs_remote` are handled by other slots. + if let Some(meta) = &path.meta { + for (k, v) in &meta.extra { + if k == "files_changed" || k == "vcs_remote" { + continue; + } + view.extra.insert(k.clone(), v.clone()); + } + } + // Map from step ID → index into view.turns, for parent lookups. let mut step_to_turn: HashMap<&str, usize> = HashMap::new(); // Track files_changed for dedup in insertion order. let mut files_seen: HashSet = HashSet::new(); for step in &path.steps { + // Pre-collect file.write entries on this step, indexed by tool_id, + // so we can attach them as `tool.file_mutations` once the turn is + // built. The iteration order of `step.change` (HashMap) is + // non-deterministic; a pre-pass keeps the attach step simple. 
+ let mut step_mutations: HashMap> = HashMap::new(); + for (key, ch) in &step.change { + let Some(s) = &ch.structural else { continue }; + if s.change_type != "file.write" { + continue; + } + let tid = s + .extra + .get("tool_id") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let fm = FileMutation { + path: key.clone(), + operation: s + .extra + .get("operation") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), + raw_diff: ch.raw.clone(), + before: s + .extra + .get("before") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), + after: s + .extra + .get("after") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), + }; + step_mutations.entry(tid).or_default().push(fm); + } + for (artifact_key, artifact_change) in &step.change { let structural = match &artifact_change.structural { Some(s) => s, @@ -56,7 +139,14 @@ pub fn extract_conversation(path: &Path) -> ConversationView { view.id = session.to_string(); } - let turn = build_turn(step, &structural.extra); + let mut turn = build_turn(step, &structural.extra); + // Attach pre-collected file mutations to their tool_uses + // by `tool_id`. + for tu in turn.tool_uses.iter_mut() { + if let Some(fms) = step_mutations.remove(&tu.id) { + tu.file_mutations = fms; + } + } let idx = view.turns.len(); step_to_turn.insert(&step.step.id, idx); view.turns.push(turn); From 290e126015e5cf86944439cd9f54f64843201fbc Mon Sep 17 00:00:00 2001 From: Ben Barber Date: Thu, 14 May 2026 14:31:41 -0400 Subject: [PATCH 03/10] toolpath-pi: populate view.base in session_to_view Pi's `derive_path` is already a one-line wrapper around `toolpath_convo::derive_path`, so the only migration work is enriching `session_to_view` with the new view fields where applicable. - `view.base` now carries `working_dir` from `session.header.cwd`. - `view.extra` left empty (no provider-namespaced path-level extras today). 
- `tool.file_mutations` left empty (Pi sessions don't carry pre-resolved diffs; the synth-from-tool-input fallback in `derive_path` handles file writes as before). End-to-end test updated: head now equals the last turn's native id ("m4") instead of the synthesized "step-0004", since `derive_path` now preserves turn ids when set. --- crates/toolpath-gemini/src/project.rs | 8 +------- crates/toolpath-pi/src/project.rs | 8 +------- crates/toolpath-pi/src/provider.rs | 13 ++++++++++++- crates/toolpath-pi/tests/end_to_end.rs | 3 ++- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/crates/toolpath-gemini/src/project.rs b/crates/toolpath-gemini/src/project.rs index cd655dc..4ea1519 100644 --- a/crates/toolpath-gemini/src/project.rs +++ b/crates/toolpath-gemini/src/project.rs @@ -42,14 +42,8 @@ use crate::types::{ /// /// let view = ConversationView { /// id: "session-uuid".into(), -/// started_at: None, -/// last_activity: None, -/// turns: vec![], -/// total_usage: None, /// provider_id: Some("gemini-cli".into()), -/// files_changed: vec![], -/// session_ids: vec![], -/// events: vec![], +/// ..Default::default() /// }; /// /// let projector = GeminiProjector::default(); diff --git a/crates/toolpath-pi/src/project.rs b/crates/toolpath-pi/src/project.rs index 79177cd..84b2570 100644 --- a/crates/toolpath-pi/src/project.rs +++ b/crates/toolpath-pi/src/project.rs @@ -48,14 +48,8 @@ use crate::types::{ /// /// let view = ConversationView { /// id: "session-uuid".into(), -/// started_at: None, -/// last_activity: None, -/// turns: vec![], -/// total_usage: None, /// provider_id: Some("pi".into()), -/// files_changed: vec![], -/// session_ids: vec![], -/// events: vec![], +/// ..Default::default() /// }; /// /// let session = PiProjector::default().project(&view).unwrap(); diff --git a/crates/toolpath-pi/src/provider.rs b/crates/toolpath-pi/src/provider.rs index 04a90bf..9f44439 100644 --- a/crates/toolpath-pi/src/provider.rs +++ 
b/crates/toolpath-pi/src/provider.rs @@ -21,7 +21,8 @@ use serde_json::{Map, Value, json}; use std::collections::HashMap; use toolpath_convo::{ ConversationMeta, ConversationProvider, ConversationView, ConvoError, DelegatedWork, - EnvironmentSnapshot, Role, TokenUsage, ToolCategory, ToolInvocation, ToolResult, Turn, + EnvironmentSnapshot, Role, SessionBase, TokenUsage, ToolCategory, ToolInvocation, ToolResult, + Turn, }; // ── Classification helpers ─────────────────────────────────────────── @@ -754,6 +755,15 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { let started_at = parse_ts(&session.header.timestamp); let last_activity = turns.last().and_then(|t| parse_ts(&t.timestamp)); + let base = if session.header.cwd.is_empty() { + None + } else { + Some(SessionBase { + working_dir: Some(session.header.cwd.clone()), + ..Default::default() + }) + }; + ConversationView { id: session.header.id.clone(), started_at, @@ -764,6 +774,7 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { files_changed, session_ids, events: vec![], + base, ..Default::default() } } diff --git a/crates/toolpath-pi/tests/end_to_end.rs b/crates/toolpath-pi/tests/end_to_end.rs index c57f5d6..8ff8a3b 100644 --- a/crates/toolpath-pi/tests/end_to_end.rs +++ b/crates/toolpath-pi/tests/end_to_end.rs @@ -99,7 +99,8 @@ fn test_derive_path_from_fixture() { // Path ID format. assert!(path.path.id.starts_with("path-pi-")); // Head points at the last step. - assert_eq!(path.path.head, "step-0004"); + // Head matches the last turn's native id (which is the source entry id). + assert_eq!(path.path.head, "m4"); // Base URI derived from cwd. 
assert!( path.path From 37141728bf54819a88190a056291aa460382c128 Mon Sep 17 00:00:00 2001 From: Ben Barber Date: Thu, 14 May 2026 15:06:30 -0400 Subject: [PATCH 04/10] toolpath-codex: migrate to shared derive_path; add ProducerInfo + rename_to MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrates `toolpath-codex/src/derive.rs` from its bespoke `derive_path_from_view` to a one-line wrapper around `toolpath_convo::derive_path`. All codex-specific data (cwd, git, file diffs from `patch_apply_end`) is captured during `to_view`; nothing provider-specific lives in `derive.rs` anymore (767 lines → ~170). Behavior changes for codex-derived paths: - File-mutation `structural.type` unifies on `"file.write"` (was `codex.add` / `update` / `delete` / `unknown`). `operation` lives in `structural.extra`; rename target lives in the new `FileMutation.rename_to`. - `tool_calls` summary array on `conversation.append` dropped — it was a pre-formatted display alias for `tool_uses` (same info). - `phase` field on `conversation.append` dropped — still accessible under `turn.extra["codex"]["phase"]`. - Actor `Identity` records simplify to the shared shape. - Empty-carrier turn filter moved into `to_view`. - Turns now carry `parent_id` linked sequentially. Synthetic ids (`codex-turn-NNNN`) assigned to turns whose source message had no native id. Event ids disambiguated by suffixing collisions. IR additions (the only typed slots that earned their keep): - `ProducerInfo { name, version }` and `ConversationView.producer` — codex's `originator` + `cli_version` have no existing slot. - `FileMutation.rename_to: Option<String>` — first-class rename concept; replaces codex's `move_path`. Things explicitly kept off the IR (no smuggling through provider-namespaced extras): - `model_provider` ("openai", "anthropic", …) already lives on `ActorDefinition.provider` per assistant turn.
- Codex's `source` ("cli") and `forked_from_id` have no cross-harness analog; the codex projector hard-codes defaults on the return path, so they drop here. - `byte_count` / `line_count` are derivable from `after`. Also: shared `derive_path` now chains event steps off the previous step when their `event.parent_id` isn't set, so codex's `session_meta` / `turn_context` / `patch_apply_end` event steps land on the head ancestry instead of orphaning. --- crates/toolpath-codex/src/derive.rs | 678 ++---------------------- crates/toolpath-codex/src/provider.rs | 202 ++++++- crates/toolpath-codex/tests/fidelity.rs | 9 +- crates/toolpath-convo/src/derive.rs | 39 +- crates/toolpath-convo/src/extract.rs | 27 +- crates/toolpath-convo/src/lib.rs | 26 +- 6 files changed, 296 insertions(+), 685 deletions(-) diff --git a/crates/toolpath-codex/src/derive.rs b/crates/toolpath-codex/src/derive.rs index 2c9ef9a..dcf0102 100644 --- a/crates/toolpath-codex/src/derive.rs +++ b/crates/toolpath-codex/src/derive.rs @@ -1,25 +1,15 @@ //! Derive Toolpath documents from Codex CLI sessions. //! -//! Each `Turn` in the assembled `ConversationView` becomes a `Step`. -//! Every step's `change` map carries: -//! -//! - One entry at `codex://` with a `conversation.append` -//! structural op holding the turn's text and tool-call summaries. -//! - Sibling entries for each file touched by a `patch_apply_end` -//! whose `call_id` landed in this turn. Codex's structured patch -//! output gives us the unified diff verbatim for updates, and the -//! full file content for adds — both are surfaced as -//! `ArtifactChange.raw` so nothing is lost. +//! Thin wrapper around the shared [`toolpath_convo::derive_path`]: convert +//! the session to a provider-agnostic [`toolpath_convo::ConversationView`] +//! via [`crate::provider::to_view`] and hand off. All Codex-specific data +//! (cwd, git, file diffs from `patch_apply_end`, codex meta aggregates) is +//! 
captured during `to_view`; this module only sets the title and any +//! CLI overrides. use crate::provider::to_view; -use crate::types::{PatchChange, Session}; -use serde_json::{Map, Value, json}; -use std::collections::HashMap; -use toolpath::v1::{ - ActorDefinition, ArtifactChange, Base, Identity, Path, PathIdentity, PathMeta, Step, - StepIdentity, StructuralChange, -}; -use toolpath_convo::{ConversationView, Role, Turn}; +use crate::types::Session; +use toolpath::v1::Path; /// Configuration for deriving a Toolpath Path from a Codex session. /// @@ -39,7 +29,20 @@ pub struct DeriveConfig { /// Derive a [`Path`] from a Codex [`Session`]. pub fn derive_path(session: &Session, config: &DeriveConfig) -> Path { let view = to_view(session); - derive_path_from_view(session, &view, config) + let prefix: String = view.id.chars().take(8).collect(); + let base_uri = config.project_path.as_ref().map(|p| { + if p.starts_with('/') { + format!("file://{}", p) + } else { + p.clone() + } + }); + let cfg = toolpath_convo::DeriveConfig { + base_uri, + title: Some(format!("Codex session: {}", prefix)), + ..Default::default() + }; + toolpath_convo::derive_path(&view, &cfg) } /// Derive a [`Path`] from multiple sessions. Used for bulk exports. @@ -47,511 +50,6 @@ pub fn derive_project(sessions: &[Session], config: &DeriveConfig) -> Vec sessions.iter().map(|s| derive_path(s, config)).collect() } -/// Internal: build the Path from a pre-built `ConversationView` plus -/// the source `Session` (for session_meta fields like `git`). 
-fn derive_path_from_view( - session: &Session, - view: &ConversationView, - config: &DeriveConfig, -) -> Path { - let meta = session.meta(); - let session_short: String = session.id.chars().take(8).collect(); - let path_id = format!("path-codex-{}", session_short); - let convo_artifact = format!("codex://{}", session.id); - - let mut steps: Vec = Vec::with_capacity(view.turns.len()); - let mut actors: HashMap = HashMap::new(); - let mut last_step_id: Option = None; - - for (turn_idx, turn) in view.turns.iter().enumerate() { - let Some(step) = build_step( - turn_idx, - turn, - &convo_artifact, - last_step_id.as_deref(), - &mut actors, - ) else { - continue; - }; - last_step_id = Some(step.step.id.clone()); - steps.push(step); - } - - let head = last_step_id.unwrap_or_else(|| "empty".to_string()); - - // Base: CLI-override wins; otherwise session_meta.cwd; fall back to - // the first turn's environment.working_dir. - let base_uri = config - .project_path - .clone() - .or_else(|| meta.as_ref().map(|m| m.cwd.to_string_lossy().to_string())) - .or_else(|| { - view.turns - .first() - .and_then(|t| t.environment.as_ref()?.working_dir.clone()) - }) - .map(|p| { - if p.starts_with('/') { - format!("file://{}", p) - } else { - p - } - }); - - // Base ref: git commit if session_meta carries one. - let base_ref = meta - .as_ref() - .and_then(|m| m.git.as_ref().and_then(|g| g.commit_hash.clone())); - let base_branch = meta - .as_ref() - .and_then(|m| m.git.as_ref().and_then(|g| g.branch.clone())); - - let base = base_uri.map(|uri| Base { - uri, - ref_str: base_ref, - branch: base_branch, - }); - - // Top-level path meta: actors, title, source, and a Codex extras - // bucket with the session-level metadata so every consumer sees - // it (git origin, cli version, model provider). 
- let mut path_extra: HashMap = HashMap::new(); - let mut codex_meta: Map = Map::new(); - if let Some(m) = meta.as_ref() { - codex_meta.insert("session_id".into(), Value::String(session.id.clone())); - codex_meta.insert("originator".into(), Value::String(m.originator.clone())); - codex_meta.insert("cli_version".into(), Value::String(m.cli_version.clone())); - codex_meta.insert("source".into(), Value::String(m.source.clone())); - if let Some(model_provider) = &m.model_provider { - codex_meta.insert( - "model_provider".into(), - Value::String(model_provider.clone()), - ); - } - if let Some(forked) = &m.forked_from_id { - codex_meta.insert("forked_from_id".into(), Value::String(forked.clone())); - } - if let Some(git) = &m.git { - let mut g: Map = Map::new(); - if let Some(v) = &git.commit_hash { - g.insert("commit_hash".into(), Value::String(v.clone())); - } - if let Some(v) = &git.branch { - g.insert("branch".into(), Value::String(v.clone())); - } - if let Some(v) = &git.repository_url { - g.insert("repository_url".into(), Value::String(v.clone())); - } - if !g.is_empty() { - codex_meta.insert("git".into(), Value::Object(g)); - } - } - } - if !view.files_changed.is_empty() { - codex_meta.insert( - "files_changed".into(), - Value::Array( - view.files_changed - .iter() - .map(|p| Value::String(p.clone())) - .collect(), - ), - ); - } - if !codex_meta.is_empty() { - path_extra.insert("codex".into(), Value::Object(codex_meta)); - } - - Path { - path: PathIdentity { - id: path_id, - base, - head, - graph_ref: None, - }, - steps, - meta: Some(PathMeta { - title: Some(format!("Codex session: {}", session_short)), - source: Some("codex".to_string()), - actors: if actors.is_empty() { - None - } else { - Some(actors) - }, - extra: path_extra, - ..Default::default() - }), - } -} - -fn build_step( - turn_idx: usize, - turn: &Turn, - convo_artifact: &str, - parent_id: Option<&str>, - actors: &mut HashMap, -) -> Option { - // Skip empty carrier turns (all-tool, no content) UNLESS 
they have - // tool invocations with captured data, in which case we keep them. - if turn.text.is_empty() - && turn.tool_uses.is_empty() - && turn.thinking.is_none() - && extract_patch_changes(turn).is_empty() - { - return None; - } - - let (actor, role_str) = resolve_actor(turn, actors); - - // Build conversation.append structural - let mut convo_extra: HashMap = HashMap::new(); - convo_extra.insert("role".into(), json!(role_str)); - if !turn.text.is_empty() { - convo_extra.insert("text".into(), json!(turn.text)); - } - // Plaintext reasoning summaries land here automatically; encrypted - // ciphertext never does (lives under turn.extra["codex"]). - if let Some(th) = turn.thinking.as_deref() - && !th.is_empty() - { - convo_extra.insert("thinking".into(), json!(th)); - } - if !turn.tool_uses.is_empty() { - // `tool_uses` array shape matches what `toolpath_convo::extract` - // reads (id, name, input, category, result). Without this the - // tool calls vanish on the extract → `ConversationView` round-trip, - // and any consumer re-projecting from that view sees assistant text - // only — no tool calls, no results. Keep the name+call_id+summary - // aliases too so existing consumers don't break. - let uses: Vec = turn - .tool_uses - .iter() - .map(|tu| { - let mut obj = serde_json::Map::new(); - obj.insert("id".into(), json!(tu.id)); - obj.insert("name".into(), json!(tu.name)); - obj.insert("input".into(), tu.input.clone()); - if let Some(cat) = tu.category { - obj.insert("category".into(), json!(cat)); - } - if let Some(r) = tu.result.as_ref() { - obj.insert( - "result".into(), - json!({"content": r.content, "is_error": r.is_error}), - ); - } - Value::Object(obj) - }) - .collect(); - convo_extra.insert("tool_uses".into(), Value::Array(uses)); - - // Legacy summary for human-readable consumers / displays. 
- let calls: Vec = turn - .tool_uses - .iter() - .map(|tu| { - json!({ - "name": tu.name, - "call_id": tu.id, - "category": tu.category, - "summary": tool_call_summary(tu), - "status": tool_call_status(turn, &tu.id), - }) - }) - .collect(); - convo_extra.insert("tool_calls".into(), Value::Array(calls)); - } - if let Some(u) = turn.token_usage.as_ref() { - convo_extra.insert("token_usage".into(), json!(u)); - } - if let Some(ph) = turn - .extra - .get("codex") - .and_then(|c| c.get("phase")) - .and_then(|v| v.as_str()) - { - convo_extra.insert("phase".into(), json!(ph)); - } - - let convo_change = ArtifactChange { - raw: None, - structural: Some(StructuralChange { - change_type: "conversation.append".to_string(), - extra: convo_extra, - }), - }; - - let mut changes: HashMap = HashMap::new(); - changes.insert(convo_artifact.to_string(), convo_change); - - // File changes from patch_apply_end attached to this turn. - for (path, patch) in extract_patch_changes(turn) { - changes.insert(path, patch); - } - - let step_id = format!("step-{:04}", turn_idx + 1); - let parents = parent_id.map(|p| vec![p.to_string()]).unwrap_or_default(); - - Some(Step { - step: StepIdentity { - id: step_id, - parents, - actor, - timestamp: turn.timestamp.clone(), - }, - change: changes, - meta: None, - }) -} - -fn resolve_actor( - turn: &Turn, - actors: &mut HashMap, -) -> (String, &'static str) { - match &turn.role { - Role::User => { - actors - .entry("human:user".to_string()) - .or_insert_with(|| ActorDefinition { - name: Some("User".to_string()), - ..Default::default() - }); - ("human:user".to_string(), "user") - } - Role::Assistant => { - let (actor_key, model_str) = match &turn.model { - Some(m) if !m.is_empty() => (format!("agent:{}", m), m.clone()), - _ => ("agent:codex".to_string(), "codex".to_string()), - }; - actors - .entry(actor_key.clone()) - .or_insert_with(|| ActorDefinition { - name: Some("Codex CLI".to_string()), - provider: Some("openai".to_string()), - model: 
Some(model_str.clone()), - identities: vec![Identity { - system: "openai".to_string(), - id: model_str, - }], - ..Default::default() - }); - (actor_key, "assistant") - } - Role::System => { - actors - .entry("system:codex".to_string()) - .or_insert_with(|| ActorDefinition { - name: Some("Codex CLI system".to_string()), - provider: Some("openai".to_string()), - ..Default::default() - }); - ("system:codex".to_string(), "developer") - } - Role::Other(s) => { - let key = format!("other:{}", s); - actors - .entry(key.clone()) - .or_insert_with(|| ActorDefinition { - name: Some(s.clone()), - ..Default::default() - }); - (key, "other") - } - } -} - -fn tool_call_status(turn: &Turn, call_id: &str) -> String { - turn.extra - .get("codex") - .and_then(|c| c.get("tool_extras")) - .and_then(|t| t.get(call_id)) - .and_then(|te| te.get("status").or_else(|| te.get("exit_code"))) - .and_then(|v| { - v.as_str() - .map(str::to_string) - .or_else(|| v.as_i64().map(|n| n.to_string())) - }) - .unwrap_or_else(|| { - turn.tool_uses - .iter() - .find(|tu| tu.id == call_id) - .and_then(|tu| tu.result.as_ref()) - .map(|r| { - if r.is_error { - "error".to_string() - } else { - "success".to_string() - } - }) - .unwrap_or_default() - }) -} - -/// Compact human-readable summary of a tool invocation's salient args. 
-fn tool_call_summary(tu: &toolpath_convo::ToolInvocation) -> String { - let pick = |k: &str| -> Option { - tu.input.get(k).and_then(|v| v.as_str()).map(str::to_string) - }; - let summary = match tu.name.as_str() { - "exec_command" | "shell" | "unified_exec" => pick("cmd").or_else(|| pick("command")), - "write_stdin" => pick("chars").or_else(|| pick("session_id")), - "read_file" | "read_many_files" | "list_dir" | "view_image" => pick("path"), - "write_file" | "replace" | "edit" => pick("file_path"), - "apply_patch" => { - // input is a raw patch string; surface the first change line - tu.input.as_str().and_then(|s| { - s.lines() - .find(|l| { - l.starts_with("*** Add File:") - || l.starts_with("*** Update File:") - || l.starts_with("*** Delete File:") - }) - .map(str::to_string) - }) - } - "glob" | "grep_search" | "search_file_content" => pick("pattern").or_else(|| pick("query")), - "web_fetch" => pick("url"), - "web_search" | "google_web_search" => pick("query"), - "spawn_agent" | "task" | "activate_skill" => pick("prompt").or_else(|| pick("task")), - _ => None, - }; - summary.unwrap_or_default() -} - -/// Pull `patch_apply_end.changes` off a turn's extras and turn each into -/// a toolpath `ArtifactChange` with both perspectives populated. 
-fn extract_patch_changes(turn: &Turn) -> Vec<(String, ArtifactChange)> { - let Some(codex) = turn.extra.get("codex") else { - return Vec::new(); - }; - let Some(Value::Array(patches)) = codex.get("patch_changes") else { - return Vec::new(); - }; - - let mut out: Vec<(String, ArtifactChange)> = Vec::new(); - for patch in patches { - let Some(Value::Object(changes)) = patch.get("changes") else { - continue; - }; - for (path, change_val) in changes { - let Some(change) = parse_patch_change(change_val) else { - continue; - }; - let (raw, structural) = patch_change_to_perspectives(&change, path); - out.push(( - path.clone(), - ArtifactChange { - raw, - structural: Some(structural), - }, - )); - } - } - out -} - -fn parse_patch_change(v: &Value) -> Option { - serde_json::from_value::(v.clone()).ok() -} - -fn patch_change_to_perspectives( - change: &PatchChange, - file_path: &str, -) -> (Option, StructuralChange) { - let mut extra: HashMap = HashMap::new(); - match change { - PatchChange::Add { content, .. } => { - extra.insert("operation".into(), json!("add")); - extra.insert("byte_count".into(), json!(content.len())); - extra.insert("line_count".into(), json!(content.lines().count())); - let raw = synth_add_diff(file_path, content); - ( - Some(raw), - StructuralChange { - change_type: "codex.add".into(), - extra, - }, - ) - } - PatchChange::Update { - unified_diff, - move_path, - .. - } => { - extra.insert("operation".into(), json!("update")); - if let Some(mp) = move_path { - extra.insert("move_path".into(), json!(mp)); - } - ( - Some(unified_diff.clone()), - StructuralChange { - change_type: "codex.update".into(), - extra, - }, - ) - } - PatchChange::Delete { - original_content, .. 
- } => { - extra.insert("operation".into(), json!("delete")); - let raw = original_content - .as_ref() - .map(|c| synth_delete_diff(file_path, c)); - ( - raw, - StructuralChange { - change_type: "codex.delete".into(), - extra, - }, - ) - } - PatchChange::Unknown => { - extra.insert("operation".into(), json!("unknown")); - ( - None, - StructuralChange { - change_type: "codex.unknown".into(), - extra, - }, - ) - } - } -} - -fn synth_add_diff(_path: &str, content: &str) -> String { - let lines: Vec<&str> = content.split('\n').collect(); - // Git-style add: all lines prefixed with +. - // Strip trailing empty element from the trailing newline, if any. - let effective: &[&str] = if lines.last() == Some(&"") { - &lines[..lines.len().saturating_sub(1)] - } else { - &lines[..] - }; - let mut buf = format!("@@ -0,0 +1,{} @@\n", effective.len()); - for l in effective { - buf.push('+'); - buf.push_str(l); - buf.push('\n'); - } - buf -} - -fn synth_delete_diff(_path: &str, original: &str) -> String { - let lines: Vec<&str> = original.split('\n').collect(); - let effective: &[&str] = if lines.last() == Some(&"") { - &lines[..lines.len().saturating_sub(1)] - } else { - &lines[..] 
- }; - let mut buf = format!("@@ -1,{} +0,0 @@\n", effective.len()); - for l in effective { - buf.push('-'); - buf.push_str(l); - buf.push('\n'); - } - buf -} - #[cfg(test)] mod tests { use super::*; @@ -577,9 +75,6 @@ mod tests { r#"{"timestamp":"2026-04-20T16:44:37.773Z","type":"turn_context","payload":{"turn_id":"t1","cwd":"/tmp/proj","model":"gpt-5.4"}}"#, r#"{"timestamp":"2026-04-20T16:44:37.800Z","type":"response_item","payload":{"type":"message","role":"user","content":[{"type":"input_text","text":"build me a thing"}]}}"#, r#"{"timestamp":"2026-04-20T16:44:38.100Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"creating"}],"phase":"commentary"}}"#, - r#"{"timestamp":"2026-04-20T16:44:38.200Z","type":"response_item","payload":{"type":"function_call","name":"exec_command","arguments":"{\"cmd\":\"pwd\"}","call_id":"c1"}}"#, - r#"{"timestamp":"2026-04-20T16:44:38.300Z","type":"response_item","payload":{"type":"function_call_output","call_id":"c1","output":"/tmp/proj\n"}}"#, - r#"{"timestamp":"2026-04-20T16:44:38.400Z","type":"event_msg","payload":{"type":"exec_command_end","call_id":"c1","command":["/bin/bash","-lc","pwd"],"stdout":"/tmp/proj\n","exit_code":0,"aggregated_output":"/tmp/proj\n"}}"#, r#"{"timestamp":"2026-04-20T16:44:38.500Z","type":"response_item","payload":{"type":"custom_tool_call","call_id":"c2","name":"apply_patch","input":"*** Begin Patch\n*** Add File: /tmp/proj/a.rs\n+fn main() {}\n*** End Patch"}}"#, r#"{"timestamp":"2026-04-20T16:44:38.700Z","type":"event_msg","payload":{"type":"patch_apply_end","call_id":"c2","success":true,"changes":{"/tmp/proj/a.rs":{"type":"add","content":"fn main() {}\n"}}}}"#, r#"{"timestamp":"2026-04-20T16:44:38.900Z","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"done"}],"phase":"final","end_turn":true}}"#, @@ -599,8 +94,10 @@ mod tests { 
path.path.base.as_ref().unwrap().ref_str.as_deref(), Some("abc") ); - // 3 turns → 3 steps (user, assistant 1, assistant 2). - assert_eq!(path.steps.len(), 3); + assert_eq!( + path.path.base.as_ref().unwrap().branch.as_deref(), + Some("main") + ); } #[test] @@ -614,26 +111,29 @@ mod tests { } #[test] - fn derive_path_preserves_conversation_artifact() { + fn derive_path_producer_in_canonical_slot() { let (_t, mgr, id) = fixture_session(&minimal_body()); let session = mgr.read_session(&id).unwrap(); let path = derive_path(&session, &DeriveConfig::default()); - let artifact = format!("codex://{}", session.id); - for step in &path.steps { - assert!( - step.change.contains_key(&artifact), - "step {} missing convo artifact", - step.step.id - ); - } + let meta_extra = &path.meta.as_ref().unwrap().extra; + // Producer (originator + cli_version) lives in its canonical slot. + let producer = meta_extra + .get("producer") + .and_then(|v| v.as_object()) + .expect("meta.extra.producer object"); + assert_eq!(producer.get("name").and_then(|v| v.as_str()), Some("codex-tui")); + assert_eq!(producer.get("version").and_then(|v| v.as_str()), Some("0.118.0")); + // Nothing else codex-specific is smuggled through meta.extra. + assert!(!meta_extra.contains_key("codex")); } #[test] - fn derive_path_surfaces_apply_patch_as_file_artifact() { + fn derive_path_apply_patch_emits_file_write_sibling() { let (_t, mgr, id) = fixture_session(&minimal_body()); let session = mgr.read_session(&id).unwrap(); let path = derive_path(&session, &DeriveConfig::default()); - // Find the step with the file artifact. + // The assistant turn that ran `apply_patch` carries a sibling + // `file.write` entry keyed by the file path. 
let file_step = path .steps .iter() @@ -646,110 +146,28 @@ mod tests { "raw must be a unified diff" ); let structural = change.structural.as_ref().unwrap(); - assert_eq!(structural.change_type, "codex.add"); + assert_eq!(structural.change_type, "file.write"); assert_eq!(structural.extra["operation"], "add"); } #[test] - fn derive_path_update_perspectives_preserved() { - // Session with an `update` change carrying a real unified_diff. - let body = [ - r#"{"timestamp":"t","type":"session_meta","payload":{"id":"s","timestamp":"t","cwd":"/p","originator":"x","cli_version":"1","source":"cli"}}"#, - r#"{"timestamp":"t","type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"edit"}]}}"#, - r#"{"timestamp":"t","type":"response_item","payload":{"type":"custom_tool_call","call_id":"c","name":"apply_patch","input":"*** Update File: /p/a.rs\n@@"}}"#, - r#"{"timestamp":"t","type":"event_msg","payload":{"type":"patch_apply_end","call_id":"c","success":true,"changes":{"/p/a.rs":{"type":"update","unified_diff":"@@ -1 +1 @@\n-old\n+new"}}}}"#, - ].join("\n"); - let (_t, mgr, id) = fixture_session(&body); - let session = mgr.read_session(&id).unwrap(); - let path = derive_path(&session, &DeriveConfig::default()); - let file_change = path - .steps - .iter() - .find_map(|s| s.change.get("/p/a.rs")) - .expect("update should land as file artifact"); - assert_eq!(file_change.raw.as_deref(), Some("@@ -1 +1 @@\n-old\n+new")); - let structural = file_change.structural.as_ref().unwrap(); - assert_eq!(structural.change_type, "codex.update"); - } - - #[test] - fn derive_path_validates() { + fn derive_path_validates_as_single_path_graph() { let (_t, mgr, id) = fixture_session(&minimal_body()); let session = mgr.read_session(&id).unwrap(); let path = derive_path(&session, &DeriveConfig::default()); let doc = Graph::from_path(path); let json = doc.to_json().unwrap(); - // Round-trip through the validator (it just needs to parse). 
let parsed = Graph::from_json(&json).unwrap(); let p = parsed.single_path().expect("single-path graph"); - assert!(!p.steps.is_empty()); - let ancestors = toolpath::v1::query::ancestors(&p.steps, &p.path.head); - assert_eq!(ancestors.len(), p.steps.len(), "all steps on head ancestry"); - } - - #[test] - fn derive_path_shell_summary() { - let (_t, mgr, id) = fixture_session(&minimal_body()); - let session = mgr.read_session(&id).unwrap(); - let path = derive_path(&session, &DeriveConfig::default()); - let convo_artifact = format!("codex://{}", session.id); - // Find the step that had exec_command - let step = path - .steps - .iter() - .find(|s| { - s.change - .get(&convo_artifact) - .and_then(|c| c.structural.as_ref()) - .and_then(|sc| sc.extra.get("tool_calls")) - .and_then(|v| v.as_array()) - .map(|arr| arr.iter().any(|v| v["name"] == "exec_command")) - .unwrap_or(false) - }) - .expect("no step with exec_command"); - let calls = step.change[&convo_artifact] - .structural - .as_ref() - .unwrap() - .extra["tool_calls"] - .as_array() - .unwrap(); - let exec = &calls[0]; - assert_eq!(exec["summary"], "pwd"); - } - - #[test] - fn derive_path_meta_carries_git() { - let (_t, mgr, id) = fixture_session(&minimal_body()); - let session = mgr.read_session(&id).unwrap(); - let path = derive_path(&session, &DeriveConfig::default()); - let codex_meta = &path.meta.as_ref().unwrap().extra["codex"]; - let git = &codex_meta["git"]; - assert_eq!(git["commit_hash"], "abc"); - assert_eq!(git["branch"], "main"); + let anc = toolpath::v1::query::ancestors(&p.steps, &p.path.head); + assert_eq!(anc.len(), p.steps.len(), "all steps on head ancestry"); } #[test] - fn derive_project_multi() { + fn derive_project_per_session() { let (_t, mgr, id) = fixture_session(&minimal_body()); - let session = mgr.read_session(&id).unwrap(); - let paths = derive_project(&[session.clone(), session], &DeriveConfig::default()); - assert_eq!(paths.len(), 2); - assert_eq!(paths[0].path.id, paths[1].path.id); - 
} - - #[test] - fn synth_add_diff_has_plus_lines() { - let diff = synth_add_diff("a.rs", "hello\nworld\n"); - assert!(diff.contains("+hello")); - assert!(diff.contains("+world")); - assert!(diff.starts_with("@@ -0,0 +1,2 @@")); - } - - #[test] - fn synth_delete_diff_has_minus_lines() { - let diff = synth_delete_diff("a.rs", "gone\n"); - assert!(diff.contains("-gone")); - assert!(diff.starts_with("@@ -1,1 +0,0 @@")); + let s1 = mgr.read_session(&id).unwrap(); + let paths = derive_project(std::slice::from_ref(&s1), &DeriveConfig::default()); + assert_eq!(paths.len(), 1); } } diff --git a/crates/toolpath-codex/src/provider.rs b/crates/toolpath-codex/src/provider.rs index 964b4d0..87fff23 100644 --- a/crates/toolpath-codex/src/provider.rs +++ b/crates/toolpath-codex/src/provider.rs @@ -27,13 +27,14 @@ use std::collections::HashMap; use crate::io::ConvoIO; use crate::types::{ - EventMsg, ExecCommandEnd, Message, PatchApplyEnd, ResponseItem, RolloutItem, Session, - TokenCountInfo, TokenUsage as CodexTokenUsage, + EventMsg, ExecCommandEnd, Message, PatchApplyEnd, PatchChange, ResponseItem, RolloutItem, + Session, TokenCountInfo, TokenUsage as CodexTokenUsage, }; use serde_json::{Map, Value}; use toolpath_convo::{ ConversationEvent, ConversationMeta, ConversationProvider, ConversationView, ConvoError, - EnvironmentSnapshot, Role, TokenUsage, ToolCategory, ToolInvocation, ToolResult, Turn, + EnvironmentSnapshot, FileMutation, ProducerInfo, Role, SessionBase, TokenUsage, ToolCategory, + ToolInvocation, ToolResult, Turn, }; /// Provider for Codex sessions. @@ -257,6 +258,81 @@ impl<'a> Builder<'a> { } } + // Path-level base context from session_meta (cwd + git). 
+ let meta = self.session.meta(); + let base = { + let wd = meta + .as_ref() + .map(|m| m.cwd.to_string_lossy().to_string()) + .filter(|s| !s.is_empty()) + .or_else(|| self.working_dir.clone()); + let git = meta.as_ref().and_then(|m| m.git.as_ref()); + let revision = git.and_then(|g| g.commit_hash.clone()); + let branch = git.and_then(|g| g.branch.clone()); + let remote = git.and_then(|g| g.repository_url.clone()); + if wd.is_some() || revision.is_some() || branch.is_some() || remote.is_some() { + Some(SessionBase { + working_dir: wd, + vcs_revision: revision, + vcs_branch: branch, + vcs_remote: remote, + }) + } else { + None + } + }; + + // Producer (originator + cli_version) lifts onto the typed view + // field. `model_provider` already lives on each assistant + // `ActorDefinition.provider`. Codex's `source` and `forked_from_id` + // are wire-level fields with no cross-harness analog — the codex + // projector hard-codes defaults on the return path, so we let them + // drop on this side. + let producer = meta.as_ref().map(|m| ProducerInfo { + name: m.originator.clone(), + version: Some(m.cli_version.clone()), + }); + + // Filter empty carrier turns (no text, no thinking, no tool calls). + // Previously done inside `derive_path_from_view`; moved here so the + // canonical `derive_path` sees only meaningful turns. + self.turns + .retain(|t| !(t.text.is_empty() && t.thinking.is_none() && t.tool_uses.is_empty())); + + // Assign synthetic ids to turns whose source message didn't carry + // one, then link sequentially via `parent_id` so the shared + // `derive_path` can walk a connected DAG. Codex turns don't carry + // explicit parent ids on the wire; this preserves the linear + // ordering the old `derive_path_from_view` produced. 
+ for (idx, t) in self.turns.iter_mut().enumerate() { + if t.id.is_empty() { + t.id = format!("codex-turn-{:04}", idx + 1); + } + } + let mut prev: Option = None; + for t in self.turns.iter_mut() { + if t.parent_id.is_none() { + t.parent_id = prev.clone(); + } + prev = Some(t.id.clone()); + } + + // Disambiguate event ids. `event_from_raw` synthesizes + // `-`, which collides when codex emits + // multiple events of the same type at the same timestamp (rare + // but real). Suffix duplicates with their position so each step + // gets a unique id. + let mut seen: std::collections::HashSet = std::collections::HashSet::new(); + for t in &self.turns { + seen.insert(t.id.clone()); + } + for (i, e) in self.events.iter_mut().enumerate() { + if !seen.insert(e.id.clone()) { + e.id = format!("{}-{:04}", e.id, i); + seen.insert(e.id.clone()); + } + } + ConversationView { id: self.session.id.clone(), started_at: self.session.started_at(), @@ -271,6 +347,8 @@ impl<'a> Builder<'a> { files_changed: self.files_changed_order, session_ids: vec![], events: self.events, + base, + producer, ..Default::default() } } @@ -523,25 +601,28 @@ impl<'a> Builder<'a> { } fn apply_patch_apply_end(&mut self, patch: &PatchApplyEnd) { - let turn_idx = self.call_index.get(&patch.call_id).map(|(i, _)| *i); + let loc = self.call_index.get(&patch.call_id).copied(); - if let Some(turn_idx) = turn_idx { + // `patch.changes` is a HashMap — iterate in sorted order so the + // derived order is deterministic across runs. + let mut paths: Vec<&String> = patch.changes.keys().collect(); + paths.sort(); + + // Populate `tool.file_mutations` on the matching tool invocation so + // `derive_path` can project each file mutation into a sibling + // `file.write` change. 
+ if let Some((turn_idx, tool_idx)) = loc { let turn = &mut self.turns[turn_idx]; - let codex = turn_extra_codex_mut(turn); - let patches = codex - .entry("patch_changes") - .or_insert_with(|| Value::Array(Vec::new())); - if let Value::Array(arr) = patches - && let Ok(v) = serde_json::to_value(patch) - { - arr.push(v); + if let Some(tool) = turn.tool_uses.get_mut(tool_idx) { + for path in &paths { + if let Some(change) = patch.changes.get(*path) { + tool.file_mutations + .push(patch_change_to_file_mutation(path, change)); + } + } } } - // `patch.changes` is a HashMap — iterate in sorted order so the - // derived `files_changed` list is deterministic across runs. - let mut paths: Vec<&String> = patch.changes.keys().collect(); - paths.sort(); for path in paths { if self.files_changed_seen.insert(path.clone()) { self.files_changed_order.push(path.clone()); @@ -585,6 +666,74 @@ impl<'a> Builder<'a> { } } +// ── Patch → FileMutation conversion ───────────────────────────────── + +fn patch_change_to_file_mutation(path: &str, change: &PatchChange) -> FileMutation { + let mut fm = FileMutation { + path: path.to_string(), + ..Default::default() + }; + match change { + PatchChange::Add { content, .. } => { + fm.operation = Some("add".into()); + fm.after = Some(content.clone()); + fm.raw_diff = Some(synth_add_diff(content)); + } + PatchChange::Update { + unified_diff, + move_path, + .. + } => { + fm.operation = Some("update".into()); + fm.raw_diff = Some(unified_diff.clone()); + fm.rename_to = move_path.clone(); + } + PatchChange::Delete { + original_content, .. 
+ } => { + fm.operation = Some("delete".into()); + fm.before = original_content.clone(); + fm.raw_diff = original_content.as_deref().map(synth_delete_diff); + } + PatchChange::Unknown => { + fm.operation = Some("unknown".into()); + } + } + fm +} + +fn synth_add_diff(content: &str) -> String { + let lines: Vec<&str> = content.split('\n').collect(); + let effective: &[&str] = if lines.last() == Some(&"") { + &lines[..lines.len().saturating_sub(1)] + } else { + &lines[..] + }; + let mut buf = format!("@@ -0,0 +1,{} @@\n", effective.len()); + for l in effective { + buf.push('+'); + buf.push_str(l); + buf.push('\n'); + } + buf +} + +fn synth_delete_diff(original: &str) -> String { + let lines: Vec<&str> = original.split('\n').collect(); + let effective: &[&str] = if lines.last() == Some(&"") { + &lines[..lines.len().saturating_sub(1)] + } else { + &lines[..] + }; + let mut buf = format!("@@ -1,{} +0,0 @@\n", effective.len()); + for l in effective { + buf.push('-'); + buf.push_str(l); + buf.push('\n'); + } + buf +} + fn message_to_turn( msg: &Message, timestamp: &str, @@ -948,14 +1097,21 @@ mod tests { } #[test] - fn patch_apply_end_attached_to_turn_extra() { + fn patch_apply_end_populates_tool_file_mutations() { let (_t, mgr, id) = setup_session_fixture(&minimal_session()); let view = to_view(&mgr.read_session(&id).unwrap()); - let assistant = &view.turns[1]; - let codex = assistant.extra.get("codex").unwrap(); - let patches = codex.get("patch_changes").unwrap().as_array().unwrap(); - assert_eq!(patches.len(), 1); - assert_eq!(patches[0]["changes"]["/tmp/proj/a.rs"]["type"], "add"); + // Find the assistant turn whose `apply_patch` tool produced the file. 
+ let tu = view + .turns + .iter() + .flat_map(|t| t.tool_uses.iter()) + .find(|tu| tu.name == "apply_patch") + .expect("apply_patch tool invocation present"); + assert_eq!(tu.file_mutations.len(), 1); + let fm = &tu.file_mutations[0]; + assert_eq!(fm.path, "/tmp/proj/a.rs"); + assert_eq!(fm.operation.as_deref(), Some("add")); + assert!(fm.raw_diff.is_some()); } #[test] diff --git a/crates/toolpath-codex/tests/fidelity.rs b/crates/toolpath-codex/tests/fidelity.rs index 3bb9d24..1af3f30 100644 --- a/crates/toolpath-codex/tests/fidelity.rs +++ b/crates/toolpath-codex/tests/fidelity.rs @@ -191,14 +191,15 @@ fn collect_derived_tool_call_ids(path: &toolpath::v1::Path) -> HashSet { let Some(struc) = change.structural.as_ref() else { continue; }; - let Some(calls) = struc.extra.get("tool_calls") else { + // Canonical: `tool_uses` array entries carry `id` (= call_id). + let Some(uses) = struc.extra.get("tool_uses") else { continue; }; - let Some(arr) = calls.as_array() else { + let Some(arr) = uses.as_array() else { continue; }; - for c in arr { - if let Some(id) = c.get("call_id").and_then(|v| v.as_str()) { + for tu in arr { + if let Some(id) = tu.get("id").and_then(|v| v.as_str()) { ids.insert(id.to_string()); } } diff --git a/crates/toolpath-convo/src/derive.rs b/crates/toolpath-convo/src/derive.rs index d1e5e9d..738b4e6 100644 --- a/crates/toolpath-convo/src/derive.rs +++ b/crates/toolpath-convo/src/derive.rs @@ -256,6 +256,12 @@ pub fn derive_path(view: &ConversationView, config: &DeriveConfig) -> Path { if let Some(a) = &fm.after { t_extra.insert("after".to_string(), serde_json::Value::String(a.clone())); } + if let Some(rt) = &fm.rename_to { + t_extra.insert( + "rename_to".to_string(), + serde_json::Value::String(rt.clone()), + ); + } step.change.insert( fm.path.clone(), ArtifactChange { @@ -304,6 +310,9 @@ pub fn derive_path(view: &ConversationView, config: &DeriveConfig) -> Path { // and other non-turn entries survive the IR-to-Path-to-IR roundtrip. 
// Without this, derive_path drops everything outside `turns`, so a // Claude session loses ~10–25% of its lines on import/export. + // Track the last emitted step id so events without an explicit + // `parent_id` can chain off whatever step came before them. + let mut last_step_id: Option = steps.last().map(|s| s.step.id.clone()); for (idx, event) in view.events.iter().enumerate() { // Event step id: prefer the event's native id so it round-trips. let step_id = if event.id.is_empty() { @@ -348,15 +357,18 @@ pub fn derive_path(view: &ConversationView, config: &DeriveConfig) -> Path { ); } + let parents: Vec = event + .parent_id + .as_ref() + .and_then(|pid| turn_to_step.get(pid).cloned()) + .or_else(|| last_step_id.clone()) + .into_iter() + .collect(); + let mut step = Step { step: StepIdentity { - id: step_id, - parents: event - .parent_id - .as_ref() - .and_then(|pid| turn_to_step.get(pid).cloned()) - .into_iter() - .collect(), + id: step_id.clone(), + parents, actor, timestamp: event.timestamp.clone(), }, @@ -375,6 +387,7 @@ pub fn derive_path(view: &ConversationView, config: &DeriveConfig) -> Path { }, ); steps.push(step); + last_step_id = Some(step_id); } let head = steps.last().map(|s| s.step.id.clone()).unwrap_or_default(); @@ -401,11 +414,6 @@ pub fn derive_path(view: &ConversationView, config: &DeriveConfig) -> Path { meta.extra.insert("files_changed".to_string(), v); } - // Project path-level provider-namespaced extras straight onto meta.extra. - for (k, v) in &view.extra { - meta.extra.insert(k.clone(), v.clone()); - } - // Carry `vcs_remote` (not representable on `Base`) under meta.extra. if let Some(remote) = view.base.as_ref().and_then(|b| b.vcs_remote.as_ref()) && !meta.extra.contains_key("vcs_remote") @@ -416,6 +424,13 @@ pub fn derive_path(view: &ConversationView, config: &DeriveConfig) -> Path { ); } + // Project canonical session-level fields under well-known keys. 
+ if let Some(producer) = &view.producer + && let Ok(v) = serde_json::to_value(producer) + { + meta.extra.insert("producer".to_string(), v); + } + Path { path: PathIdentity { id: path_id, diff --git a/crates/toolpath-convo/src/extract.rs b/crates/toolpath-convo/src/extract.rs index dfbdf4f..375b2b0 100644 --- a/crates/toolpath-convo/src/extract.rs +++ b/crates/toolpath-convo/src/extract.rs @@ -13,8 +13,8 @@ use chrono::DateTime; use toolpath::v1::{Path, Step}; use crate::{ - ConversationEvent, ConversationView, DelegatedWork, EnvironmentSnapshot, FileMutation, Role, - SessionBase, TokenUsage, ToolCategory, ToolInvocation, ToolResult, Turn, + ConversationEvent, ConversationView, DelegatedWork, EnvironmentSnapshot, FileMutation, + ProducerInfo, Role, SessionBase, TokenUsage, ToolCategory, ToolInvocation, ToolResult, Turn, }; /// Extract a [`ConversationView`] from a toolpath [`Path`] document. @@ -60,15 +60,15 @@ pub fn extract_conversation(path: &Path) -> ConversationView { } } - // Project `path.meta.extra` back to `view.extra`. `files_changed` and - // `vcs_remote` are handled by other slots. - if let Some(meta) = &path.meta { - for (k, v) in &meta.extra { - if k == "files_changed" || k == "vcs_remote" { - continue; - } - view.extra.insert(k.clone(), v.clone()); - } + // Recover canonical session-level fields from `path.meta.extra`. + // Unrecognized keys are dropped — the IR is the cross-harness contract. + if let Some(meta) = &path.meta + && let Some(p) = meta + .extra + .get("producer") + .and_then(|v| serde_json::from_value::(v.clone()).ok()) + { + view.producer = Some(p); } // Map from step ID → index into view.turns, for parent lookups. 
@@ -111,6 +111,11 @@ pub fn extract_conversation(path: &Path) -> ConversationView { .get("after") .and_then(|v| v.as_str()) .map(|s| s.to_string()), + rename_to: s + .extra + .get("rename_to") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), }; step_mutations.entry(tid).or_default().push(fm); } diff --git a/crates/toolpath-convo/src/lib.rs b/crates/toolpath-convo/src/lib.rs index ec16240..89831c3 100644 --- a/crates/toolpath-convo/src/lib.rs +++ b/crates/toolpath-convo/src/lib.rs @@ -69,6 +69,19 @@ pub struct TokenUsage { pub cache_write_tokens: Option, } +/// Identity of the software that produced a session: e.g. +/// `{ name: "codex-tui", version: "0.118.0" }`. Distinct from +/// [`ConversationView::provider_id`] (which is the high-level family — +/// `"codex"`, `"claude-code"` — used for dispatch). +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ProducerInfo { + /// Producer name (e.g. `"codex-tui"`, `"claude-code"`, `"gemini-cli"`). + pub name: String, + /// Producer version, when the source format records one. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub version: Option, +} + /// Path-level base context for a conversation: where the session was rooted /// and against what VCS state. Populated by the provider's `to_view`; projects /// straight onto `Path.base` by `derive_path`. @@ -109,6 +122,10 @@ pub struct FileMutation { /// File contents after this mutation (when known). #[serde(default, skip_serializing_if = "Option::is_none")] pub after: Option, + /// When this mutation is a rename, the new path. Projected to + /// `structural.extra.rename_to`. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub rename_to: Option, } /// Snapshot of the working environment when a turn was produced. @@ -312,11 +329,10 @@ pub struct ConversationView { #[serde(default, skip_serializing_if = "Option::is_none")] pub base: Option, - /// Path-level provider-namespaced extras. 
Projects directly to - /// `Path.meta.extra`. Providers SHOULD namespace under their short id - /// (e.g. `extra["codex"]`, `extra["opencode"]`) to avoid collisions. - #[serde(default, skip_serializing_if = "HashMap::is_empty")] - pub extra: HashMap, + /// Producing software (CLI name + version). Distinct from + /// `provider_id`, which is the dispatch family. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub producer: Option, } impl ConversationView { From 528865a6fec20719f7ff678013e0dce7f8bbdabc Mon Sep 17 00:00:00 2001 From: Ben Barber Date: Thu, 14 May 2026 15:20:18 -0400 Subject: [PATCH 05/10] toolpath-opencode: migrate to shared derive_path; move file_mutations to Turn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrates `toolpath-opencode/src/derive.rs` from its bespoke `derive_path_from_view` to a one-line wrapper around `toolpath_convo::derive_path`. The snapshot git2 tree↔tree diff work moves into `provider::to_view_with_resolver`, which now produces a fully enriched `ConversationView` (snapshot-derived mutations on each turn, tool-input fallback for gitignored paths, `view.base` from `session.directory` + `session.project_id`, `view.producer` for the opencode version). `derive.rs` shrinks from ~820 lines to ~200. IR refinement included in this commit: `file_mutations` moves from `ToolInvocation` to `Turn`, with an optional `tool_id` field on each `FileMutation` linking back to a specific tool call when the provider can attribute it. This is the honest model — codex's `patch_apply_end` DOES attribute per-tool (via `call_id`), but opencode's snapshots are per-turn (no per-tool linkage available), and claude/gemini will populate `tool_id` from tool-input synthesis. The shared `derive_path` projects each `Turn.file_mutations` entry to a sibling `file.write` change, putting `tool` / `tool_id` in `structural.extra` when present. 
Behavior changes for opencode-derived paths: - File-mutation `structural.type` unifies on `"file.write"` (was `opencode.add` / `update` / `delete` / `touch`). - `tool_calls` summary array on `conversation.append` dropped — it was a pre-formatted display alias for `tool_uses`. - `meta.extra.opencode` aggregate dropped — `session_id` was `view.id`, `project_id` is `view.base.vcs_revision`, `slug` / `version` / `title` flow through other slots, `total_tokens` is `view.total_usage`, `files_changed` is `view.files_changed`. The opencode projector hard-codes safe defaults on the return path. - Actor `Identity` records simplify to the shared shape. - Empty-carrier turn filtering moved into `to_view`. `no_snapshot_diffs` kept on opencode's `DeriveConfig` for tests/CLI intent ("don't open git2"); skipping just routes through `to_view` instead of `to_view_with_resolver`. --- crates/toolpath-claude/src/project.rs | 2 + crates/toolpath-claude/src/provider.rs | 2 + crates/toolpath-codex/src/project.rs | 2 + crates/toolpath-codex/src/provider.rs | 39 +- crates/toolpath-convo/src/derive.rs | 99 +-- crates/toolpath-convo/src/extract.rs | 37 +- crates/toolpath-convo/src/lib.rs | 33 +- crates/toolpath-convo/src/project.rs | 4 + crates/toolpath-gemini/src/project.rs | 2 + crates/toolpath-gemini/src/provider.rs | 1 + crates/toolpath-opencode/src/derive.rs | 779 +++-------------------- crates/toolpath-opencode/src/project.rs | 2 + crates/toolpath-opencode/src/provider.rs | 255 +++++++- crates/toolpath-pi/src/project.rs | 2 + crates/toolpath-pi/src/provider.rs | 5 + 15 files changed, 490 insertions(+), 774 deletions(-) diff --git a/crates/toolpath-claude/src/project.rs b/crates/toolpath-claude/src/project.rs index 849347f..188fa5d 100644 --- a/crates/toolpath-claude/src/project.rs +++ b/crates/toolpath-claude/src/project.rs @@ -1028,6 +1028,7 @@ mod tests { environment: None, delegations: vec![], extra: Default::default(), + file_mutations: Vec::new(), } } @@ -1046,6 +1047,7 @@ mod 
tests { environment: None, delegations: vec![], extra: Default::default(), + file_mutations: Vec::new(), } } diff --git a/crates/toolpath-claude/src/provider.rs b/crates/toolpath-claude/src/provider.rs index 1895733..1daecda 100644 --- a/crates/toolpath-claude/src/provider.rs +++ b/crates/toolpath-claude/src/provider.rs @@ -167,6 +167,7 @@ fn message_to_turn(entry: &ConversationEntry, msg: &Message) -> Turn { environment, delegations, extra, + file_mutations: Vec::new(), } } @@ -1065,6 +1066,7 @@ mod tests { environment: None, delegations: vec![], extra: Default::default(), + file_mutations: Vec::new(), }]; // Create a message with results in reversed order diff --git a/crates/toolpath-codex/src/project.rs b/crates/toolpath-codex/src/project.rs index 1bcf007..52dc8c2 100644 --- a/crates/toolpath-codex/src/project.rs +++ b/crates/toolpath-codex/src/project.rs @@ -668,6 +668,7 @@ mod tests { environment: None, delegations: vec![], extra: HashMap::new(), + file_mutations: Vec::new(), } } @@ -691,6 +692,7 @@ mod tests { environment: None, delegations: vec![], extra: HashMap::new(), + file_mutations: Vec::new(), } } diff --git a/crates/toolpath-codex/src/provider.rs b/crates/toolpath-codex/src/provider.rs index 87fff23..3689d2f 100644 --- a/crates/toolpath-codex/src/provider.rs +++ b/crates/toolpath-codex/src/provider.rs @@ -608,17 +608,16 @@ impl<'a> Builder<'a> { let mut paths: Vec<&String> = patch.changes.keys().collect(); paths.sort(); - // Populate `tool.file_mutations` on the matching tool invocation so - // `derive_path` can project each file mutation into a sibling - // `file.write` change. - if let Some((turn_idx, tool_idx)) = loc { + // Populate `turn.file_mutations` on the matching turn, with + // `tool_id` set to the `call_id` so `derive_path` can link the + // sibling `file.write` change back to this specific tool call. 
+ if let Some((turn_idx, _tool_idx)) = loc { let turn = &mut self.turns[turn_idx]; - if let Some(tool) = turn.tool_uses.get_mut(tool_idx) { - for path in &paths { - if let Some(change) = patch.changes.get(*path) { - tool.file_mutations - .push(patch_change_to_file_mutation(path, change)); - } + for path in &paths { + if let Some(change) = patch.changes.get(*path) { + let mut fm = patch_change_to_file_mutation(path, change); + fm.tool_id = Some(patch.call_id.clone()); + turn.file_mutations.push(fm); } } } @@ -791,6 +790,7 @@ fn message_to_turn( environment, delegations: Vec::new(), extra, + file_mutations: Vec::new(), } } @@ -817,6 +817,7 @@ fn synthetic_assistant_turn( }), delegations: Vec::new(), extra: HashMap::new(), + file_mutations: Vec::new(), } } @@ -1097,19 +1098,25 @@ mod tests { } #[test] - fn patch_apply_end_populates_tool_file_mutations() { + fn patch_apply_end_populates_turn_file_mutations() { let (_t, mgr, id) = setup_session_fixture(&minimal_session()); let view = to_view(&mgr.read_session(&id).unwrap()); - // Find the assistant turn whose `apply_patch` tool produced the file. - let tu = view + // Find the turn that hosts the `apply_patch` file mutation. The + // mutation's `tool_id` should link back to the apply_patch tool. 
+ let apply_patch_id = view .turns .iter() .flat_map(|t| t.tool_uses.iter()) .find(|tu| tu.name == "apply_patch") + .map(|tu| tu.id.clone()) .expect("apply_patch tool invocation present"); - assert_eq!(tu.file_mutations.len(), 1); - let fm = &tu.file_mutations[0]; - assert_eq!(fm.path, "/tmp/proj/a.rs"); + let fm = view + .turns + .iter() + .flat_map(|t| t.file_mutations.iter()) + .find(|fm| fm.path == "/tmp/proj/a.rs") + .expect("file mutation present"); + assert_eq!(fm.tool_id.as_ref(), Some(&apply_patch_id)); assert_eq!(fm.operation.as_deref(), Some("add")); assert!(fm.raw_diff.is_some()); } diff --git a/crates/toolpath-convo/src/derive.rs b/crates/toolpath-convo/src/derive.rs index 738b4e6..c497f62 100644 --- a/crates/toolpath-convo/src/derive.rs +++ b/crates/toolpath-convo/src/derive.rs @@ -221,61 +221,65 @@ pub fn derive_path(view: &ConversationView, config: &DeriveConfig) -> Path { // File mutations → sibling `file.write` change entries. // - // Preferred: each `ToolInvocation::file_mutations` entry comes from - // the provider's `to_view` with the resolved diff already in + // Preferred: each `Turn::file_mutations` entry comes from the + // provider's `to_view` with the resolved diff already in // `raw_diff` (claude's git-HEAD lookup, codex's `apply_patch_end` - // parse, opencode's git2 tree↔tree, etc.). + // parse, opencode's git2 tree↔tree, etc.). `tool_id` links back + // to a specific `ToolInvocation` when the provider can attribute. // - // Fallback: for tools whose category is `FileWrite` but whose - // `file_mutations` is empty (providers that haven't migrated yet), - // synthesize a diff from the tool's `input` via `file_write_change`. - for tool in &turn.tool_uses { - if !tool.file_mutations.is_empty() { - for fm in &tool.file_mutations { - let mut t_extra: HashMap = HashMap::new(); + // Fallback (un-migrated providers): for any `FileWrite`-category + // tool with no matching mutation, synthesize from `tool.input` + // via `file_write_change`. 
+ let attributed: std::collections::HashSet = turn + .file_mutations + .iter() + .filter_map(|fm| fm.tool_id.clone()) + .collect(); + for fm in &turn.file_mutations { + let mut t_extra: HashMap = HashMap::new(); + if let Some(tid) = &fm.tool_id { + t_extra.insert( + "tool_id".to_string(), + serde_json::Value::String(tid.clone()), + ); + if let Some(tool) = turn.tool_uses.iter().find(|t| &t.id == tid) { t_extra.insert( "tool".to_string(), serde_json::Value::String(tool.name.clone()), ); - t_extra.insert( - "tool_id".to_string(), - serde_json::Value::String(tool.id.clone()), - ); - if let Some(op) = &fm.operation { - t_extra.insert( - "operation".to_string(), - serde_json::Value::String(op.clone()), - ); - } - if let Some(b) = &fm.before { - t_extra.insert( - "before".to_string(), - serde_json::Value::String(b.clone()), - ); - } - if let Some(a) = &fm.after { - t_extra.insert("after".to_string(), serde_json::Value::String(a.clone())); - } - if let Some(rt) = &fm.rename_to { - t_extra.insert( - "rename_to".to_string(), - serde_json::Value::String(rt.clone()), - ); - } - step.change.insert( - fm.path.clone(), - ArtifactChange { - raw: fm.raw_diff.clone(), - structural: Some(StructuralChange { - change_type: "file.write".to_string(), - extra: t_extra, - }), - }, - ); } - continue; } - if tool.category != Some(ToolCategory::FileWrite) { + if let Some(op) = &fm.operation { + t_extra.insert( + "operation".to_string(), + serde_json::Value::String(op.clone()), + ); + } + if let Some(b) = &fm.before { + t_extra.insert("before".to_string(), serde_json::Value::String(b.clone())); + } + if let Some(a) = &fm.after { + t_extra.insert("after".to_string(), serde_json::Value::String(a.clone())); + } + if let Some(rt) = &fm.rename_to { + t_extra.insert( + "rename_to".to_string(), + serde_json::Value::String(rt.clone()), + ); + } + step.change.insert( + fm.path.clone(), + ArtifactChange { + raw: fm.raw_diff.clone(), + structural: Some(StructuralChange { + change_type: 
"file.write".to_string(), + extra: t_extra, + }), + }, + ); + } + for tool in &turn.tool_uses { + if tool.category != Some(ToolCategory::FileWrite) || attributed.contains(&tool.id) { continue; } let Some(path) = extract_file_path(tool) else { @@ -659,6 +663,7 @@ mod tests { environment: None, delegations: vec![], extra: HashMap::new(), + file_mutations: Vec::new(), } } diff --git a/crates/toolpath-convo/src/extract.rs b/crates/toolpath-convo/src/extract.rs index 375b2b0..57e1635 100644 --- a/crates/toolpath-convo/src/extract.rs +++ b/crates/toolpath-convo/src/extract.rs @@ -77,24 +77,24 @@ pub fn extract_conversation(path: &Path) -> ConversationView { let mut files_seen: HashSet = HashSet::new(); for step in &path.steps { - // Pre-collect file.write entries on this step, indexed by tool_id, - // so we can attach them as `tool.file_mutations` once the turn is - // built. The iteration order of `step.change` (HashMap) is - // non-deterministic; a pre-pass keeps the attach step simple. - let mut step_mutations: HashMap> = HashMap::new(); + // Pre-collect file.write entries on this step. They attach to the + // turn built from this step's `conversation.append` change (below); + // the iteration order of `step.change` (HashMap) is non-deterministic + // so a pre-pass keeps the attach step simple. Sorted by path for + // determinism on the way back out. 
+ let mut step_mutations: Vec = Vec::new(); for (key, ch) in &step.change { let Some(s) = &ch.structural else { continue }; if s.change_type != "file.write" { continue; } - let tid = s - .extra - .get("tool_id") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(); let fm = FileMutation { path: key.clone(), + tool_id: s + .extra + .get("tool_id") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), operation: s .extra .get("operation") @@ -117,8 +117,9 @@ pub fn extract_conversation(path: &Path) -> ConversationView { .and_then(|v| v.as_str()) .map(|s| s.to_string()), }; - step_mutations.entry(tid).or_default().push(fm); + step_mutations.push(fm); } + step_mutations.sort_by(|a, b| a.path.cmp(&b.path)); for (artifact_key, artifact_change) in &step.change { let structural = match &artifact_change.structural { @@ -145,12 +146,11 @@ pub fn extract_conversation(path: &Path) -> ConversationView { } let mut turn = build_turn(step, &structural.extra); - // Attach pre-collected file mutations to their tool_uses - // by `tool_id`. - for tu in turn.tool_uses.iter_mut() { - if let Some(fms) = step_mutations.remove(&tu.id) { - tu.file_mutations = fms; - } + // Attach pre-collected file mutations to the turn. + // `tool_id` on each mutation links back to the + // specific `ToolInvocation` (when set by derive). + if !step_mutations.is_empty() { + turn.file_mutations = std::mem::take(&mut step_mutations); } let idx = view.turns.len(); step_to_turn.insert(&step.step.id, idx); @@ -321,6 +321,7 @@ fn build_turn(step: &Step, extra: &HashMap) -> Turn { token_usage, environment, delegations, + file_mutations: Vec::new(), extra: turn_extra, } } diff --git a/crates/toolpath-convo/src/lib.rs b/crates/toolpath-convo/src/lib.rs index 89831c3..3308ba0 100644 --- a/crates/toolpath-convo/src/lib.rs +++ b/crates/toolpath-convo/src/lib.rs @@ -101,15 +101,23 @@ pub struct SessionBase { pub vcs_remote: Option, } -/// A file mutation resolved at view-construction time. 
Providers populate this -/// on the `ToolInvocation` that caused the mutation; `derive_path` projects -/// each entry into a sibling artifact change keyed by `path` with -/// `structural.type == "file.write"`. +/// A file mutation resolved at view-construction time. Lives on the `Turn` +/// that produced it; `derive_path` projects each entry into a sibling +/// artifact change keyed by `path` with `structural.type == "file.write"`. +/// `tool_id` links back to the specific `ToolInvocation` that caused the +/// mutation when the provider can identify it (codex via `patch_apply_end` +/// call_id, claude/gemini via tool-input attribution); `None` when the +/// mutation is attributable only to the turn as a whole (opencode's +/// snapshot diffs between turns). #[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct FileMutation { /// File path (relative to `view.base.working_dir` if relative, or /// `file://`/absolute). pub path: String, + /// `ToolInvocation::id` of the tool call that produced this mutation, + /// when the provider can attribute it. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub tool_id: Option, /// Operation: `"add"`, `"update"`, `"delete"`, or a provider-specific tag. #[serde(default, skip_serializing_if = "Option::is_none")] pub operation: Option, @@ -218,11 +226,6 @@ pub struct ToolInvocation { /// crate; `None` for unrecognized tools. #[serde(default, skip_serializing_if = "Option::is_none")] pub category: Option, - /// File mutations this invocation produced, with diffs pre-resolved by - /// the provider's `to_view`. Each entry projects to a sibling - /// `file.write` artifact change in the derived step. - #[serde(default, skip_serializing_if = "Vec::is_empty")] - pub file_mutations: Vec, } /// The result of a tool invocation. 
@@ -275,6 +278,14 @@ pub struct Turn { #[serde(default, skip_serializing_if = "Vec::is_empty")] pub delegations: Vec, + /// File mutations produced by this turn, with diffs pre-resolved by + /// the provider's `to_view`. Each entry projects to a sibling + /// `file.write` artifact change in the derived step. When the + /// mutation is attributable to a specific tool call, `tool_id` on + /// the entry links back to that `ToolInvocation::id`. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub file_mutations: Vec, + /// Provider-specific data that doesn't fit the common schema. /// /// Providers namespace their data under a provider key (e.g. @@ -551,6 +562,7 @@ mod tests { environment: None, delegations: vec![], extra: HashMap::new(), + file_mutations: Vec::new(), }, Turn { id: "t2".into(), @@ -581,6 +593,7 @@ mod tests { environment: None, delegations: vec![], extra: HashMap::new(), + file_mutations: Vec::new(), }, Turn { id: "t3".into(), @@ -596,6 +609,7 @@ mod tests { environment: None, delegations: vec![], extra: HashMap::new(), + file_mutations: Vec::new(), }, ], total_usage: None, @@ -944,6 +958,7 @@ mod tests { result: None, }], extra: HashMap::new(), + file_mutations: Vec::new(), }; let json = serde_json::to_string(&turn).unwrap(); let back: Turn = serde_json::from_str(&json).unwrap(); diff --git a/crates/toolpath-convo/src/project.rs b/crates/toolpath-convo/src/project.rs index 6a7a2f6..1a48409 100644 --- a/crates/toolpath-convo/src/project.rs +++ b/crates/toolpath-convo/src/project.rs @@ -169,6 +169,7 @@ mod tests { environment: None, delegations: vec![], extra: HashMap::new(), + file_mutations: Vec::new(), } } @@ -369,6 +370,7 @@ mod tests { environment: None, delegations: vec![], extra: HashMap::new(), + file_mutations: Vec::new(), }], total_usage: None, provider_id: None, @@ -430,6 +432,7 @@ mod tests { environment: None, delegations: vec![], extra: HashMap::new(), + file_mutations: Vec::new(), }, Turn { id: "t2".into(), @@ -450,6 +453,7 @@ 
mod tests { environment: None, delegations: vec![], extra: HashMap::new(), + file_mutations: Vec::new(), }, ], total_usage: None, diff --git a/crates/toolpath-gemini/src/project.rs b/crates/toolpath-gemini/src/project.rs index 4ea1519..395f83a 100644 --- a/crates/toolpath-gemini/src/project.rs +++ b/crates/toolpath-gemini/src/project.rs @@ -605,6 +605,7 @@ mod tests { environment: None, delegations: vec![], extra: HashMap::new(), + file_mutations: Vec::new(), } } @@ -623,6 +624,7 @@ mod tests { environment: None, delegations: vec![], extra: HashMap::new(), + file_mutations: Vec::new(), } } diff --git a/crates/toolpath-gemini/src/provider.rs b/crates/toolpath-gemini/src/provider.rs index 65f395a..32581ef 100644 --- a/crates/toolpath-gemini/src/provider.rs +++ b/crates/toolpath-gemini/src/provider.rs @@ -137,6 +137,7 @@ fn message_to_turn(msg: &GeminiMessage, working_dir: Option<&str>) -> Turn { environment, delegations: vec![], extra, + file_mutations: Vec::new(), } } diff --git a/crates/toolpath-opencode/src/derive.rs b/crates/toolpath-opencode/src/derive.rs index cfb74c4..003ca40 100644 --- a/crates/toolpath-opencode/src/derive.rs +++ b/crates/toolpath-opencode/src/derive.rs @@ -1,43 +1,29 @@ //! Derive Toolpath documents from opencode sessions. //! -//! Each `Turn` becomes a `Step`. Every step's `change` map carries: -//! -//! - One entry at `opencode://` with a -//! `conversation.append` structural op describing the turn's text, -//! thinking, and tool-call summaries. -//! - Sibling entries for each file touched between the turn's -//! snapshot endpoints. When the snapshot git repo is on disk, -//! `ArtifactChange.raw` is the real unified diff from git. Otherwise -//! we fall back to file paths reported by tool inputs with no -//! `raw` perspective. +//! Thin wrapper around the shared [`toolpath_convo::derive_path`]. All +//! opencode-specific work (snapshot git2 tree↔tree diffs, tool-input +//! 
fallback for gitignored paths, producer/base population) happens in +//! [`crate::provider::to_view_with_resolver`]; nothing provider-specific +//! lives in this module. use crate::paths::PathResolver; -use crate::provider::{to_view, tool_category}; +use crate::provider::{to_view, to_view_with_resolver}; use crate::types::Session; -use serde_json::{Map, Value, json}; -use std::collections::HashMap; -use std::path::{Path as StdPath, PathBuf}; -use toolpath::v1::{ - ActorDefinition, ArtifactChange, Base, Identity, Path, PathIdentity, PathMeta, Step, - StepIdentity, StructuralChange, -}; -use toolpath_convo::{ConversationView, Role, Turn}; +use toolpath::v1::Path; -/// Configuration for deriving a Toolpath `Path` from an opencode -/// session. +/// Configuration for deriving a Toolpath `Path` from an opencode session. #[derive(Debug, Clone, Default)] pub struct DeriveConfig { /// Override `path.base.uri`. Defaults to `file://`. pub project_path: Option, - /// Disable snapshot-based file diff extraction even when the - /// snapshot repo is on disk. Useful for tests / offline runs. + /// Skip the git2 snapshot-diff IO. Useful for tests with no + /// snapshot repo on disk. pub no_snapshot_diffs: bool, } -/// Derive a `Path` from a loaded opencode `Session`. +/// Derive a [`Path`] from an opencode [`Session`]. pub fn derive_path(session: &Session, config: &DeriveConfig) -> Path { - let view = to_view(session); - derive_path_from_view(session, &view, config, &PathResolver::new()) + derive_path_with_resolver(session, config, &PathResolver::new()) } /// Like [`derive_path`] but with a custom `PathResolver` (useful for @@ -47,526 +33,31 @@ pub fn derive_path_with_resolver( config: &DeriveConfig, resolver: &PathResolver, ) -> Path { - let view = to_view(session); - derive_path_from_view(session, &view, config, resolver) -} - -/// Derive a `Path` from multiple sessions. 
-pub fn derive_project(sessions: &[Session], config: &DeriveConfig) -> Vec { - sessions.iter().map(|s| derive_path(s, config)).collect() -} - -fn derive_path_from_view( - session: &Session, - view: &ConversationView, - config: &DeriveConfig, - resolver: &PathResolver, -) -> Path { - let session_short: String = session - .id - .trim_start_matches("ses_") - .chars() - .take(8) - .collect(); - let path_id = format!("path-opencode-{}", session_short); - let convo_artifact = format!("opencode://{}", session.id); - - // Open the snapshot git repo if present. A single open for the - // whole derive is fine — git2 is thread-local enough for our needs. - let snapshot_repo: Option = if config.no_snapshot_diffs { - None + let view = if config.no_snapshot_diffs { + to_view(session) } else { - resolver - .snapshot_gitdir(&session.project_id, &session.directory) - .ok() - .and_then(|gd| git2::Repository::open(gd).ok()) - }; - - let mut steps: Vec = Vec::with_capacity(view.turns.len()); - let mut actors: HashMap = HashMap::new(); - let mut last_step_id: Option = None; - let mut prev_snapshot_after: Option = None; - let mut all_files: Vec = Vec::new(); - let mut files_seen = std::collections::HashSet::::new(); - - for (turn_idx, turn) in view.turns.iter().enumerate() { - let Some(step) = build_step( - turn_idx, - turn, - &convo_artifact, - last_step_id.as_deref(), - &mut actors, - &snapshot_repo, - &mut prev_snapshot_after, - &mut all_files, - &mut files_seen, - ) else { - continue; - }; - last_step_id = Some(step.step.id.clone()); - steps.push(step); - } - - let head = last_step_id.unwrap_or_else(|| "empty".to_string()); - - // Base: CLI-override wins; otherwise session.directory; fall back - // to the first turn's working_dir. 
- let base_uri = config - .project_path - .clone() - .or_else(|| Some(session.directory.to_string_lossy().to_string())) - .map(|p| { - if p.starts_with('/') { - format!("file://{}", p) - } else { - p - } - }); - // Base ref: first-root-commit SHA (== project_id) is a stable - // ancestor identifier. - let base_ref = Some(session.project_id.clone()); - let base = base_uri.map(|uri| Base { - uri, - ref_str: base_ref, - branch: None, - }); - - // Top-level path meta: actors, title, source, opencode metadata. - let mut path_extra: HashMap = HashMap::new(); - let mut oc: Map = Map::new(); - oc.insert("session_id".into(), Value::String(session.id.clone())); - oc.insert( - "project_id".into(), - Value::String(session.project_id.clone()), - ); - oc.insert("slug".into(), Value::String(session.slug.clone())); - oc.insert("version".into(), Value::String(session.version.clone())); - if let Some(total) = view.total_usage.as_ref() { - oc.insert( - "total_tokens".into(), - serde_json::to_value(total).unwrap_or(Value::Null), - ); - } - if !all_files.is_empty() { - oc.insert( - "files_changed".into(), - Value::Array(all_files.iter().map(|p| Value::String(p.clone())).collect()), - ); - } - path_extra.insert("opencode".into(), Value::Object(oc)); - - Path { - path: PathIdentity { - id: path_id, - base, - head, - graph_ref: None, - }, - steps, - meta: Some(PathMeta { - title: Some(format!("opencode session: {}", session.title)), - source: Some("opencode".to_string()), - actors: if actors.is_empty() { - None - } else { - Some(actors) - }, - extra: path_extra, - ..Default::default() - }), - } -} - -#[allow(clippy::too_many_arguments)] -fn build_step( - turn_idx: usize, - turn: &Turn, - convo_artifact: &str, - parent_id: Option<&str>, - actors: &mut HashMap, - snapshot_repo: &Option, - prev_snapshot_after: &mut Option, - all_files: &mut Vec, - files_seen: &mut std::collections::HashSet, -) -> Option { - // Skip empty carrier turns. 
- if turn.text.is_empty() && turn.tool_uses.is_empty() && turn.thinking.is_none() { - return None; - } - - let (actor, role_str) = resolve_actor(turn, actors); - - let mut convo_extra: HashMap = HashMap::new(); - convo_extra.insert("role".into(), json!(role_str)); - if !turn.text.is_empty() { - convo_extra.insert("text".into(), json!(turn.text)); - } - if let Some(th) = turn.thinking.as_deref() - && !th.is_empty() - { - convo_extra.insert("thinking".into(), json!(th)); - } - if !turn.tool_uses.is_empty() { - let calls: Vec = turn - .tool_uses - .iter() - .map(|tu| { - json!({ - "name": tu.name, - "call_id": tu.id, - "category": tu.category, - "summary": tool_call_summary(tu), - "status": if let Some(r) = tu.result.as_ref() { - if r.is_error { "error" } else { "success" } - } else { "pending" }, - }) - }) - .collect(); - convo_extra.insert("tool_calls".into(), Value::Array(calls)); - } - if let Some(u) = turn.token_usage.as_ref() { - convo_extra.insert("token_usage".into(), json!(u)); - } - if let Some(sr) = turn.stop_reason.as_deref() - && !sr.is_empty() - { - convo_extra.insert("stop_reason".into(), json!(sr)); - } - - let convo_change = ArtifactChange { - raw: None, - structural: Some(StructuralChange { - change_type: "conversation.append".to_string(), - extra: convo_extra, - }), - }; - - let mut changes: HashMap = HashMap::new(); - changes.insert(convo_artifact.to_string(), convo_change); - - // Extract snapshot pair (before, after) for this turn. - let snapshots = turn - .extra - .get("opencode") - .and_then(|oc| oc.get("snapshots")) - .and_then(|v| v.as_array()) - .map(|arr| { - arr.iter() - .filter_map(|v| v.as_str().map(str::to_string)) - .collect::>() - }) - .unwrap_or_default(); - let (before, after) = match (snapshots.first(), snapshots.last()) { - (Some(first), Some(last)) => { - // The "before" state is whichever is earlier: the snapshot - // the previous turn ended on, or the first snapshot of - // this turn (which usually match). 
Prefer the prior-turn's - // ending snapshot — it captures the pre-step state even - // when this turn's first step-start is missing. - let b = prev_snapshot_after.clone().unwrap_or_else(|| first.clone()); - (Some(b), Some(last.clone())) - } - _ => (None, None), - }; - - // First pass: pull real unified diffs from the snapshot repo for - // files opencode could see (i.e. not gitignored). - if let (Some(b), Some(a), Some(repo)) = (&before, &after, snapshot_repo.as_ref()) - && b != a - { - match diff_trees(repo, b, a) { - Ok(file_changes) => { - for (file_path, artifact_change) in file_changes { - if files_seen.insert(file_path.clone()) { - all_files.push(file_path.clone()); - } - changes.insert(file_path, artifact_change); - } - } - Err(e) => { - eprintln!( - "Warning: snapshot diff {}..{} failed: {}", - &b[..b.len().min(8)], - &a[..a.len().min(8)], - e - ); - } - } - } - - // Second pass: catch files opencode could NOT see in the snapshot - // repo — either because there's no snapshot repo, no snapshot pair, - // or the pair's diff was empty (common when the target files are - // under a .gitignored path). Use tool inputs as the best available - // evidence; no `raw` perspective, but the path and operation still - // land on the step. 
- for tu in &turn.tool_uses { - let Some(path) = tool_input_file_path(tu) else { - continue; - }; - if changes.contains_key(&path) { - continue; - } - if files_seen.insert(path.clone()) { - all_files.push(path.clone()); - } - let op = tool_to_operation(&tu.name); - let mut extra = HashMap::new(); - extra.insert("operation".into(), json!(op)); - extra.insert("tool".into(), json!(tu.name)); - extra.insert( - "source".into(), - json!(if snapshot_repo.is_some() { - "tool_input_gitignored" - } else { - "tool_input" - }), - ); - changes.insert( - path, - ArtifactChange { - raw: None, - structural: Some(StructuralChange { - change_type: format!("opencode.{}", op), - extra, - }), - }, - ); - } - - // Advance prev_snapshot_after for the next turn. - if let Some(a) = &after { - *prev_snapshot_after = Some(a.clone()); - } - - let step_id = format!("step-{:04}", turn_idx + 1); - let parents = parent_id.map(|p| vec![p.to_string()]).unwrap_or_default(); - - Some(Step { - step: StepIdentity { - id: step_id, - parents, - actor, - timestamp: turn.timestamp.clone(), - }, - change: changes, - meta: None, - }) -} - -fn resolve_actor( - turn: &Turn, - actors: &mut HashMap, -) -> (String, &'static str) { - match &turn.role { - Role::User => { - actors - .entry("human:user".to_string()) - .or_insert_with(|| ActorDefinition { - name: Some("User".to_string()), - ..Default::default() - }); - ("human:user".to_string(), "user") - } - Role::Assistant => { - let (key, model_str) = match &turn.model { - Some(m) if !m.is_empty() => (format!("agent:{}", m), m.clone()), - _ => ("agent:opencode".to_string(), "opencode".to_string()), - }; - let provider = turn - .extra - .get("opencode") - .and_then(|oc| oc.get("providerID")) - .and_then(|v| v.as_str()) - .map(str::to_string); - actors - .entry(key.clone()) - .or_insert_with(|| ActorDefinition { - name: Some("opencode".to_string()), - provider: provider.clone(), - model: Some(model_str.clone()), - identities: provider - .map(|p| { - vec![Identity { - 
system: p, - id: model_str, - }] - }) - .unwrap_or_default(), - ..Default::default() - }); - (key, "assistant") - } - Role::System => { - actors - .entry("system:opencode".to_string()) - .or_insert_with(|| ActorDefinition { - name: Some("opencode system".to_string()), - ..Default::default() - }); - ("system:opencode".to_string(), "system") - } - Role::Other(s) => { - let key = format!("other:{}", s); - actors - .entry(key.clone()) - .or_insert_with(|| ActorDefinition { - name: Some(s.clone()), - ..Default::default() - }); - (key, "other") - } - } -} - -fn tool_call_summary(tu: &toolpath_convo::ToolInvocation) -> String { - let pick = |k: &str| -> Option { - tu.input.get(k).and_then(|v| v.as_str()).map(str::to_string) - }; - let s = match tu.name.as_str() { - "bash" | "shell" | "exec" => pick("command").or_else(|| pick("cmd")), - "read" | "list" | "view" | "ls" => pick("filePath").or_else(|| pick("path")), - "write" | "edit" | "multiedit" | "patch" => pick("filePath") - .or_else(|| pick("file_path")) - .or_else(|| pick("path")), - "glob" | "grep" | "search" => pick("pattern").or_else(|| pick("query")), - "webfetch" | "fetch" => pick("url"), - "websearch" => pick("query"), - "task" | "agent" | "spawn_agent" => pick("prompt").or_else(|| pick("task")), - _ => None, + to_view_with_resolver(session, resolver) }; - s.unwrap_or_default() -} - -fn tool_input_file_path(tu: &toolpath_convo::ToolInvocation) -> Option { - tu.input - .get("filePath") - .or_else(|| tu.input.get("file_path")) - .or_else(|| tu.input.get("path")) - .and_then(|v| v.as_str()) - .map(str::to_string) -} - -fn tool_to_operation(name: &str) -> &'static str { - match name { - "write" => "add", - "edit" | "multiedit" | "patch" => "update", - "delete" | "rm" => "delete", - _ => "touch", - } -} - -fn diff_trees( - repo: &git2::Repository, - before: &str, - after: &str, -) -> Result, git2::Error> { - let before_obj = repo.revparse_single(before)?; - let after_obj = repo.revparse_single(after)?; - let 
before_tree = before_obj.peel_to_tree()?; - let after_tree = after_obj.peel_to_tree()?; - - let mut opts = git2::DiffOptions::new(); - opts.context_lines(3); - opts.include_ignored(false); - opts.ignore_submodules(true); - let diff = repo.diff_tree_to_tree(Some(&before_tree), Some(&after_tree), Some(&mut opts))?; - - // Collect unified-diff text + typed op per file. - let mut by_path: HashMap)> = HashMap::new(); - - diff.print(git2::DiffFormat::Patch, |delta, _hunk, line| { - let Some(new_path) = delta.new_file().path() else { - // Handle delete: old_file path, no new - if let Some(old) = delta.old_file().path() { - let buf = by_path - .entry(old.to_path_buf()) - .or_insert_with(|| (String::new(), "delete", None)); - append_diff_line(&mut buf.0, line); - } - return true; - }; - let op = classify_delta(&delta); - let entry = by_path.entry(new_path.to_path_buf()).or_insert_with(|| { - ( - String::new(), - op, - delta.old_file().path().map(|p| p.to_path_buf()), - ) - }); - append_diff_line(&mut entry.0, line); - true - })?; - - let mut out: Vec<(String, ArtifactChange)> = Vec::new(); - for (path, (raw_diff, op, old_path)) in by_path { - let file_str = path.to_string_lossy().to_string(); - let mut extra = HashMap::new(); - extra.insert("operation".into(), json!(op)); - if op == "rename" - && let Some(old) = &old_path - { - extra.insert("from".into(), json!(old.to_string_lossy())); + let base_uri = config.project_path.as_ref().map(|p| { + if p.starts_with('/') { + format!("file://{}", p) + } else { + p.clone() } - out.push(( - file_str, - ArtifactChange { - raw: if raw_diff.is_empty() { - None - } else { - Some(raw_diff) - }, - structural: Some(StructuralChange { - change_type: format!("opencode.{}", op), - extra, - }), - }, - )); - } - // Stable ordering for reproducibility. 
- out.sort_by(|a, b| a.0.cmp(&b.0)); - Ok(out) -} - -fn classify_delta(delta: &git2::DiffDelta) -> &'static str { - use git2::Delta; - match delta.status() { - Delta::Added => "add", - Delta::Deleted => "delete", - Delta::Modified => "update", - Delta::Renamed => "rename", - Delta::Copied => "copy", - Delta::Typechange => "update", - _ => "update", - } -} - -fn append_diff_line(buf: &mut String, line: git2::DiffLine<'_>) { - use git2::DiffLineType; - let prefix = match line.origin_value() { - DiffLineType::Context => " ", - DiffLineType::Addition => "+", - DiffLineType::Deletion => "-", - DiffLineType::ContextEOFNL | DiffLineType::AddEOFNL | DiffLineType::DeleteEOFNL => "", - _ => "", + }); + let cfg = toolpath_convo::DeriveConfig { + base_uri, + title: Some(format!("opencode session: {}", session.title)), + ..Default::default() }; - buf.push_str(prefix); - if let Ok(s) = std::str::from_utf8(line.content()) { - buf.push_str(s); - } + toolpath_convo::derive_path(&view, &cfg) } -// Keep tool_category reachable — the match in provider.rs is what -// populates categories, but consumers importing `derive` only may -// want the classifier too. -#[allow(dead_code)] -fn _use_tool_category(name: &str) -> Option { - tool_category(name) +/// Derive a `Path` from multiple sessions. +pub fn derive_project(sessions: &[Session], config: &DeriveConfig) -> Vec { + sessions.iter().map(|s| derive_path(s, config)).collect() } -#[allow(dead_code)] -fn _use_stdpath(_: &StdPath) {} - #[cfg(test)] mod tests { use super::*; @@ -576,51 +67,63 @@ mod tests { use tempfile::TempDir; use toolpath::v1::Graph; + /// Fixture with the real opencode schema (matches what the SQLite + /// reader expects) but no snapshot git repo on disk. Tests run with + /// `no_snapshot_diffs: true` so the tool-input fallback kicks in. 
fn fixture(body_sql: &str) -> (TempDir, OpencodeConvo, PathResolver) { let temp = TempDir::new().unwrap(); - let data = temp.path().join(".local/share/opencode"); - fs::create_dir_all(&data).unwrap(); - let conn = Connection::open(data.join("opencode.db")).unwrap(); + let data_dir = temp.path().join(".local/share/opencode"); + fs::create_dir_all(&data_dir).unwrap(); + let db_path = data_dir.join("opencode.db"); + let conn = Connection::open(&db_path).unwrap(); conn.execute_batch(&format!( r#" - CREATE TABLE project (id text PRIMARY KEY, worktree text NOT NULL, vcs text, name text, - icon_url text, icon_color text, time_created integer NOT NULL, time_updated integer NOT NULL, - time_initialized integer, sandboxes text NOT NULL, commands text); - CREATE TABLE session (id text PRIMARY KEY, project_id text NOT NULL, parent_id text, - slug text NOT NULL, directory text NOT NULL, title text NOT NULL, version text NOT NULL, - share_url text, summary_additions integer, summary_deletions integer, summary_files integer, - summary_diffs text, revert text, permission text, - time_created integer NOT NULL, time_updated integer NOT NULL, - time_compacting integer, time_archived integer, workspace_id text); - CREATE TABLE message (id text PRIMARY KEY, session_id text NOT NULL, - time_created integer NOT NULL, time_updated integer NOT NULL, data text NOT NULL); - CREATE TABLE part (id text PRIMARY KEY, message_id text NOT NULL, session_id text NOT NULL, - time_created integer NOT NULL, time_updated integer NOT NULL, data text NOT NULL); + CREATE TABLE project ( + id text PRIMARY KEY, worktree text NOT NULL, vcs text, name text, + icon_url text, icon_color text, + time_created integer NOT NULL, time_updated integer NOT NULL, + time_initialized integer, sandboxes text NOT NULL, commands text + ); + CREATE TABLE session ( + id text PRIMARY KEY, project_id text NOT NULL, parent_id text, + slug text NOT NULL, directory text NOT NULL, title text NOT NULL, + version text NOT NULL, 
share_url text, + summary_additions integer, summary_deletions integer, + summary_files integer, summary_diffs text, revert text, permission text, + time_created integer NOT NULL, time_updated integer NOT NULL, + time_compacting integer, time_archived integer, workspace_id text + ); + CREATE TABLE message ( + id text PRIMARY KEY, session_id text NOT NULL, + time_created integer NOT NULL, time_updated integer NOT NULL, + data text NOT NULL + ); + CREATE TABLE part ( + id text PRIMARY KEY, message_id text NOT NULL, session_id text NOT NULL, + time_created integer NOT NULL, time_updated integer NOT NULL, + data text NOT NULL + ); {body_sql} - "# + "# )) .unwrap(); drop(conn); let resolver = PathResolver::new() .with_home(temp.path()) - .with_data_dir(&data); - ( - temp, - OpencodeConvo::with_resolver(resolver.clone()), - resolver, - ) + .with_data_dir(&data_dir); + let mgr = OpencodeConvo::with_resolver(resolver.clone()); + (temp, mgr, resolver) } const BASIC_SQL: &str = r#" INSERT INTO project (id, worktree, time_created, time_updated, sandboxes) VALUES ('proj_sha', '/tmp/proj', 1000, 3000, '[]'); - INSERT INTO session (id, project_id, slug, directory, title, version, time_created, time_updated) - VALUES ('ses_abc123', 'proj_sha', 'slug', '/tmp/proj', 'Build pickle', '1.3.10', 1000, 3000); + INSERT INTO session (id, project_id, slug, directory, title, version, + time_created, time_updated) + VALUES ('ses_abc123', 'proj_sha', 'pickle-a-thing', '/tmp/proj', 'Pickle a thing', '0.10.0', 1000, 1100); INSERT INTO message (id, session_id, time_created, time_updated, data) VALUES - ('m1','ses_abc123',1001,1001, - '{"role":"user","time":{"created":1001},"agent":"build","model":{"providerID":"opencode","modelID":"big-pickle"}}'), - ('m2','ses_abc123',1002,1100, - 
'{"parentID":"m1","role":"assistant","mode":"build","agent":"build","path":{"cwd":"/tmp/proj","root":"/tmp/proj"},"cost":0.01,"tokens":{"input":10,"output":5,"reasoning":0,"cache":{"read":0,"write":0}},"modelID":"claude-sonnet-4-6","providerID":"anthropic","time":{"created":1002,"completed":1100},"finish":"stop"}'); + ('m1', 'ses_abc123', 1001, 1001, '{"role":"user","time":{"created":1001},"agent":"build","model":{"providerID":"opencode","modelID":"big-pickle"}}'), + ('m2', 'ses_abc123', 1002, 1100, '{"parentID":"m1","role":"assistant","mode":"build","agent":"build","path":{"cwd":"/tmp/proj","root":"/tmp/proj"},"cost":0.01,"tokens":{"input":10,"output":5,"reasoning":0,"cache":{"read":0,"write":0}},"modelID":"claude-sonnet-4-6","providerID":"anthropic","time":{"created":1002,"completed":1100},"finish":"stop"}'); INSERT INTO part (id, message_id, session_id, time_created, time_updated, data) VALUES ('p1','m1','ses_abc123',1001,1001,'{"type":"text","text":"make a pickle"}'), ('p2','m2','ses_abc123',1002,1002,'{"type":"step-start","snapshot":"snap_a"}'), @@ -648,14 +151,14 @@ mod tests { p.path.base.as_ref().unwrap().ref_str.as_deref(), Some("proj_sha") ); - // 2 messages → 2 steps. - assert_eq!(p.steps.len(), 2); - // Head matches last step. - assert_eq!(p.path.head, p.steps.last().unwrap().step.id); + // 2 messages → 2 turns (both have content/tool calls). 
+ assert_eq!(p.steps.iter().filter(|s| { + s.change.values().any(|c| c.structural.as_ref().is_some_and(|sc| sc.change_type == "conversation.append")) + }).count(), 2); } #[test] - fn derive_validates() { + fn derive_emits_producer() { let (_t, mgr, resolver) = fixture(BASIC_SQL); let s = mgr.read_session("ses_abc123").unwrap(); let p = derive_path_with_resolver( @@ -666,16 +169,13 @@ mod tests { }, &resolver, ); - let doc = Graph::from_path(p); - let json = doc.to_json().unwrap(); - let parsed = Graph::from_json(&json).unwrap(); - let pp = parsed.single_path().expect("single-path graph"); - let anc = toolpath::v1::query::ancestors(&pp.steps, &pp.path.head); - assert_eq!(anc.len(), pp.steps.len(), "all steps on head ancestry"); + let producer = p.meta.as_ref().unwrap().extra.get("producer").unwrap(); + assert_eq!(producer["name"], "opencode"); + assert_eq!(producer["version"], "0.10.0"); } #[test] - fn derive_actors_populated() { + fn derive_fallback_file_mutation_from_tool() { let (_t, mgr, resolver) = fixture(BASIC_SQL); let s = mgr.read_session("ses_abc123").unwrap(); let p = derive_path_with_resolver( @@ -686,17 +186,25 @@ mod tests { }, &resolver, ); - let actors = p.meta.as_ref().unwrap().actors.as_ref().unwrap(); - assert!(actors.contains_key("human:user")); - assert!(actors.contains_key("agent:claude-sonnet-4-6")); + // The assistant turn's `write` tool produces a sibling `file.write` + // entry via the tool-input fallback (no snapshot repo on disk). + let file_step = p + .steps + .iter() + .find(|s| s.change.contains_key("/tmp/proj/main.cpp")) + .expect("no step carries the file artifact"); + let change = &file_step.change["/tmp/proj/main.cpp"]; + let structural = change.structural.as_ref().unwrap(); + assert_eq!(structural.change_type, "file.write"); + assert_eq!(structural.extra["operation"], "add"); + // tool_id links back to the write tool. 
+ assert_eq!(structural.extra["tool_id"], "c1"); } #[test] - fn derive_fallback_file_artifact_from_tool() { + fn derive_validates_as_single_path_graph() { let (_t, mgr, resolver) = fixture(BASIC_SQL); let s = mgr.read_session("ses_abc123").unwrap(); - // With no_snapshot_diffs, derive uses the tool-input fallback - // to record which files were touched. let p = derive_path_with_resolver( &s, &DeriveConfig { @@ -705,100 +213,11 @@ mod tests { }, &resolver, ); - let file_step = p - .steps - .iter() - .find(|s| s.change.contains_key("/tmp/proj/main.cpp")) - .expect("file artifact missing"); - let change = &file_step.change["/tmp/proj/main.cpp"]; - assert!( - change.raw.is_none(), - "no snapshot repo → no raw perspective" - ); - assert_eq!( - change.structural.as_ref().unwrap().change_type, - "opencode.add" - ); - } - - #[test] - fn derive_uses_snapshot_git_when_available() { - // Build a real snapshot git repo on disk with two trees (before - // and after) and check that derive populates the raw unified diff. - let (_t, mgr, resolver) = fixture(BASIC_SQL); - let session = mgr.read_session("ses_abc123").unwrap(); - - let gitdir = resolver - .snapshot_gitdir(&session.project_id, &session.directory) - .unwrap(); - fs::create_dir_all(&gitdir).unwrap(); - let repo = git2::Repository::init_bare(&gitdir).unwrap(); - - // Build "before" tree with only a README. - let before_tree = { - let mut tb = repo.treebuilder(None).unwrap(); - let blob = repo.blob(b"hello\n").unwrap(); - tb.insert("README", blob, 0o100644).unwrap(); - tb.write().unwrap() - }; - // Build "after" tree with README + main.cpp. 
- let after_tree = { - let mut tb = repo.treebuilder(None).unwrap(); - let readme = repo.blob(b"hello\n").unwrap(); - tb.insert("README", readme, 0o100644).unwrap(); - let main = repo.blob(b"int main(){ return 0; }\n").unwrap(); - tb.insert("main.cpp", main, 0o100644).unwrap(); - tb.write().unwrap() - }; - - // Rewrite the session's snapshot SHAs in the DB to point at - // these real trees. Easier: point snap_a/snap_b at them by - // writing refs. - repo.reference("refs/snapshots/snap_a", before_tree, true, "before") - .unwrap(); - repo.reference("refs/snapshots/snap_b", after_tree, true, "after") - .unwrap(); - - // Edit the SQLite to replace snap_a/snap_b part data with - // strings that git2's revparse can resolve directly. Use the - // raw tree SHA hex strings. - let conn = rusqlite::Connection::open(resolver.db_path().unwrap()).unwrap(); - conn.execute( - "UPDATE part SET data = REPLACE(data, 'snap_a', ?1) WHERE id = 'p2'", - rusqlite::params![before_tree.to_string()], - ) - .unwrap(); - conn.execute( - "UPDATE part SET data = REPLACE(data, 'snap_b', ?1) WHERE id = 'p5'", - rusqlite::params![after_tree.to_string()], - ) - .unwrap(); - drop(conn); - - let session = mgr.read_session("ses_abc123").unwrap(); - let p = derive_path_with_resolver(&session, &DeriveConfig::default(), &resolver); - - let file_step = p - .steps - .iter() - .find(|s| s.change.contains_key("main.cpp")) - .expect("main.cpp artifact missing"); - let change = &file_step.change["main.cpp"]; - assert!( - change.raw.is_some(), - "raw unified diff should be populated from the snapshot repo" - ); - assert!( - change - .raw - .as_ref() - .unwrap() - .contains("+int main(){ return 0; }"), - "diff must include the new content" - ); - assert_eq!( - change.structural.as_ref().unwrap().change_type, - "opencode.add" - ); + let doc = Graph::from_path(p); + let json = doc.to_json().unwrap(); + let parsed = Graph::from_json(&json).unwrap(); + let pp = parsed.single_path().expect("single-path graph"); + let 
anc = toolpath::v1::query::ancestors(&pp.steps, &pp.path.head); + assert_eq!(anc.len(), pp.steps.len(), "all steps on head ancestry"); } } diff --git a/crates/toolpath-opencode/src/project.rs b/crates/toolpath-opencode/src/project.rs index 32a0e94..040161d 100644 --- a/crates/toolpath-opencode/src/project.rs +++ b/crates/toolpath-opencode/src/project.rs @@ -768,6 +768,7 @@ mod tests { environment: None, delegations: vec![], extra: HashMap::new(), + file_mutations: Vec::new(), } } @@ -786,6 +787,7 @@ mod tests { environment: None, delegations: vec![], extra: HashMap::new(), + file_mutations: Vec::new(), } } diff --git a/crates/toolpath-opencode/src/provider.rs b/crates/toolpath-opencode/src/provider.rs index 227b14a..eb3b90e 100644 --- a/crates/toolpath-opencode/src/provider.rs +++ b/crates/toolpath-opencode/src/provider.rs @@ -40,8 +40,8 @@ use crate::types::{ }; use toolpath_convo::{ ConversationEvent, ConversationMeta, ConversationProvider, ConversationView, - ConvoError as ConvoTraitError, DelegatedWork, EnvironmentSnapshot, Role, TokenUsage, - ToolCategory, ToolInvocation, ToolResult, Turn, + ConvoError as ConvoTraitError, DelegatedWork, EnvironmentSnapshot, FileMutation, ProducerInfo, + Role, SessionBase, TokenUsage, ToolCategory, ToolInvocation, ToolResult, Turn, }; /// Provider for opencode sessions. @@ -149,9 +149,17 @@ pub fn native_name(category: ToolCategory, args: &Value) -> Option<&'static str> // ── Session → ConversationView ───────────────────────────────────── /// Convert a parsed opencode [`Session`] to the provider-agnostic -/// [`ConversationView`] shape. +/// [`ConversationView`] shape. File mutations from the snapshot git repo +/// are not populated; use [`to_view_with_resolver`] when you have one. 
pub fn to_view(session: &Session) -> ConversationView { - Builder::new(session).build() + to_view_with_resolver(session, &PathResolver::new()) +} + +/// Like [`to_view`] but opens opencode's snapshot git repository via the +/// resolver and pre-resolves each turn's file mutations against the +/// snapshot pair. Falls back silently when the repo isn't present. +pub fn to_view_with_resolver(session: &Session, resolver: &PathResolver) -> ConversationView { + Builder::new(session).build_with_resolver(resolver) } struct Builder<'a> { @@ -177,6 +185,53 @@ impl<'a> Builder<'a> { } } + fn build_with_resolver(self, resolver: &PathResolver) -> ConversationView { + let session_version = self.session.version.clone(); + let session_directory = self.session.directory.to_string_lossy().to_string(); + let session_project_id = self.session.project_id.clone(); + let snapshot_repo = resolver + .snapshot_gitdir(&session_project_id, &self.session.directory) + .ok() + .and_then(|gd| git2::Repository::open(gd).ok()); + + let mut view = self.build(); + attach_snapshot_diffs(&mut view, snapshot_repo.as_ref()); + attach_tool_input_fallbacks(&mut view); + + // Producer + base. + view.producer = Some(ProducerInfo { + name: "opencode".into(), + version: Some(session_version), + }); + view.base = Some(SessionBase { + working_dir: Some(session_directory), + vcs_revision: Some(session_project_id), + vcs_branch: None, + vcs_remote: None, + }); + + // opencode's wire format carries `parentID` on assistant messages + // pointing back at the previous user message — that's the natural + // chain. User messages legitimately have no parent. Don't + // synthesize anything here (would break the matrix idempotence: + // user turns would gain a synthetic parent that the projector + // can't preserve, causing parent_id graphs to diverge across + // iterations). + + // Refresh files_changed so it matches what landed on turns. 
+ let mut seen = std::collections::HashSet::new(); + let mut ordered = Vec::new(); + for turn in &view.turns { + for fm in &turn.file_mutations { + if seen.insert(fm.path.clone()) { + ordered.push(fm.path.clone()); + } + } + } + view.files_changed = ordered; + view + } + fn build(mut self) -> ConversationView { for msg in &self.session.messages { match &msg.data { @@ -255,6 +310,7 @@ impl<'a> Builder<'a> { environment, delegations: Vec::new(), extra, + file_mutations: Vec::new(), }); } @@ -449,6 +505,7 @@ impl<'a> Builder<'a> { environment, delegations, extra, + file_mutations: Vec::new(), }); } } @@ -651,6 +708,196 @@ impl ConversationProvider for OpencodeConvo { } } +// ── Snapshot diff / tool-input fallback ──────────────────────────────── + +/// For each assistant turn, walk its `extra["opencode"]["snapshots"]` +/// across turns and populate `Turn.file_mutations` from the git2 +/// tree↔tree diff of the snapshot pair. No-op when `repo` is `None`. +fn attach_snapshot_diffs(view: &mut ConversationView, repo: Option<&git2::Repository>) { + let Some(repo) = repo else { return }; + let mut prev_after: Option = None; + for turn in view.turns.iter_mut() { + let snapshots: Vec = turn + .extra + .get("opencode") + .and_then(|oc| oc.get("snapshots")) + .and_then(|v| v.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str().map(str::to_string)) + .collect() + }) + .unwrap_or_default(); + let (Some(first), Some(last)) = (snapshots.first(), snapshots.last()) else { + continue; + }; + // Prefer the previous turn's `after` snapshot as `before`. 
+ let before = prev_after.clone().unwrap_or_else(|| first.clone()); + let after = last.clone(); + prev_after = Some(after.clone()); + if before == after { + continue; + } + match diff_trees(repo, &before, &after) { + Ok(mutations) => { + for fm in mutations { + turn.file_mutations.push(fm); + } + } + Err(e) => { + eprintln!( + "Warning: snapshot diff {}..{} failed: {}", + &before[..before.len().min(8)], + &after[..after.len().min(8)], + e + ); + } + } + } +} + +/// For each file-write tool invocation whose path isn't already covered +/// by a snapshot-diff `FileMutation`, synthesize a no-raw mutation +/// attributed to the tool. Catches files opencode wrote that are +/// gitignored (so the snapshot pair shows no change) and the case +/// where there's no snapshot repo at all. +fn attach_tool_input_fallbacks(view: &mut ConversationView) { + for turn in view.turns.iter_mut() { + let existing: std::collections::HashSet = turn + .file_mutations + .iter() + .map(|fm| fm.path.clone()) + .collect(); + let mut extras: Vec = Vec::new(); + for tu in &turn.tool_uses { + let Some(path) = tool_input_file_path(tu) else { + continue; + }; + if existing.contains(&path) { + continue; + } + extras.push(FileMutation { + path, + tool_id: Some(tu.id.clone()), + operation: Some(tool_to_operation(&tu.name).to_string()), + ..Default::default() + }); + } + turn.file_mutations.extend(extras); + } +} + +fn tool_input_file_path(tu: &ToolInvocation) -> Option { + tu.input + .get("filePath") + .or_else(|| tu.input.get("file_path")) + .or_else(|| tu.input.get("path")) + .and_then(|v| v.as_str()) + .map(str::to_string) +} + +fn tool_to_operation(name: &str) -> &'static str { + match name { + "write" => "add", + "edit" | "multiedit" | "patch" => "update", + "delete" | "rm" => "delete", + _ => "touch", + } +} + +fn diff_trees( + repo: &git2::Repository, + before: &str, + after: &str, +) -> std::result::Result, git2::Error> { + let before_obj = repo.revparse_single(before)?; + let after_obj = 
repo.revparse_single(after)?; + let before_tree = before_obj.peel_to_tree()?; + let after_tree = after_obj.peel_to_tree()?; + + let mut opts = git2::DiffOptions::new(); + opts.context_lines(3); + opts.include_ignored(false); + opts.ignore_submodules(true); + let diff = repo.diff_tree_to_tree(Some(&before_tree), Some(&after_tree), Some(&mut opts))?; + + use std::path::PathBuf; + let mut by_path: HashMap)> = HashMap::new(); + + diff.print(git2::DiffFormat::Patch, |delta, _hunk, line| { + let Some(new_path) = delta.new_file().path() else { + if let Some(old) = delta.old_file().path() { + let buf = by_path + .entry(old.to_path_buf()) + .or_insert_with(|| (String::new(), "delete", None)); + append_diff_line(&mut buf.0, line); + } + return true; + }; + let op = classify_delta(&delta); + let entry = by_path.entry(new_path.to_path_buf()).or_insert_with(|| { + ( + String::new(), + op, + delta.old_file().path().map(|p| p.to_path_buf()), + ) + }); + append_diff_line(&mut entry.0, line); + true + })?; + + let mut out: Vec = by_path + .into_iter() + .map(|(path, (raw_diff, op, old_path))| FileMutation { + path: path.to_string_lossy().into_owned(), + tool_id: None, + operation: Some(op.to_string()), + raw_diff: if raw_diff.is_empty() { + None + } else { + Some(raw_diff) + }, + before: None, + after: None, + rename_to: if op == "rename" { + old_path.map(|p| p.to_string_lossy().into_owned()) + } else { + None + }, + }) + .collect(); + out.sort_by(|a, b| a.path.cmp(&b.path)); + Ok(out) +} + +fn classify_delta(delta: &git2::DiffDelta) -> &'static str { + use git2::Delta; + match delta.status() { + Delta::Added => "add", + Delta::Deleted => "delete", + Delta::Modified => "update", + Delta::Renamed => "rename", + Delta::Copied => "copy", + Delta::Typechange => "update", + _ => "update", + } +} + +fn append_diff_line(buf: &mut String, line: git2::DiffLine<'_>) { + use git2::DiffLineType; + let prefix = match line.origin_value() { + DiffLineType::Context => " ", + DiffLineType::Addition 
=> "+", + DiffLineType::Deletion => "-", + DiffLineType::ContextEOFNL | DiffLineType::AddEOFNL | DiffLineType::DeleteEOFNL => "", + _ => "", + }; + buf.push_str(prefix); + if let Ok(s) = std::str::from_utf8(line.content()) { + buf.push_str(s); + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/toolpath-pi/src/project.rs b/crates/toolpath-pi/src/project.rs index 84b2570..6bd4507 100644 --- a/crates/toolpath-pi/src/project.rs +++ b/crates/toolpath-pi/src/project.rs @@ -773,6 +773,7 @@ mod tests { environment: None, delegations: vec![], extra: HashMap::new(), + file_mutations: Vec::new(), } } @@ -796,6 +797,7 @@ mod tests { environment: None, delegations: vec![], extra: HashMap::new(), + file_mutations: Vec::new(), } } diff --git a/crates/toolpath-pi/src/provider.rs b/crates/toolpath-pi/src/provider.rs index 9f44439..eb90117 100644 --- a/crates/toolpath-pi/src/provider.rs +++ b/crates/toolpath-pi/src/provider.rs @@ -343,6 +343,7 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { environment: Some(env.clone()), delegations: vec![], extra: extra_map, + file_mutations: Vec::new(), }); } @@ -386,6 +387,7 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { environment: Some(env.clone()), delegations: vec![], extra: extra_map, + file_mutations: Vec::new(), }); } @@ -422,6 +424,7 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { environment: Some(env.clone()), delegations: vec![], extra: extra_map, + file_mutations: Vec::new(), }); } @@ -463,6 +466,7 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { environment: Some(env.clone()), delegations: vec![], extra: extra_map, + file_mutations: Vec::new(), }); } @@ -684,6 +688,7 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { environment: Some(env.clone()), delegations, extra: extra_map, + file_mutations: Vec::new(), }); } } From 08e5aa12101b3cb03f15f6737b1abe6adbc78344 Mon Sep 17 00:00:00 2001 From: Ben Barber Date: Thu, 
14 May 2026 15:44:42 -0400 Subject: [PATCH 06/10] toolpath-claude: migrate to shared derive_path; biggest provider unified MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Claude was the most invasive migration — `derive.rs` shrinks from 2759 lines to ~165. All Claude-specific work moves into `provider.rs`: - `conversation_to_view` now pulls `view.base` (working_dir, vcs_branch from the first entry that has them) and `view.producer` (name: "claude-code", version from the first entry's `version`). - New `compute_file_mutations` helper populates `Turn.file_mutations` for every `FileWrite`-category tool invocation. Calls `git show HEAD:{path}` via the moved-in `git_head_content` to resolve `Write`'s pre-image, then `toolpath_convo::file_write_diff` to synthesize the unified diff. `tool_id` on each mutation links back to the invocation. Mutation `operation` ("add" for `Write`, "update" for `Edit` / `MultiEdit` / `NotebookEdit`) and `after` (Write content) populate where they're available. Behavior changes for claude-derived paths: - Tool executions no longer become their own `tool.invoke` steps (actor `agent:claude-code/tool:{tool_name}`). File mutations attach to the parent assistant turn step as sibling `file.write` change entries, like every other provider. - Conversation-artifact key unifies on `claude-code://` (was `agent://claude/`). - No more `conversation.init` step. The cwd / vcs_branch / version it carried lived only at the path level; they now ride `view.base` and `meta.extra.producer`, which both round-trip through extract. - Step ids are claude entry UUIDs (already what claude emitted). `derive.rs` reduced to: wrap with title override + `view = to_view()` + delegate to `toolpath_convo::derive_path`. The 100+ tests it carried collapse to four smoke checks (basic shape, producer present, actors populated, single-path ancestry). 
The covered behaviors live elsewhere now: shared derive_path tests in `toolpath-convo`, view-shape tests in `toolpath-claude/src/provider.rs` (38 tests, unchanged), and fidelity-roundtrip tests in `toolpath-claude/tests/` (18 tests, unchanged). Cross-harness `matrix_translation` test passes for all 25 cells. --- crates/toolpath-claude/src/derive.rs | 2778 +----------------------- crates/toolpath-claude/src/provider.rs | 138 +- 2 files changed, 242 insertions(+), 2674 deletions(-) diff --git a/crates/toolpath-claude/src/derive.rs b/crates/toolpath-claude/src/derive.rs index 17672bb..1fc1bbe 100644 --- a/crates/toolpath-claude/src/derive.rs +++ b/crates/toolpath-claude/src/derive.rs @@ -1,70 +1,16 @@ //! Derive Toolpath documents from Claude conversation logs. //! -//! The conversation itself is treated as an artifact under change. Each turn -//! appends to `agent://claude/` via a `conversation.append` -//! structural operation. Tool invocations produce separate steps with -//! `tool.invoke` structural changes. +//! Thin wrapper around the shared [`toolpath_convo::derive_path`]. All +//! Claude-specific work (cwd / git_branch / version → `view.base` and +//! `view.producer`, headerless preamble + non-message entries → +//! `view.events`, tool-result cross-entry assembly, file-write diff +//! synthesis via `git show HEAD:`) happens in +//! [`crate::provider::to_view`]; nothing provider-specific lives in this +//! module. use crate::provider::to_view; -use crate::types::{ContentPart, Conversation, MessageContent, MessageRole}; -use serde_json::json; -use std::collections::HashMap; -use std::path::Path as FsPath; -use std::process::Command; -use toolpath::v1::{ - ActorDefinition, ArtifactChange, Base, Identity, Path, PathIdentity, PathMeta, Step, - StepIdentity, StructuralChange, -}; -use toolpath_convo::file_write_diff; - -/// Best-effort lookup of a file's contents at `HEAD` in the git repo -/// rooted at `repo_dir` (or one of its ancestors). 
-/// -/// Shells out to `git show HEAD:`. Returns `None` when -/// any of these hold: `repo_dir` isn't inside a git repo, `path` isn't -/// tracked at `HEAD`, `git` isn't on `PATH`, or the command otherwise -/// fails. Used by the `Write`-tool before-state resolver; callers must -/// fall through to the empty-string diff on `None`. -/// -/// `path` may be absolute or relative. If absolute, it's made relative -/// to `repo_dir` before invoking git; if it doesn't sit beneath -/// `repo_dir`, returns `None`. -fn git_head_content(repo_dir: &str, path: &str) -> Option { - let repo = FsPath::new(repo_dir); - let file = FsPath::new(path); - let rel = if file.is_absolute() { - file.strip_prefix(repo).ok()?.to_path_buf() - } else { - file.to_path_buf() - }; - // `git show HEAD:` expects forward-slash paths. - let rel_str = rel.to_string_lossy().replace('\\', "/"); - let output = Command::new("git") - .arg("-C") - .arg(repo) - .arg("show") - .arg(format!("HEAD:{rel_str}")) - .output() - .ok()?; - if !output.status.success() { - return None; - } - String::from_utf8(output.stdout).ok() -} - -/// Resolve the local working-directory root for a conversation entry, -/// preferring the entry's own `cwd` (accurate per-turn) and falling -/// back to the conversation-level project path. Strips any `file://` -/// prefix the config may have carried. -fn resolve_local_dir<'a>( - config_project: Option<&'a str>, - conversation_project: Option<&'a str>, - entry_cwd: Option<&'a str>, -) -> Option { - let raw = entry_cwd.or(config_project).or(conversation_project)?; - let stripped = raw.strip_prefix("file://").unwrap_or(raw); - Some(stripped.to_string()) -} +use crate::types::Conversation; +use toolpath::v1::Path; /// Configuration for deriving Toolpath documents from Claude conversations. #[derive(Default)] @@ -75,775 +21,63 @@ pub struct DeriveConfig { pub include_thinking: bool, } -/// Map a Claude tool name to a category string. 
-/// -/// Keep in sync with [`crate::provider::tool_category`] — same table, -/// different return type (string for path-doc serialization vs -/// [`toolpath_convo::ToolCategory`] for in-memory views). -fn tool_category_str(name: &str) -> &'static str { - match name { - "Read" => "file_read", - "Glob" | "Grep" => "file_search", - "Write" | "Edit" | "MultiEdit" | "NotebookEdit" => "file_write", - "Bash" => "shell", - "WebFetch" | "WebSearch" => "network", - "Task" | "Agent" => "delegation", - _ => "unknown", - } -} - -/// Whether a tool operates on files (uses `file_path` input as artifact key). -fn is_file_tool(name: &str) -> bool { - matches!( - name, - "Read" | "Write" | "Edit" | "Glob" | "Grep" | "NotebookEdit" - ) -} - -/// A collected tool use from a content part. -struct ToolUseInfo { - id: String, - name: String, - input: serde_json::Value, -} - -/// Derive a single Toolpath Path from a Claude conversation. -/// -/// The conversation is modeled as an artifact at `agent://claude/`. -/// Each user or assistant turn produces a step whose `change` map contains -/// a `conversation.append` structural change on that artifact. Assistant turns -/// with tool uses additionally produce one step per tool type, each containing -/// `tool.invoke` structural changes. +/// Derive a Toolpath [`Path`] from a Claude [`Conversation`]. 
pub fn derive_path(conversation: &Conversation, config: &DeriveConfig) -> Path { - let session_short = safe_prefix(&conversation.session_id, 8); - let convo_artifact = format!("agent://claude/{}", conversation.session_id); - - // Build a ConversationView with cross-entry tool result assembly let view = to_view(conversation); - let turn_by_id: HashMap<&str, &toolpath_convo::Turn> = - view.turns.iter().map(|t| (t.id.as_str(), t)).collect(); - - let mut steps = Vec::new(); - let mut last_step_id: Option = None; - let mut actors: HashMap = HashMap::new(); - - // Generate conversation.init step from first entry metadata - let init_step = { - let mut init_extra = HashMap::new(); - for entry in &conversation.entries { - if let Some(cwd) = &entry.cwd { - init_extra.insert("working_dir".to_string(), json!(cwd)); - } - if let Some(branch) = &entry.git_branch { - init_extra.insert("vcs_branch".to_string(), json!(branch)); - } - if let Some(version) = &entry.version { - init_extra.insert("version".to_string(), json!(version)); - } - if !init_extra.is_empty() { - break; - } - } - - if !init_extra.is_empty() { - let mut changes = HashMap::new(); - changes.insert( - convo_artifact.clone(), - ArtifactChange { - raw: None, - structural: Some(StructuralChange { - change_type: "conversation.init".to_string(), - extra: init_extra, - }), - }, - ); - - let step = Step { - step: StepIdentity { - id: format!("{}-init", conversation.session_id), - parents: vec![], - actor: "tool:claude-code".into(), - timestamp: conversation - .entries - .first() - .map(|e| e.timestamp.clone()) - .unwrap_or_default(), - }, - change: changes, - meta: None, - }; - last_step_id = Some(step.step.id.clone()); - Some(step) + let prefix: String = conversation.session_id.chars().take(8).collect(); + let base_uri = config.project_path.as_ref().map(|p| { + if p.starts_with('/') { + format!("file://{}", p) } else { - None - } + p.clone() + } + }); + let cfg = toolpath_convo::DeriveConfig { + base_uri, + title: 
Some(format!("Claude session: {}", prefix)), + include_thinking: config.include_thinking, + ..Default::default() }; - - if let Some(init) = init_step { - actors - .entry("tool:claude-code".to_string()) - .or_insert_with(|| ActorDefinition { - name: Some("Claude Code".to_string()), - ..Default::default() - }); - steps.push(init); - } - - // Emit headerless preamble lines (ai-title, last-prompt, queue-operation, - // permission-mode, file-history-snapshot) as conversation.event steps so - // they survive the Path round-trip. These don't have a uuid or a message, - // so they live on `conversation.preamble`, not `entries`. - for (idx, raw) in conversation.preamble.iter().enumerate() { - let event_type = raw - .get("type") - .and_then(|v| v.as_str()) - .unwrap_or("preamble") - .to_string(); - let timestamp = raw - .get("timestamp") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(); - - actors - .entry("tool:claude-code".to_string()) - .or_insert_with(|| ActorDefinition { - name: Some("Claude Code".to_string()), - ..Default::default() - }); - - // Carry the original line verbatim under `raw`. `entry_type` is the - // line's bare `type`, informational only — a headerless line is - // identified by the presence of `raw`, not an enumerated type list. 
- let mut event_extra: HashMap = HashMap::new(); - event_extra.insert("entry_type".to_string(), json!(event_type)); - event_extra.insert("raw".to_string(), raw.clone()); - - let parents = last_step_id - .as_ref() - .map(|s| vec![s.clone()]) - .unwrap_or_default(); - let step = Step { - step: StepIdentity { - id: format!("{}-preamble-{}", conversation.session_id, idx), - parents, - actor: "tool:claude-code".into(), - timestamp, - }, - change: { - let mut m = HashMap::new(); - m.insert( - convo_artifact.clone(), - ArtifactChange { - raw: None, - structural: Some(StructuralChange { - change_type: "conversation.event".to_string(), - extra: event_extra, - }), - }, - ); - m - }, - meta: None, - }; - steps.push(step); - } - - for (entry_idx, entry) in conversation.entries.iter().enumerate() { - // Determine if this is a conversational entry (user/assistant with message) - // or a non-message event entry - let message = entry.message.as_ref(); - let is_conversational = - message.is_some_and(|m| matches!(m.role, MessageRole::User | MessageRole::Assistant)); - - if !is_conversational { - // Event entry — capture as conversation.event step - let step_id = if entry.uuid.is_empty() { - format!("{}-event-{}", conversation.session_id, entry_idx) - } else { - entry.uuid.clone() - }; - - let parents = if let Some(parent) = &entry.parent_uuid { - vec![parent.clone()] - } else if let Some(ref last) = last_step_id { - vec![last.clone()] - } else { - vec![] - }; - - // Register tool:claude-code actor for event entries - actors - .entry("tool:claude-code".to_string()) - .or_insert_with(|| ActorDefinition { - name: Some("Claude Code".to_string()), - ..Default::default() - }); - - let mut event_extra = HashMap::new(); - event_extra.insert("entry_type".to_string(), json!(entry.entry_type)); - - if let Some(cwd) = &entry.cwd { - event_extra.insert("cwd".to_string(), json!(cwd)); - } - if let Some(version) = &entry.version { - event_extra.insert("version".to_string(), json!(version)); - } - 
if let Some(git_branch) = &entry.git_branch { - event_extra.insert("git_branch".to_string(), json!(git_branch)); - } - if let Some(user_type) = &entry.user_type { - event_extra.insert("user_type".to_string(), json!(user_type)); - } - if let Some(snapshot) = &entry.snapshot { - event_extra.insert("snapshot".to_string(), snapshot.clone()); - } - if let Some(tool_use_result) = &entry.tool_use_result { - event_extra.insert("tool_use_result".to_string(), tool_use_result.clone()); - } - if let Some(message_id) = &entry.message_id { - event_extra.insert("message_id".to_string(), json!(message_id)); - } - // Include system message text if present - if let Some(msg) = message { - let text = msg.text(); - if !text.is_empty() { - event_extra.insert("text".to_string(), json!(text)); - } - } - // Entry-level extras - if !entry.extra.is_empty() { - event_extra.insert("entry_extra".to_string(), json!(entry.extra)); - } - - let event_step = Step { - step: StepIdentity { - id: step_id, - parents, - actor: "tool:claude-code".into(), - timestamp: entry.timestamp.clone(), - }, - change: { - let mut m = HashMap::new(); - m.insert( - convo_artifact.clone(), - ArtifactChange { - raw: None, - structural: Some(StructuralChange { - change_type: "conversation.event".to_string(), - extra: event_extra, - }), - }, - ); - m - }, - meta: None, - }; - - // Event steps do NOT advance last_step_id - steps.push(event_step); - continue; - } - - let message = message.unwrap(); - - let (actor, role_str) = match message.role { - MessageRole::User => { - actors - .entry("human:user".to_string()) - .or_insert_with(|| ActorDefinition { - name: Some("User".to_string()), - ..Default::default() - }); - ("human:user".to_string(), "user") - } - MessageRole::Assistant => { - let (actor_key, model_str) = if let Some(model) = &message.model { - (format!("agent:{}", model), model.clone()) - } else { - ("agent:claude-code".to_string(), "claude-code".to_string()) - }; - 
actors.entry(actor_key.clone()).or_insert_with(|| { - let mut identities = vec![Identity { - system: "anthropic".to_string(), - id: model_str.clone(), - }]; - if let Some(version) = &entry.version { - identities.push(Identity { - system: "claude-code".to_string(), - id: version.clone(), - }); - } - ActorDefinition { - name: Some("Claude Code".to_string()), - provider: Some("anthropic".to_string()), - model: Some(model_str), - identities, - ..Default::default() - } - }); - (actor_key, "assistant") - } - // is_conversational guarantees User or Assistant - MessageRole::System => unreachable!(), - }; - - // Collect conversation text and tool uses from this turn - let mut text_parts: Vec = Vec::new(); - let mut thinking_parts: Vec = Vec::new(); - let mut tool_use_infos: Vec = Vec::new(); - // Track whether the entry had a Thinking content part at all, - // independent of whether its plaintext was empty. Encrypted- - // reasoning blocks ship with `thinking: ""` and a signature, so - // text-emptiness alone isn't a reliable "skip this entry" signal. - let mut had_thinking_part = false; - - match &message.content { - Some(MessageContent::Parts(parts)) => { - for part in parts { - match part { - ContentPart::Text { text } if !text.trim().is_empty() => { - text_parts.push(text.clone()); - } - ContentPart::Thinking { thinking, .. 
} => { - had_thinking_part = true; - if config.include_thinking && !thinking.trim().is_empty() { - thinking_parts.push(thinking.clone()); - } - } - ContentPart::ToolUse { id, name, input } => { - tool_use_infos.push(ToolUseInfo { - id: id.clone(), - name: name.clone(), - input: input.clone(), - }); - } - _ => {} - } - } - } - Some(MessageContent::Text(text)) if !text.trim().is_empty() => { - text_parts.push(text.clone()); - } - _ => {} - } - - // Collect tool name list for the summary field - let tool_names: Vec = tool_use_infos.iter().map(|t| t.name.clone()).collect(); - - // Tool-result-only user entries — no human text, no thinking, no - // tool_use parts, but they DO have tool_result parts whose UUID is - // referenced by the next assistant turn's parentUuid. Emit them - // as conversation.event steps so the UUID survives the round-trip - // (and the projector can re-attach the original `toolUseResult` - // blob, promptId, slug, etc.). Without this the parent chain - // breaks and Claude's UI orphans every tool result. - let is_tool_result_user = matches!(message.role, MessageRole::User) - && text_parts.is_empty() - && thinking_parts.is_empty() - && !had_thinking_part - && tool_use_infos.is_empty() - && message - .content - .as_ref() - .map(|c| { - matches!(c, MessageContent::Parts(parts) if parts.iter().any(|p| matches!(p, ContentPart::ToolResult { .. }))) - }) - .unwrap_or(false); - - if is_tool_result_user { - // Snapshot every part of the tool-result entry so the projector - // can rebuild it byte-identical. Each tool_result part lands - // under its tool_use_id; outer-entry fields go on the event - // itself. 
- let mut event_extra: HashMap = HashMap::new(); - event_extra.insert("entry_type".to_string(), json!("tool_result_user")); - if let Some(MessageContent::Parts(parts)) = &message.content { - let mut results: Vec = Vec::new(); - for part in parts { - if let ContentPart::ToolResult { - tool_use_id, - content, - is_error, - } = part - { - results.push(json!({ - "tool_use_id": tool_use_id, - "content": content.text(), - "is_error": is_error, - })); - } - } - event_extra.insert("tool_results".to_string(), json!(results)); - } - if let Some(tur) = &entry.tool_use_result { - event_extra.insert("tool_use_result".to_string(), tur.clone()); - } - if let Some(cwd) = &entry.cwd { - event_extra.insert("cwd".to_string(), json!(cwd)); - } - if let Some(version) = &entry.version { - event_extra.insert("version".to_string(), json!(version)); - } - if let Some(git_branch) = &entry.git_branch { - event_extra.insert("git_branch".to_string(), json!(git_branch)); - } - if let Some(user_type) = &entry.user_type { - event_extra.insert("user_type".to_string(), json!(user_type)); - } - if !entry.extra.is_empty() { - event_extra.insert("entry_extra".to_string(), json!(entry.extra)); - } - - let parents = entry.parent_uuid.iter().cloned().collect::>(); - let step = Step { - step: StepIdentity { - id: entry.uuid.clone(), - parents, - actor: "tool:claude-code".into(), - timestamp: entry.timestamp.clone(), - }, - change: { - let mut m = HashMap::new(); - m.insert( - convo_artifact.clone(), - ArtifactChange { - raw: None, - structural: Some(StructuralChange { - change_type: "conversation.event".to_string(), - extra: event_extra, - }), - }, - ); - m - }, - meta: None, - }; - actors - .entry("tool:claude-code".to_string()) - .or_insert_with(|| ActorDefinition { - name: Some("Claude Code".to_string()), - ..Default::default() - }); - // Don't advance last_step_id — tool-result entries chain off - // the assistant they answered, not each other. 
- steps.push(step); - continue; - } - - // Skip entries with no conversation content and no tool uses. - // A bare thinking part still counts as content — dropping it loses - // a step in the conversation graph and Claude's UI shows the model - // jumping straight from one tool call to the next. - if text_parts.is_empty() - && thinking_parts.is_empty() - && tool_use_infos.is_empty() - && !had_thinking_part - { - continue; - } - - // Build the conversation artifact change - let mut convo_extra = HashMap::new(); - convo_extra.insert("role".to_string(), json!(role_str)); - if !text_parts.is_empty() { - let combined = text_parts.join("\n\n"); - convo_extra.insert("text".to_string(), json!(combined)); - } - if !thinking_parts.is_empty() { - let combined_thinking = thinking_parts.join("\n\n"); - convo_extra.insert("thinking".to_string(), json!(combined_thinking)); - } - if !tool_names.is_empty() { - convo_extra.insert("tool_uses".to_string(), json!(tool_names)); - } - - // Add model, stop_reason, and usage fields from the message - if let Some(model) = &message.model { - convo_extra.insert("model".to_string(), json!(model)); - } - if let Some(stop_reason) = &message.stop_reason { - convo_extra.insert("stop_reason".to_string(), json!(stop_reason)); - } - if let Some(usage) = &message.usage { - if let Some(input_tokens) = usage.input_tokens { - convo_extra.insert("input_tokens".to_string(), json!(input_tokens)); - } - if let Some(output_tokens) = usage.output_tokens { - convo_extra.insert("output_tokens".to_string(), json!(output_tokens)); - } - if let Some(cache_read) = usage.cache_read_input_tokens { - convo_extra.insert("cache_read_tokens".to_string(), json!(cache_read)); - } - if let Some(cache_write) = usage.cache_creation_input_tokens { - convo_extra.insert("cache_write_tokens".to_string(), json!(cache_write)); - } - } - - // Per-entry metadata for round-trip fidelity - if let Some(cwd) = &entry.cwd { - convo_extra.insert("cwd".to_string(), json!(cwd)); - } - if let 
Some(version) = &entry.version { - convo_extra.insert("version".to_string(), json!(version)); - } - if let Some(git_branch) = &entry.git_branch { - convo_extra.insert("git_branch".to_string(), json!(git_branch)); - } - if let Some(user_type) = &entry.user_type { - convo_extra.insert("user_type".to_string(), json!(user_type)); - } - if let Some(request_id) = &entry.request_id { - convo_extra.insert("request_id".to_string(), json!(request_id)); - } - // Entry-level extras (isMeta, slug, entrypoint, promptId, etc.) - if !entry.extra.is_empty() { - convo_extra.insert("entry_extra".to_string(), json!(entry.extra)); - } - - let convo_change = ArtifactChange { - raw: None, - structural: Some(StructuralChange { - change_type: "conversation.append".to_string(), - extra: convo_extra, - }), - }; - - let mut changes = HashMap::new(); - changes.insert(convo_artifact.clone(), convo_change); - - // Build conversation step using full UUID as step ID. - // Always prefer the entry's actual `parent_uuid` — it points at - // the previous JSONL entry the user/assistant turn responded to, - // which may be an attachment, a tool_result_user event, or another - // conversational entry. Falling back to `last_step_id` would - // collapse those into a linear chain and break Claude UI rendering - // (the next assistant's `parentUuid` would skip the entry it - // actually responded to). 
- let step_id = entry.uuid.clone(); - let parents = if let Some(parent) = &entry.parent_uuid { - vec![parent.clone()] - } else { - last_step_id.iter().cloned().collect() - }; - - let step = Step { - step: StepIdentity { - id: step_id.clone(), - parents, - actor, - timestamp: entry.timestamp.clone(), - }, - change: changes, - meta: None, - }; - - if !entry.is_sidechain { - last_step_id = Some(step_id.clone()); - } - steps.push(step); - - // Emit tool invocation steps (one per tool type, grouped) - if !tool_use_infos.is_empty() { - // Group tool uses by tool name, preserving order of first occurrence - let mut tool_groups: Vec<(String, Vec<&ToolUseInfo>)> = Vec::new(); - let mut group_index: HashMap = HashMap::new(); - - for tool_use in &tool_use_infos { - if let Some(&idx) = group_index.get(&tool_use.name) { - tool_groups[idx].1.push(tool_use); - } else { - let idx = tool_groups.len(); - group_index.insert(tool_use.name.clone(), idx); - tool_groups.push((tool_use.name.clone(), vec![tool_use])); - } - } - - for (tool_name, uses) in &tool_groups { - let tool_step_id = format!("{}-tool-{}", entry.uuid, tool_name); - let tool_actor = format!("agent:claude-code/tool:{}", tool_name); - - // Register the tool actor - actors - .entry(tool_actor.clone()) - .or_insert_with(|| ActorDefinition { - name: Some(format!("Claude Code / {}", tool_name)), - ..Default::default() - }); - - let mut tool_changes = HashMap::new(); - let category = tool_category_str(tool_name); - - for tool_use in uses { - // Determine artifact key - let artifact_key = if is_file_tool(tool_name) { - tool_use - .input - .get("file_path") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()) - .unwrap_or_else(|| { - format!( - "agent://claude/{}/tool/{}/{}", - conversation.session_id, category, tool_use.id - ) - }) - } else { - format!( - "agent://claude/{}/tool/{}/{}", - conversation.session_id, category, tool_use.id - ) - }; - - let mut extra = HashMap::new(); - extra.insert("tool_use_id".to_string(), 
json!(tool_use.id)); - extra.insert("name".to_string(), json!(tool_use.name)); - extra.insert("input".to_string(), tool_use.input.clone()); - extra.insert("category".to_string(), json!(category)); - - // Look up assembled tool result from ConversationView - if let Some(turn) = turn_by_id.get(entry.uuid.as_str()) - && let Some(invocation) = - turn.tool_uses.iter().find(|tu| tu.id == tool_use.id) - && let Some(result) = &invocation.result - { - extra.insert("result".to_string(), json!(result.content)); - extra.insert("is_error".to_string(), json!(result.is_error)); - } - - // For file-write tools (Edit / Write / MultiEdit / - // NotebookEdit), compute a unified diff so the artifact - // carries the actual change, not just the raw tool input. - // - // For `Write { content }` specifically the JSONL log - // doesn't capture the prior file state, so we consult - // git HEAD as a best-effort pre-image. If the project - // isn't a git repo or the file isn't tracked, we fall - // back to diffing against "" (addition-only hunk). 
- let raw = if category == "file_write" { - let before_state = if tool_name == "Write" { - resolve_local_dir( - config.project_path.as_deref(), - conversation.project_path.as_deref(), - entry.cwd.as_deref(), - ) - .and_then(|dir| git_head_content(&dir, &artifact_key)) - } else { - None - }; - file_write_diff( - tool_name, - &tool_use.input, - &artifact_key, - before_state.as_deref(), - ) - } else { - None - }; - - tool_changes.insert( - artifact_key, - ArtifactChange { - raw, - structural: Some(StructuralChange { - change_type: "tool.invoke".to_string(), - extra, - }), - }, - ); - } - - let tool_step = Step { - step: StepIdentity { - id: tool_step_id, - parents: vec![step_id.clone()], - actor: tool_actor, - timestamp: entry.timestamp.clone(), - }, - change: tool_changes, - meta: None, - }; - - // Tool steps do NOT advance last_step_id - steps.push(tool_step); - } - } - } - - let head = last_step_id.unwrap_or_else(|| "empty".to_string()); - let base_uri = config - .project_path - .as_deref() - .or(conversation.project_path.as_deref()) - .map(|p| format!("file://{}", p)); - - Path { - path: PathIdentity { - id: format!("path-claude-{}", session_short), - base: base_uri.map(|uri| Base { - uri, - ref_str: None, - branch: None, - }), - head, - graph_ref: None, - }, - steps, - meta: Some(PathMeta { - title: Some(format!("Claude session: {}", session_short)), - source: Some("claude-code".to_string()), - actors: if actors.is_empty() { - None - } else { - Some(actors) - }, - ..Default::default() - }), - } + toolpath_convo::derive_path(&view, &cfg) } /// Derive Toolpath Paths from multiple conversations in a project. -pub fn derive_project(conversations: &[Conversation], config: &DeriveConfig) -> Vec { +pub fn derive_project( + conversations: &[Conversation], + config: &DeriveConfig, +) -> Vec { conversations .iter() .map(|c| derive_path(c, config)) .collect() } -/// Return the first `n` characters of a string, safe for any UTF-8 content. 
-fn safe_prefix(s: &str, n: usize) -> String { - s.chars().take(n).collect() -} - #[cfg(test)] mod tests { use super::*; - use crate::types::{ContentPart, ConversationEntry, Message, MessageContent, Usage}; + use crate::types::{Conversation, ConversationEntry, Message, MessageContent, MessageRole}; + use std::collections::HashMap; + use toolpath::v1::Graph; - fn make_entry( - uuid: &str, - role: MessageRole, - content: &str, - timestamp: &str, - ) -> ConversationEntry { + fn user_entry(uuid: &str, parent: Option<&str>, text: &str, cwd: &str) -> ConversationEntry { ConversationEntry { - parent_uuid: None, + entry_type: "user".into(), + uuid: uuid.into(), + parent_uuid: parent.map(str::to_string), + session_id: Some("sess-1".into()), + timestamp: "2026-01-01T00:00:00Z".into(), + cwd: Some(cwd.into()), + git_branch: Some("main".into()), + version: Some("1.0.0".into()), + user_type: None, + request_id: None, + message_id: None, + snapshot: None, + tool_use_result: None, is_sidechain: false, - entry_type: match role { - MessageRole::User => "user", - MessageRole::Assistant => "assistant", - MessageRole::System => "system", - } - .to_string(), - uuid: uuid.to_string(), - timestamp: timestamp.to_string(), - session_id: Some("test-session".to_string()), - cwd: None, - git_branch: None, - version: None, message: Some(Message { - role, - content: Some(MessageContent::Text(content.to_string())), + role: MessageRole::User, + content: Some(MessageContent::Text(text.into())), model: None, id: None, message_type: None, @@ -851,1893 +85,93 @@ mod tests { stop_sequence: None, usage: None, }), - user_type: None, - request_id: None, - tool_use_result: None, - snapshot: None, - message_id: None, - extra: Default::default(), - } - } - - fn make_conversation(entries: Vec) -> Conversation { - let mut convo = Conversation::new("test-session-12345678".to_string()); - for entry in entries { - convo.add_entry(entry); + extra: HashMap::new(), } - convo - } - - // ── safe_prefix 
──────────────────────────────────────────────────── - - #[test] - fn test_safe_prefix_normal() { - assert_eq!(safe_prefix("abcdef1234", 8), "abcdef12"); - } - - #[test] - fn test_safe_prefix_short() { - assert_eq!(safe_prefix("abc", 8), "abc"); - } - - #[test] - fn test_safe_prefix_unicode() { - assert_eq!( - safe_prefix("\u{65E5}\u{672C}\u{8A9E}\u{30C6}\u{30B9}\u{30C8}", 3), - "\u{65E5}\u{672C}\u{8A9E}" - ); - } - - // ── tool helpers ────────────────────────────────────────────────── - - #[test] - fn test_tool_category_str() { - assert_eq!(tool_category_str("Read"), "file_read"); - assert_eq!(tool_category_str("Write"), "file_write"); - assert_eq!(tool_category_str("Edit"), "file_write"); - assert_eq!(tool_category_str("Glob"), "file_search"); - assert_eq!(tool_category_str("Grep"), "file_search"); - assert_eq!(tool_category_str("Bash"), "shell"); - assert_eq!(tool_category_str("WebFetch"), "network"); - assert_eq!(tool_category_str("Task"), "delegation"); - assert_eq!(tool_category_str("SomethingElse"), "unknown"); - } - - #[test] - fn test_is_file_tool() { - assert!(is_file_tool("Read")); - assert!(is_file_tool("Write")); - assert!(is_file_tool("Edit")); - assert!(is_file_tool("Glob")); - assert!(is_file_tool("Grep")); - assert!(is_file_tool("NotebookEdit")); - assert!(!is_file_tool("Bash")); - assert!(!is_file_tool("WebFetch")); - assert!(!is_file_tool("Task")); - } - - // ── derive_path ──────────────────────────────────────────────────── - - #[test] - fn test_derive_path_basic() { - let entries = vec![ - make_entry( - "uuid-1111-aaaa", - MessageRole::User, - "Hello", - "2024-01-01T00:00:00Z", - ), - make_entry( - "uuid-2222-bbbb", - MessageRole::Assistant, - "Hi there", - "2024-01-01T00:00:01Z", - ), - ]; - let convo = make_conversation(entries); - let config = DeriveConfig::default(); - - let path = derive_path(&convo, &config); - - assert!(path.path.id.starts_with("path-claude-")); - assert_eq!(path.steps.len(), 2); - // Step IDs are full UUIDs - 
assert_eq!(path.steps[0].step.id, "uuid-1111-aaaa"); - assert_eq!(path.steps[1].step.id, "uuid-2222-bbbb"); - assert_eq!(path.steps[0].step.actor, "human:user"); - assert!(path.steps[1].step.actor.starts_with("agent:")); } - #[test] - fn test_derive_path_step_parents() { - let entries = vec![ - make_entry( - "uuid-1111", - MessageRole::User, - "Hello", - "2024-01-01T00:00:00Z", - ), - make_entry( - "uuid-2222", - MessageRole::Assistant, - "Hi", - "2024-01-01T00:00:01Z", - ), - make_entry( - "uuid-3333", - MessageRole::User, - "More", - "2024-01-01T00:00:02Z", - ), - ]; - let convo = make_conversation(entries); - let config = DeriveConfig::default(); - - let path = derive_path(&convo, &config); - - // Parents are full UUIDs - assert!( - path.steps[1] - .step - .parents - .contains(&"uuid-1111".to_string()) - ); - assert!( - path.steps[2] - .step - .parents - .contains(&"uuid-2222".to_string()) - ); - } - - #[test] - fn test_derive_path_conversation_artifact() { - let entries = vec![make_entry( - "uuid-1111", - MessageRole::User, - "Hello", - "2024-01-01T00:00:00Z", - )]; - let convo = make_conversation(entries); - let config = DeriveConfig::default(); - - let path = derive_path(&convo, &config); - - // Artifact key uses agent:// scheme - let convo_key = format!("agent://claude/{}", convo.session_id); - assert!(path.steps[0].change.contains_key(&convo_key)); - - let change = &path.steps[0].change[&convo_key]; - let structural = change.structural.as_ref().unwrap(); - assert_eq!(structural.change_type, "conversation.append"); - assert_eq!(structural.extra["role"], "user"); - } - - #[test] - fn test_derive_path_no_meta_intent() { - let entries = vec![make_entry( - "uuid-1111", - MessageRole::User, - "Hello", - "2024-01-01T00:00:00Z", - )]; - let convo = make_conversation(entries); - let config = DeriveConfig::default(); - - let path = derive_path(&convo, &config); - - // meta.intent should NOT be set (we removed it as redundant) - assert!(path.steps[0].meta.is_none()); 
- } - - #[test] - fn test_derive_path_actors() { - let entries = vec![ - make_entry( - "uuid-1111", - MessageRole::User, - "Hello", - "2024-01-01T00:00:00Z", - ), - make_entry( - "uuid-2222", - MessageRole::Assistant, - "Hi", - "2024-01-01T00:00:01Z", - ), - ]; - let convo = make_conversation(entries); - let config = DeriveConfig::default(); - - let path = derive_path(&convo, &config); - let actors = path.meta.as_ref().unwrap().actors.as_ref().unwrap(); - - assert!(actors.contains_key("human:user")); - // Assistant actor depends on model (None in our test) - assert!(actors.contains_key("agent:claude-code")); - } - - #[test] - fn test_derive_path_with_project_path_config() { - let convo = make_conversation(vec![make_entry( - "uuid-1", - MessageRole::User, - "Hello", - "2024-01-01T00:00:00Z", - )]); - let config = DeriveConfig { - project_path: Some("/my/project".to_string()), - ..Default::default() - }; - - let path = derive_path(&convo, &config); - assert_eq!(path.path.base.as_ref().unwrap().uri, "file:///my/project"); - } - - #[test] - fn test_derive_path_skips_empty_content() { - let mut entry = make_entry("uuid-1111", MessageRole::User, "", "2024-01-01T00:00:00Z"); - // Empty text, no tool uses, no file changes -> should be skipped - entry.message.as_mut().unwrap().content = Some(MessageContent::Text(" ".to_string())); - - let convo = make_conversation(vec![entry]); - let config = DeriveConfig::default(); - - let path = derive_path(&convo, &config); - assert!(path.steps.is_empty()); - } - - #[test] - fn test_derive_path_captures_system_messages_as_events() { - let entries = vec![ - make_entry( - "uuid-1111", - MessageRole::System, - "System prompt", - "2024-01-01T00:00:00Z", - ), - make_entry( - "uuid-2222", - MessageRole::User, - "Hello", - "2024-01-01T00:00:01Z", - ), - ]; - let convo = make_conversation(entries); - let config = DeriveConfig::default(); - - let path = derive_path(&convo, &config); - // System message captured as event, plus user message - 
assert_eq!(path.steps.len(), 2); - // First step is the system event - assert_eq!(path.steps[0].step.actor, "tool:claude-code"); - let convo_key = format!("agent://claude/{}", convo.session_id); - let structural = path.steps[0].change[&convo_key] - .structural - .as_ref() - .unwrap(); - assert_eq!(structural.change_type, "conversation.event"); - assert_eq!(structural.extra["entry_type"], "system"); - assert_eq!(structural.extra["text"], "System prompt"); - // Second step is the user message - assert_eq!(path.steps[1].step.actor, "human:user"); - } - - #[test] - fn test_derive_path_with_tool_use() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - let entry = ConversationEntry { - parent_uuid: None, - is_sidechain: false, - entry_type: "assistant".to_string(), - uuid: "uuid-tool".to_string(), - timestamp: "2024-01-01T00:00:00Z".to_string(), - session_id: Some("test-session".to_string()), - message: Some(Message { - role: MessageRole::Assistant, - content: Some(MessageContent::Parts(vec![ - ContentPart::Text { - text: "Let me write that".to_string(), - }, - ContentPart::ToolUse { - id: "t1".to_string(), - name: "Write".to_string(), - input: serde_json::json!({"file_path": "/tmp/test.rs"}), - }, - ])), - model: Some("claude-sonnet-4-5-20250929".to_string()), - id: None, - message_type: None, - stop_reason: None, - stop_sequence: None, - usage: None, - }), - cwd: None, - git_branch: None, - version: None, + fn assistant_entry(uuid: &str, parent: Option<&str>, text: &str) -> ConversationEntry { + ConversationEntry { + entry_type: "assistant".into(), + uuid: uuid.into(), + parent_uuid: parent.map(str::to_string), + session_id: Some("sess-1".into()), + timestamp: "2026-01-01T00:00:01Z".into(), + cwd: Some("/tmp/proj".into()), + git_branch: Some("main".into()), + version: Some("1.0.0".into()), user_type: None, request_id: None, - tool_use_result: None, - snapshot: None, message_id: None, - extra: Default::default(), - }; - convo.add_entry(entry); 
- let config = DeriveConfig::default(); - - let path = derive_path(&convo, &config); - - // Now produces 2 steps: conversation + tool - assert_eq!(path.steps.len(), 2); - - // Conversation step has the conversation artifact - let convo_key = format!("agent://claude/{}", convo.session_id); - assert!(path.steps[0].change.contains_key(&convo_key)); - - // Tool step has the file artifact with tool.invoke - assert_eq!(path.steps[1].step.id, "uuid-tool-tool-Write"); - assert_eq!(path.steps[1].step.actor, "agent:claude-code/tool:Write"); - assert!( - path.steps[1] - .step - .parents - .contains(&"uuid-tool".to_string()) - ); - assert!(path.steps[1].change.contains_key("/tmp/test.rs")); - - let tool_change = &path.steps[1].change["/tmp/test.rs"]; - let structural = tool_change.structural.as_ref().unwrap(); - assert_eq!(structural.change_type, "tool.invoke"); - assert_eq!(structural.extra["name"], "Write"); - assert_eq!(structural.extra["tool_use_id"], "t1"); - assert_eq!(structural.extra["category"], "file_write"); - } - - #[test] - fn test_derive_path_sidechain_uses_parent_uuid() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - - let e1 = make_entry( - "uuid-main-11", - MessageRole::User, - "Hello", - "2024-01-01T00:00:00Z", - ); - let e2 = make_entry( - "uuid-main-22", - MessageRole::Assistant, - "Hi", - "2024-01-01T00:00:01Z", - ); - let mut e3 = make_entry( - "uuid-side-33", - MessageRole::User, - "Side", - "2024-01-01T00:00:02Z", - ); - e3.is_sidechain = true; - e3.parent_uuid = Some("uuid-main-11".to_string()); - - convo.add_entry(e1); - convo.add_entry(e2); - convo.add_entry(e3); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - assert_eq!(path.steps.len(), 3); - // Sidechain step should reference e1's full UUID as parent - let sidechain_step = &path.steps[2]; - assert!( - sidechain_step - .step - .parents - .contains(&"uuid-main-11".to_string()) - ); - } - - // ── derive_project 
───────────────────────────────────────────────── - - #[test] - fn test_derive_project() { - let c1 = make_conversation(vec![make_entry( - "uuid-1", - MessageRole::User, - "Hello", - "2024-01-01T00:00:00Z", - )]); - let mut c2 = Conversation::new("session-2".to_string()); - c2.add_entry(make_entry( - "uuid-2", - MessageRole::User, - "World", - "2024-01-02T00:00:00Z", - )); - - let config = DeriveConfig::default(); - let paths = derive_project(&[c1, c2], &config); - - assert_eq!(paths.len(), 2); - } - - #[test] - fn test_derive_path_head_is_last_non_sidechain() { - let entries = vec![ - make_entry( - "uuid-1111", - MessageRole::User, - "Hello", - "2024-01-01T00:00:00Z", - ), - make_entry( - "uuid-2222", - MessageRole::Assistant, - "Hi", - "2024-01-01T00:00:01Z", - ), - ]; - let convo = make_conversation(entries); - let config = DeriveConfig::default(); - - let path = derive_path(&convo, &config); - - // Head should point to the last conversation step (full UUID) - assert_eq!(path.path.head, "uuid-2222"); - } - - // ── new tests for enriched derive ────────────────────────────────── - - #[test] - fn test_derive_path_tool_invocation_actors() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - convo.add_entry(ConversationEntry { - parent_uuid: None, + snapshot: None, + tool_use_result: None, is_sidechain: false, - entry_type: "assistant".to_string(), - uuid: "uuid-1".to_string(), - timestamp: "2024-01-01T00:00:00Z".to_string(), - session_id: Some("test-session".to_string()), message: Some(Message { role: MessageRole::Assistant, - content: Some(MessageContent::Parts(vec![ - ContentPart::Text { - text: "Working".to_string(), - }, - ContentPart::ToolUse { - id: "t1".to_string(), - name: "Read".to_string(), - input: serde_json::json!({"file_path": "/foo.rs"}), - }, - ])), - model: None, + content: Some(MessageContent::Text(text.into())), + model: Some("claude-opus-4-7".into()), id: None, message_type: None, - stop_reason: None, + stop_reason: 
Some("end_turn".into()), stop_sequence: None, usage: None, }), - cwd: None, - git_branch: None, - version: None, - user_type: None, - request_id: None, - tool_use_result: None, - snapshot: None, - message_id: None, - extra: Default::default(), - }); - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); + extra: HashMap::new(), + } + } - let actors = path.meta.as_ref().unwrap().actors.as_ref().unwrap(); - assert!(actors.contains_key("agent:claude-code/tool:Read")); + fn make_convo() -> Conversation { + Conversation { + session_id: "sess-1abc".into(), + project_path: Some("/tmp/proj".into()), + entries: vec![ + user_entry("u1", None, "Fix bug", "/tmp/proj"), + assistant_entry("a1", Some("u1"), "Done"), + ], + preamble: vec![], + started_at: None, + last_activity: None, + session_ids: vec![], + } } #[test] - fn test_derive_path_token_usage() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - convo.add_entry(ConversationEntry { - parent_uuid: None, - is_sidechain: false, - entry_type: "assistant".to_string(), - uuid: "uuid-usage".to_string(), - timestamp: "2024-01-01T00:00:00Z".to_string(), - session_id: Some("test-session".to_string()), - message: Some(Message { - role: MessageRole::Assistant, - content: Some(MessageContent::Text("Response".to_string())), - model: Some("claude-sonnet-4-5-20250929".to_string()), - id: None, - message_type: None, - stop_reason: Some("end_turn".to_string()), - stop_sequence: None, - usage: Some(Usage { - input_tokens: Some(100), - output_tokens: Some(50), - cache_creation_input_tokens: Some(10), - cache_read_input_tokens: Some(80), - cache_creation: None, - service_tier: None, - }), - }), - cwd: None, - git_branch: None, - version: None, - user_type: None, - request_id: None, - tool_use_result: None, - snapshot: None, - message_id: None, - extra: Default::default(), - }); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - let convo_key = 
format!("agent://claude/{}", convo.session_id); - let change = &path.steps[0].change[&convo_key]; - let extra = &change.structural.as_ref().unwrap().extra; - - assert_eq!(extra["model"], "claude-sonnet-4-5-20250929"); - assert_eq!(extra["stop_reason"], "end_turn"); - assert_eq!(extra["input_tokens"], 100); - assert_eq!(extra["output_tokens"], 50); - assert_eq!(extra["cache_read_tokens"], 80); - assert_eq!(extra["cache_write_tokens"], 10); + fn derive_path_basic_shape() { + let convo = make_convo(); + let path = derive_path(&convo, &DeriveConfig::default()); + assert!(path.path.id.starts_with("path-claude-code-")); + // Base populated from first entry's cwd / git_branch. + let base = path.path.base.as_ref().expect("base"); + assert_eq!(base.uri, "file:///tmp/proj"); + assert_eq!(base.branch.as_deref(), Some("main")); } #[test] - fn test_derive_path_full_text_no_truncation() { - let long_text = "a".repeat(5000); - let entries = vec![make_entry( - "uuid-long", - MessageRole::User, - &long_text, - "2024-01-01T00:00:00Z", - )]; - let convo = make_conversation(entries); - let config = DeriveConfig::default(); - - let path = derive_path(&convo, &config); - - let convo_key = format!("agent://claude/{}", convo.session_id); - let change = &path.steps[0].change[&convo_key]; - let text = change.structural.as_ref().unwrap().extra["text"] - .as_str() - .unwrap(); - assert_eq!(text.len(), 5000); - assert!(!text.ends_with("...")); + fn derive_path_producer_in_meta_extra() { + let convo = make_convo(); + let path = derive_path(&convo, &DeriveConfig::default()); + let producer = path.meta.as_ref().unwrap().extra.get("producer").unwrap(); + assert_eq!(producer["name"], "claude-code"); + assert_eq!(producer["version"], "1.0.0"); } #[test] - fn test_derive_path_multiple_tool_uses_same_type() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - convo.add_entry(ConversationEntry { - parent_uuid: None, - is_sidechain: false, - entry_type: "assistant".to_string(), 
- uuid: "uuid-multi".to_string(), - timestamp: "2024-01-01T00:00:00Z".to_string(), - session_id: Some("test-session".to_string()), - message: Some(Message { - role: MessageRole::Assistant, - content: Some(MessageContent::Parts(vec![ - ContentPart::Text { - text: "Reading files".to_string(), - }, - ContentPart::ToolUse { - id: "t1".to_string(), - name: "Read".to_string(), - input: serde_json::json!({"file_path": "/foo.rs"}), - }, - ContentPart::ToolUse { - id: "t2".to_string(), - name: "Read".to_string(), - input: serde_json::json!({"file_path": "/bar.rs"}), - }, - ])), - model: None, - id: None, - message_type: None, - stop_reason: None, - stop_sequence: None, - usage: None, - }), - cwd: None, - git_branch: None, - version: None, - user_type: None, - request_id: None, - tool_use_result: None, - snapshot: None, - message_id: None, - extra: Default::default(), - }); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - // 1 conversation step + 1 tool step (both Reads grouped) - assert_eq!(path.steps.len(), 2); - assert_eq!(path.steps[1].step.id, "uuid-multi-tool-Read"); - // Two artifact changes in the tool step - assert_eq!(path.steps[1].change.len(), 2); - assert!(path.steps[1].change.contains_key("/foo.rs")); - assert!(path.steps[1].change.contains_key("/bar.rs")); + fn derive_path_actors_populated() { + let convo = make_convo(); + let path = derive_path(&convo, &DeriveConfig::default()); + let actors = path.meta.as_ref().unwrap().actors.as_ref().unwrap(); + assert!(actors.contains_key("human:user")); + assert!(actors.contains_key("agent:claude-opus-4-7")); } #[test] - fn test_derive_path_multiple_tool_uses_different_types() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - convo.add_entry(ConversationEntry { - parent_uuid: None, - is_sidechain: false, - entry_type: "assistant".to_string(), - uuid: "uuid-diff".to_string(), - timestamp: "2024-01-01T00:00:00Z".to_string(), - session_id: 
Some("test-session".to_string()), - message: Some(Message { - role: MessageRole::Assistant, - content: Some(MessageContent::Parts(vec![ - ContentPart::Text { - text: "Working".to_string(), - }, - ContentPart::ToolUse { - id: "t1".to_string(), - name: "Read".to_string(), - input: serde_json::json!({"file_path": "/foo.rs"}), - }, - ContentPart::ToolUse { - id: "t2".to_string(), - name: "Bash".to_string(), - input: serde_json::json!({"command": "cargo test"}), - }, - ])), - model: None, - id: None, - message_type: None, - stop_reason: None, - stop_sequence: None, - usage: None, - }), - cwd: None, - git_branch: None, - version: None, - user_type: None, - request_id: None, - tool_use_result: None, - snapshot: None, - message_id: None, - extra: Default::default(), - }); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - // 1 conversation step + 2 tool steps (Read and Bash) - assert_eq!(path.steps.len(), 3); - assert_eq!(path.steps[1].step.id, "uuid-diff-tool-Read"); - assert_eq!(path.steps[2].step.id, "uuid-diff-tool-Bash"); - - // Bash tool uses agent:// URI since it's not a file tool - let bash_change = &path.steps[2].change; - assert_eq!(bash_change.len(), 1); - let bash_key = bash_change.keys().next().unwrap(); - assert!(bash_key.starts_with("agent://claude/")); - assert!(bash_key.contains("/tool/shell/")); - } - - #[test] - fn test_derive_path_non_file_tool_artifact_key() { - let mut convo = Conversation::new("sess-123".to_string()); - convo.add_entry(ConversationEntry { - parent_uuid: None, - is_sidechain: false, - entry_type: "assistant".to_string(), - uuid: "uuid-bash".to_string(), - timestamp: "2024-01-01T00:00:00Z".to_string(), - session_id: Some("test-session".to_string()), - message: Some(Message { - role: MessageRole::Assistant, - content: Some(MessageContent::Parts(vec![ - ContentPart::Text { - text: "Running".to_string(), - }, - ContentPart::ToolUse { - id: "tu-42".to_string(), - name: "Bash".to_string(), - input: 
serde_json::json!({"command": "ls"}), - }, - ])), - model: None, - id: None, - message_type: None, - stop_reason: None, - stop_sequence: None, - usage: None, - }), - cwd: None, - git_branch: None, - version: None, - user_type: None, - request_id: None, - tool_use_result: None, - snapshot: None, - message_id: None, - extra: Default::default(), - }); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - let tool_step = &path.steps[1]; - let expected_key = "agent://claude/sess-123/tool/shell/tu-42"; - assert!(tool_step.change.contains_key(expected_key)); - } - - #[test] - fn test_derive_path_thinking_included_when_configured() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - convo.add_entry(ConversationEntry { - parent_uuid: None, - is_sidechain: false, - entry_type: "assistant".to_string(), - uuid: "uuid-think".to_string(), - timestamp: "2024-01-01T00:00:00Z".to_string(), - session_id: Some("test-session".to_string()), - message: Some(Message { - role: MessageRole::Assistant, - content: Some(MessageContent::Parts(vec![ - ContentPart::Thinking { - thinking: "Let me think about this".to_string(), - signature: None, - }, - ContentPart::Text { - text: "Here is my answer".to_string(), - }, - ])), - model: None, - id: None, - message_type: None, - stop_reason: None, - stop_sequence: None, - usage: None, - }), - cwd: None, - git_branch: None, - version: None, - user_type: None, - request_id: None, - tool_use_result: None, - snapshot: None, - message_id: None, - extra: Default::default(), - }); - - // With thinking enabled - let config = DeriveConfig { - include_thinking: true, - ..Default::default() - }; - let path = derive_path(&convo, &config); - - let convo_key = format!("agent://claude/{}", convo.session_id); - let extra = &path.steps[0].change[&convo_key] - .structural - .as_ref() - .unwrap() - .extra; - assert_eq!(extra["thinking"], "Let me think about this"); - // Text should be separate from thinking - 
assert_eq!(extra["text"], "Here is my answer"); - } - - #[test] - fn test_derive_path_thinking_excluded_by_default() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - convo.add_entry(ConversationEntry { - parent_uuid: None, - is_sidechain: false, - entry_type: "assistant".to_string(), - uuid: "uuid-think2".to_string(), - timestamp: "2024-01-01T00:00:00Z".to_string(), - session_id: Some("test-session".to_string()), - message: Some(Message { - role: MessageRole::Assistant, - content: Some(MessageContent::Parts(vec![ - ContentPart::Thinking { - thinking: "Secret thoughts".to_string(), - signature: None, - }, - ContentPart::Text { - text: "Answer".to_string(), - }, - ])), - model: None, - id: None, - message_type: None, - stop_reason: None, - stop_sequence: None, - usage: None, - }), - cwd: None, - git_branch: None, - version: None, - user_type: None, - request_id: None, - tool_use_result: None, - snapshot: None, - message_id: None, - extra: Default::default(), - }); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - let convo_key = format!("agent://claude/{}", convo.session_id); - let extra = &path.steps[0].change[&convo_key] - .structural - .as_ref() - .unwrap() - .extra; - assert!(!extra.contains_key("thinking")); - } - - #[test] - fn test_derive_path_tool_step_does_not_advance_parent_chain() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - convo.add_entry(ConversationEntry { - parent_uuid: None, - is_sidechain: false, - entry_type: "assistant".to_string(), - uuid: "uuid-a1".to_string(), - timestamp: "2024-01-01T00:00:00Z".to_string(), - session_id: Some("test-session".to_string()), - message: Some(Message { - role: MessageRole::Assistant, - content: Some(MessageContent::Parts(vec![ - ContentPart::Text { - text: "Writing".to_string(), - }, - ContentPart::ToolUse { - id: "t1".to_string(), - name: "Write".to_string(), - input: serde_json::json!({"file_path": "/f.rs"}), - }, 
- ])), - model: None, - id: None, - message_type: None, - stop_reason: None, - stop_sequence: None, - usage: None, - }), - cwd: None, - git_branch: None, - version: None, - user_type: None, - request_id: None, - tool_use_result: None, - snapshot: None, - message_id: None, - extra: Default::default(), - }); - convo.add_entry(make_entry( - "uuid-u2", - MessageRole::User, - "Next", - "2024-01-01T00:00:01Z", - )); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - // Steps: conversation(uuid-a1), tool(uuid-a1-tool-Write), conversation(uuid-u2) - assert_eq!(path.steps.len(), 3); - // The user step's parent should be the conversation step, not the tool step - assert_eq!(path.steps[2].step.parents, vec!["uuid-a1".to_string()]); - } - - #[test] - fn test_derive_path_tool_input_preserved() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - let input_json = serde_json::json!({ - "file_path": "/src/main.rs", - "content": "fn main() {}\n" - }); - convo.add_entry(ConversationEntry { - parent_uuid: None, - is_sidechain: false, - entry_type: "assistant".to_string(), - uuid: "uuid-inp".to_string(), - timestamp: "2024-01-01T00:00:00Z".to_string(), - session_id: Some("test-session".to_string()), - message: Some(Message { - role: MessageRole::Assistant, - content: Some(MessageContent::Parts(vec![ - ContentPart::Text { - text: "Writing".to_string(), - }, - ContentPart::ToolUse { - id: "t1".to_string(), - name: "Write".to_string(), - input: input_json.clone(), - }, - ])), - model: None, - id: None, - message_type: None, - stop_reason: None, - stop_sequence: None, - usage: None, - }), - cwd: None, - git_branch: None, - version: None, - user_type: None, - request_id: None, - tool_use_result: None, - snapshot: None, - message_id: None, - extra: Default::default(), - }); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - let tool_step = &path.steps[1]; - let change = 
&tool_step.change["/src/main.rs"]; - let extra = &change.structural.as_ref().unwrap().extra; - assert_eq!(extra["input"], input_json); - } - - #[test] - fn test_derive_path_edit_tool_emits_unified_diff() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - let input_json = serde_json::json!({ - "file_path": "/src/login.rs", - "old_string": "validate_token()", - "new_string": "validate_token_v2()", - }); - convo.add_entry(ConversationEntry { - parent_uuid: None, - is_sidechain: false, - entry_type: "assistant".to_string(), - uuid: "uuid-edit".to_string(), - timestamp: "2024-01-01T00:00:00Z".to_string(), - session_id: Some("test-session".to_string()), - message: Some(Message { - role: MessageRole::Assistant, - content: Some(MessageContent::Parts(vec![ContentPart::ToolUse { - id: "t-edit".to_string(), - name: "Edit".to_string(), - input: input_json, - }])), - model: None, - id: None, - message_type: None, - stop_reason: None, - stop_sequence: None, - usage: None, - }), - cwd: None, - git_branch: None, - version: None, - user_type: None, - request_id: None, - tool_use_result: None, - snapshot: None, - message_id: None, - extra: Default::default(), - }); - - let path = derive_path(&convo, &DeriveConfig::default()); - // steps[0] = assistant turn, steps[1] = tool step (siblings). - let tool_step = &path.steps[1]; - let ch = &tool_step.change["/src/login.rs"]; - let raw = ch - .raw - .as_deref() - .expect("edit tool should emit unified diff"); - // Leading `/` is stripped from the header so `a/`/`b/` don't double up - // (git-style prefixes already denote the repo root). See #36. 
- assert!(raw.contains("--- a/src/login.rs"), "{}", raw); - assert!(raw.contains("+++ b/src/login.rs"), "{}", raw); - assert!( - !raw.contains("a//"), - "header should not double-slash: {}", - raw - ); - assert!(raw.contains("-validate_token()"), "{}", raw); - assert!(raw.contains("+validate_token_v2()"), "{}", raw); - - // Sanity-check the parent wiring that the chat view relies on: - // the tool step's parent is the assistant step, and they share - // the same `entry.uuid` root so the frontend splice works. - assert_eq!(tool_step.step.parents, vec![path.steps[0].step.id.clone()]); - } - - // ── tool result assembly ────────────────────────────────────────── - - #[test] - fn test_derive_path_tool_result_assembled() { - use crate::types::ToolResultContent; - - let mut convo = Conversation::new("test-session-12345678".to_string()); - - // Assistant entry with a tool use - convo.add_entry(ConversationEntry { - parent_uuid: None, - is_sidechain: false, - entry_type: "assistant".to_string(), - uuid: "uuid-assist-1".to_string(), - timestamp: "2024-01-01T00:00:00Z".to_string(), - session_id: Some("test-session".to_string()), - message: Some(Message { - role: MessageRole::Assistant, - content: Some(MessageContent::Parts(vec![ - ContentPart::Text { - text: "Let me read that file".to_string(), - }, - ContentPart::ToolUse { - id: "tu-read-1".to_string(), - name: "Read".to_string(), - input: serde_json::json!({"file_path": "/src/lib.rs"}), - }, - ])), - model: None, - id: None, - message_type: None, - stop_reason: None, - stop_sequence: None, - usage: None, - }), - cwd: None, - git_branch: None, - version: None, - user_type: None, - request_id: None, - tool_use_result: None, - snapshot: None, - message_id: None, - extra: Default::default(), - }); - - // Tool-result-only user entry - convo.add_entry(ConversationEntry { - parent_uuid: None, - is_sidechain: false, - entry_type: "user".to_string(), - uuid: "uuid-result-1".to_string(), - timestamp: 
"2024-01-01T00:00:01Z".to_string(), - session_id: Some("test-session".to_string()), - message: Some(Message { - role: MessageRole::User, - content: Some(MessageContent::Parts(vec![ContentPart::ToolResult { - tool_use_id: "tu-read-1".to_string(), - content: ToolResultContent::Text("fn main() {}".to_string()), - is_error: false, - }])), - model: None, - id: None, - message_type: None, - stop_reason: None, - stop_sequence: None, - usage: None, - }), - cwd: None, - git_branch: None, - version: None, - user_type: None, - request_id: None, - tool_use_result: None, - snapshot: None, - message_id: None, - extra: Default::default(), - }); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - // 3 steps: assistant conversation.append, tool.invoke, tool_result_user event. - // The tool-result-only entry is now preserved as a `conversation.event` - // step so its UUID survives the round-trip. - assert_eq!(path.steps.len(), 3); - - // The tool step is the second step. 
- let tool_step = &path.steps[1]; - assert_eq!(tool_step.step.id, "uuid-assist-1-tool-Read"); - let change = &tool_step.change["/src/lib.rs"]; - let extra = &change.structural.as_ref().unwrap().extra; - assert_eq!(extra["result"], "fn main() {}"); - assert_eq!(extra["is_error"], false); - } - - #[test] - fn test_derive_path_tool_result_error() { - use crate::types::ToolResultContent; - - let mut convo = Conversation::new("test-session-12345678".to_string()); - - convo.add_entry(ConversationEntry { - parent_uuid: None, - is_sidechain: false, - entry_type: "assistant".to_string(), - uuid: "uuid-assist-err".to_string(), - timestamp: "2024-01-01T00:00:00Z".to_string(), - session_id: Some("test-session".to_string()), - message: Some(Message { - role: MessageRole::Assistant, - content: Some(MessageContent::Parts(vec![ - ContentPart::Text { - text: "Running command".to_string(), - }, - ContentPart::ToolUse { - id: "tu-bash-1".to_string(), - name: "Bash".to_string(), - input: serde_json::json!({"command": "cargo test"}), - }, - ])), - model: None, - id: None, - message_type: None, - stop_reason: None, - stop_sequence: None, - usage: None, - }), - cwd: None, - git_branch: None, - version: None, - user_type: None, - request_id: None, - tool_use_result: None, - snapshot: None, - message_id: None, - extra: Default::default(), - }); - - // Tool result with error - convo.add_entry(ConversationEntry { - parent_uuid: None, - is_sidechain: false, - entry_type: "user".to_string(), - uuid: "uuid-result-err".to_string(), - timestamp: "2024-01-01T00:00:01Z".to_string(), - session_id: Some("test-session".to_string()), - message: Some(Message { - role: MessageRole::User, - content: Some(MessageContent::Parts(vec![ContentPart::ToolResult { - tool_use_id: "tu-bash-1".to_string(), - content: ToolResultContent::Text("compilation failed".to_string()), - is_error: true, - }])), - model: None, - id: None, - message_type: None, - stop_reason: None, - stop_sequence: None, - usage: None, - }), - 
cwd: None, - git_branch: None, - version: None, - user_type: None, - request_id: None, - tool_use_result: None, - snapshot: None, - message_id: None, - extra: Default::default(), - }); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - let tool_step = &path.steps[1]; - let bash_key = tool_step.change.keys().next().unwrap(); - let extra = &tool_step.change[bash_key] - .structural - .as_ref() - .unwrap() - .extra; - assert_eq!(extra["result"], "compilation failed"); - assert_eq!(extra["is_error"], true); - } - - // ── conversation.init step ──────────────────────────────────────── - - #[test] - fn test_derive_path_init_step_with_cwd() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - let mut entry = make_entry("uuid-1", MessageRole::User, "Hello", "2024-01-01T00:00:00Z"); - entry.cwd = Some("/home/user/project".to_string()); - entry.version = Some("1.2.3".to_string()); - convo.add_entry(entry); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - // Should have init step + conversation step - assert_eq!(path.steps.len(), 2); - - let init = &path.steps[0]; - assert_eq!(init.step.id, "test-session-12345678-init"); - assert_eq!(init.step.actor, "tool:claude-code"); - assert!(init.step.parents.is_empty()); - - let convo_key = format!("agent://claude/{}", convo.session_id); - let structural = init.change[&convo_key].structural.as_ref().unwrap(); - assert_eq!(structural.change_type, "conversation.init"); - assert_eq!(structural.extra["working_dir"], "/home/user/project"); - assert_eq!(structural.extra["version"], "1.2.3"); - } - - #[test] - fn test_derive_path_init_step_is_parent_of_first() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - let mut entry = make_entry("uuid-1", MessageRole::User, "Hello", "2024-01-01T00:00:00Z"); - entry.cwd = Some("/project".to_string()); - convo.add_entry(entry); - - let config = DeriveConfig::default(); - let 
path = derive_path(&convo, &config); - - // The first conversation step should have init as parent - assert_eq!(path.steps.len(), 2); - assert_eq!( - path.steps[1].step.parents, - vec!["test-session-12345678-init".to_string()] - ); - } - - #[test] - fn test_derive_path_init_step_with_git_branch() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - let mut entry = make_entry("uuid-1", MessageRole::User, "Hello", "2024-01-01T00:00:00Z"); - entry.git_branch = Some("feature/foo".to_string()); - convo.add_entry(entry); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - assert_eq!(path.steps.len(), 2); - let init = &path.steps[0]; - let convo_key = format!("agent://claude/{}", convo.session_id); - let structural = init.change[&convo_key].structural.as_ref().unwrap(); - assert_eq!(structural.extra["vcs_branch"], "feature/foo"); - } - - #[test] - fn test_derive_path_no_init_step_without_metadata() { - // Standard make_entry has no cwd/version/git_branch - let entries = vec![make_entry( - "uuid-1", - MessageRole::User, - "Hello", - "2024-01-01T00:00:00Z", - )]; - let convo = make_conversation(entries); - let config = DeriveConfig::default(); - - let path = derive_path(&convo, &config); - - // No init step should be generated - assert_eq!(path.steps.len(), 1); - assert_eq!(path.steps[0].step.id, "uuid-1"); - } - - // ── per-entry metadata capture ────────────────────────────────── - - #[test] - fn test_derive_path_captures_cwd_and_git_branch() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - let mut entry = make_entry( - "uuid-meta-1", - MessageRole::User, - "Hello", - "2024-01-01T00:00:00Z", - ); - entry.cwd = Some("/home/user/project".to_string()); - entry.git_branch = Some("main".to_string()); - convo.add_entry(entry); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - // Find the conversation.append step (skip init step) - let convo_key = 
format!("agent://claude/{}", convo.session_id); - let append_step = path - .steps - .iter() - .find(|s| { - s.change - .get(&convo_key) - .and_then(|c| c.structural.as_ref()) - .is_some_and(|sc| sc.change_type == "conversation.append") - }) - .expect("should have a conversation.append step"); - let extra = &append_step.change[&convo_key] - .structural - .as_ref() - .unwrap() - .extra; - - assert_eq!(extra["cwd"], "/home/user/project"); - assert_eq!(extra["git_branch"], "main"); - } - - #[test] - fn test_derive_path_captures_version() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - let mut entry = make_entry( - "uuid-meta-2", - MessageRole::User, - "Hello", - "2024-01-01T00:00:00Z", - ); - entry.version = Some("1.5.0".to_string()); - convo.add_entry(entry); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - let convo_key = format!("agent://claude/{}", convo.session_id); - let append_step = path - .steps - .iter() - .find(|s| { - s.change - .get(&convo_key) - .and_then(|c| c.structural.as_ref()) - .is_some_and(|sc| sc.change_type == "conversation.append") - }) - .expect("should have a conversation.append step"); - let extra = &append_step.change[&convo_key] - .structural - .as_ref() - .unwrap() - .extra; - - assert_eq!(extra["version"], "1.5.0"); - } - - #[test] - fn test_derive_path_captures_user_type_and_request_id() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - convo.add_entry(ConversationEntry { - parent_uuid: None, - is_sidechain: false, - entry_type: "assistant".to_string(), - uuid: "uuid-meta-3".to_string(), - timestamp: "2024-01-01T00:00:00Z".to_string(), - session_id: Some("test-session".to_string()), - message: Some(Message { - role: MessageRole::Assistant, - content: Some(MessageContent::Text("Response".to_string())), - model: Some("claude-sonnet-4-5-20250929".to_string()), - id: None, - message_type: None, - stop_reason: None, - stop_sequence: None, - 
usage: None, - }), - cwd: None, - git_branch: None, - version: None, - user_type: Some("external".to_string()), - request_id: Some("req-abc-123".to_string()), - tool_use_result: None, - snapshot: None, - message_id: None, - extra: Default::default(), - }); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - let convo_key = format!("agent://claude/{}", convo.session_id); - let extra = &path.steps[0].change[&convo_key] - .structural - .as_ref() - .unwrap() - .extra; - - assert_eq!(extra["user_type"], "external"); - assert_eq!(extra["request_id"], "req-abc-123"); - } - - #[test] - fn test_derive_path_captures_entry_extra() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - let mut entry_extra = HashMap::new(); - entry_extra.insert("entrypoint".to_string(), serde_json::json!("cli")); - entry_extra.insert("isMeta".to_string(), serde_json::json!(true)); - entry_extra.insert("slug".to_string(), serde_json::json!("my-slug")); - - convo.add_entry(ConversationEntry { - parent_uuid: None, - is_sidechain: false, - entry_type: "user".to_string(), - uuid: "uuid-meta-4".to_string(), - timestamp: "2024-01-01T00:00:00Z".to_string(), - session_id: Some("test-session".to_string()), - message: Some(Message { - role: MessageRole::User, - content: Some(MessageContent::Text("Hello".to_string())), - model: None, - id: None, - message_type: None, - stop_reason: None, - stop_sequence: None, - usage: None, - }), - cwd: None, - git_branch: None, - version: None, - user_type: None, - request_id: None, - tool_use_result: None, - snapshot: None, - message_id: None, - extra: entry_extra, - }); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - let convo_key = format!("agent://claude/{}", convo.session_id); - let extra = &path.steps[0].change[&convo_key] - .structural - .as_ref() - .unwrap() - .extra; - - let entry_extra_val = extra - .get("entry_extra") - .expect("entry_extra should be present"); 
- assert_eq!(entry_extra_val["entrypoint"], "cli"); - assert_eq!(entry_extra_val["isMeta"], true); - assert_eq!(entry_extra_val["slug"], "my-slug"); - } - - #[test] - fn test_derive_path_missing_metadata_not_included() { - // Standard make_entry has no cwd/version/git_branch/user_type/request_id/extra - let entries = vec![make_entry( - "uuid-meta-5", - MessageRole::User, - "Hello", - "2024-01-01T00:00:00Z", - )]; - let convo = make_conversation(entries); - let config = DeriveConfig::default(); - - let path = derive_path(&convo, &config); - - let convo_key = format!("agent://claude/{}", convo.session_id); - let extra = &path.steps[0].change[&convo_key] - .structural - .as_ref() - .unwrap() - .extra; - - // None of the per-entry metadata fields should be present - assert!(!extra.contains_key("cwd")); - assert!(!extra.contains_key("version")); - assert!(!extra.contains_key("git_branch")); - assert!(!extra.contains_key("user_type")); - assert!(!extra.contains_key("request_id")); - assert!(!extra.contains_key("entry_extra")); - } - - #[test] - fn test_derive_path_init_step_actor_registered() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - let mut entry = make_entry("uuid-1", MessageRole::User, "Hello", "2024-01-01T00:00:00Z"); - entry.cwd = Some("/project".to_string()); - convo.add_entry(entry); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - let actors = path.meta.as_ref().unwrap().actors.as_ref().unwrap(); - assert!(actors.contains_key("tool:claude-code")); - assert_eq!( - actors["tool:claude-code"].name.as_deref(), - Some("Claude Code") - ); - } - - // ── conversation.event steps (non-message entries) ──────────────── - - fn make_event_entry(uuid: &str, entry_type: &str, timestamp: &str) -> ConversationEntry { - ConversationEntry { - parent_uuid: None, - is_sidechain: false, - entry_type: entry_type.to_string(), - uuid: uuid.to_string(), - timestamp: timestamp.to_string(), - session_id: 
Some("test-session".to_string()), - cwd: None, - git_branch: None, - version: None, - message: None, - user_type: None, - request_id: None, - tool_use_result: None, - snapshot: None, - message_id: None, - extra: Default::default(), - } - } - - #[test] - fn test_derive_path_attachment_entry_captured_as_event() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - convo.add_entry(make_entry( - "uuid-1", - MessageRole::User, - "Hello", - "2024-01-01T00:00:00Z", - )); - convo.add_entry(make_event_entry( - "uuid-attach-1", - "attachment", - "2024-01-01T00:00:01Z", - )); - convo.add_entry(make_entry( - "uuid-2", - MessageRole::Assistant, - "Hi", - "2024-01-01T00:00:02Z", - )); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - // 3 steps: user, attachment event, assistant - assert_eq!(path.steps.len(), 3); - - let event_step = &path.steps[1]; - assert_eq!(event_step.step.id, "uuid-attach-1"); - assert_eq!(event_step.step.actor, "tool:claude-code"); - - let convo_key = format!("agent://claude/{}", convo.session_id); - let structural = event_step.change[&convo_key].structural.as_ref().unwrap(); - assert_eq!(structural.change_type, "conversation.event"); - assert_eq!(structural.extra["entry_type"], "attachment"); - } - - #[test] - fn test_derive_path_system_entry_captured_as_event() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - convo.add_entry(make_entry( - "uuid-sys", - MessageRole::System, - "Turn duration: 5s", - "2024-01-01T00:00:00Z", - )); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - assert_eq!(path.steps.len(), 1); - let event_step = &path.steps[0]; - assert_eq!(event_step.step.actor, "tool:claude-code"); - - let convo_key = format!("agent://claude/{}", convo.session_id); - let structural = event_step.change[&convo_key].structural.as_ref().unwrap(); - assert_eq!(structural.change_type, "conversation.event"); - 
assert_eq!(structural.extra["entry_type"], "system"); - assert_eq!(structural.extra["text"], "Turn duration: 5s"); - } - - #[test] - fn test_derive_path_empty_uuid_entry_gets_synthetic_id() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - let mut event = make_event_entry("", "permission-mode", "2024-01-01T00:00:00Z"); - event.uuid = String::new(); - convo.add_entry(event); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - assert_eq!(path.steps.len(), 1); - // Synthetic ID: {session_id}-event-{index} - assert_eq!(path.steps[0].step.id, "test-session-12345678-event-0"); - } - - #[test] - fn test_derive_path_event_steps_dont_advance_parent_chain() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - convo.add_entry(make_entry( - "uuid-u1", - MessageRole::User, - "Hello", - "2024-01-01T00:00:00Z", - )); - convo.add_entry(make_event_entry( - "uuid-attach", - "attachment", - "2024-01-01T00:00:01Z", - )); - convo.add_entry(make_entry( - "uuid-a1", - MessageRole::Assistant, - "Hi", - "2024-01-01T00:00:02Z", - )); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - assert_eq!(path.steps.len(), 3); - // The assistant step's parent should be the USER step, not the event step - assert_eq!(path.steps[2].step.parents, vec!["uuid-u1".to_string()]); - // The head should be the assistant step, not the event step - assert_eq!(path.path.head, "uuid-a1"); - } - - #[test] - fn test_derive_path_event_step_extras_contain_metadata() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - let mut event = - make_event_entry("uuid-ev1", "file-history-snapshot", "2024-01-01T00:00:00Z"); - event.cwd = Some("/home/user/project".to_string()); - event.version = Some("1.5.0".to_string()); - event.git_branch = Some("main".to_string()); - event.user_type = Some("external".to_string()); - event.snapshot = Some(serde_json::json!({"files": 
["/src/main.rs"]})); - event.message_id = Some("msg-123".to_string()); - convo.add_entry(event); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - // First step is init (because cwd is present), second is the event - let convo_key = format!("agent://claude/{}", convo.session_id); - // Find the event step (skip init) - let event_step = path - .steps - .iter() - .find(|s| { - s.change - .get(&convo_key) - .and_then(|c| c.structural.as_ref()) - .is_some_and(|sc| sc.change_type == "conversation.event") - }) - .expect("should have a conversation.event step"); - let extra = &event_step.change[&convo_key] - .structural - .as_ref() - .unwrap() - .extra; - - assert_eq!(extra["entry_type"], "file-history-snapshot"); - assert_eq!(extra["cwd"], "/home/user/project"); - assert_eq!(extra["version"], "1.5.0"); - assert_eq!(extra["git_branch"], "main"); - assert_eq!(extra["user_type"], "external"); - assert_eq!( - extra["snapshot"], - serde_json::json!({"files": ["/src/main.rs"]}) - ); - assert_eq!(extra["message_id"], "msg-123"); - } - - #[test] - fn test_derive_path_event_entry_extra_preserved() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - let mut event = make_event_entry("uuid-ev2", "attachment", "2024-01-01T00:00:00Z"); - let mut extras = HashMap::new(); - extras.insert("hookName".to_string(), serde_json::json!("pre-tool-use")); - extras.insert("toolName".to_string(), serde_json::json!("Bash")); - event.extra = extras; - convo.add_entry(event); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - let convo_key = format!("agent://claude/{}", convo.session_id); - let extra = &path.steps[0].change[&convo_key] - .structural - .as_ref() - .unwrap() - .extra; - - let entry_extra = extra - .get("entry_extra") - .expect("entry_extra should be present"); - assert_eq!(entry_extra["hookName"], "pre-tool-use"); - assert_eq!(entry_extra["toolName"], "Bash"); - } - - #[test] - fn 
test_derive_path_event_with_parent_uuid() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - convo.add_entry(make_entry( - "uuid-u1", - MessageRole::User, - "Hello", - "2024-01-01T00:00:00Z", - )); - let mut event = make_event_entry("uuid-ev-parent", "attachment", "2024-01-01T00:00:01Z"); - event.parent_uuid = Some("uuid-u1".to_string()); - convo.add_entry(event); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - // Event step should use its own parent_uuid - assert_eq!(path.steps[1].step.parents, vec!["uuid-u1".to_string()]); - } - - #[test] - fn test_resolve_local_dir_prefers_entry_cwd() { - let dir = resolve_local_dir( - Some("/from/config"), - Some("/from/convo"), - Some("/from/entry"), - ) - .unwrap(); - assert_eq!(dir, "/from/entry"); - } - - #[test] - fn test_resolve_local_dir_falls_back_to_config_then_convo() { - let dir = resolve_local_dir(Some("/from/config"), Some("/from/convo"), None).unwrap(); - assert_eq!(dir, "/from/config"); - let dir = resolve_local_dir(None, Some("/from/convo"), None).unwrap(); - assert_eq!(dir, "/from/convo"); - assert!(resolve_local_dir(None, None, None).is_none()); - } - - #[test] - fn test_resolve_local_dir_strips_file_prefix() { - let dir = resolve_local_dir(Some("file:///usr/local/src"), None, None).unwrap(); - assert_eq!(dir, "/usr/local/src"); - } - - /// End-to-end: spin up a real tempdir git repo with a tracked file, - /// run a Claude Write-tool invocation through `derive_path`, and - /// verify the resulting `raw` diff shows `-` lines for the prior - /// committed content (not just `+` additions). - #[test] - fn test_write_tool_before_state_comes_from_git_head() { - use std::process::Command; - let tmp = tempfile::tempdir().unwrap(); - let root = tmp.path(); - - // Initialise a tiny git repo with a file checked in at HEAD. 
- let run = |args: &[&str]| { - let out = Command::new("git") - .current_dir(root) - .args(args) - .output() - .expect("git on PATH"); - assert!( - out.status.success(), - "git {:?} failed: {}", - args, - String::from_utf8_lossy(&out.stderr) - ); - }; - run(&["init", "-q", "-b", "main"]); - run(&["config", "user.email", "test@example.com"]); - run(&["config", "user.name", "Test"]); - run(&["config", "commit.gpgsign", "false"]); - std::fs::write(root.join("hello.txt"), "old-content\n").unwrap(); - run(&["add", "hello.txt"]); - run(&["commit", "-q", "-m", "init"]); - - // Build a minimal Conversation with one assistant entry that - // carries a Write tool use against `hello.txt`. - let mut convo = Conversation::new("test-session-42".to_string()); - let mut entry = make_entry( - "uuid-w", - MessageRole::Assistant, - "writing", - "2024-01-01T00:00:00Z", - ); - entry.cwd = Some(root.to_string_lossy().into_owned()); - // Override message content with a Write tool_use content part. - if let Some(msg) = &mut entry.message { - msg.content = Some(MessageContent::Parts(vec![ContentPart::ToolUse { - id: "tu-1".into(), - name: "Write".into(), - input: json!({ - "file_path": root.join("hello.txt").to_string_lossy(), - "content": "new-content\n", - }), - }])); - } - convo.add_entry(entry); - - let path = derive_path(&convo, &DeriveConfig::default()); - - // Find the tool step and its Write artifact change. 
- let artifact_key = root.join("hello.txt").to_string_lossy().into_owned(); - let change = path - .steps - .iter() - .find_map(|s| s.change.get(&artifact_key)) - .expect("tool step with hello.txt artifact"); - let raw = change.raw.as_deref().expect("Write should emit raw diff"); - assert!( - raw.contains("-old-content"), - "expected removal line, got:\n{raw}" - ); - assert!( - raw.contains("+new-content"), - "expected addition line, got:\n{raw}" - ); - } - - /// Symmetric fallback: no git repo → before-state resolver returns - /// None → `file_write_diff` produces an addition-only diff (existing - /// behaviour preserved for new files / non-git projects). - #[test] - fn test_write_tool_falls_back_to_addition_only_without_git() { - let tmp = tempfile::tempdir().unwrap(); - let root = tmp.path(); - - let mut convo = Conversation::new("test-session-43".to_string()); - let mut entry = make_entry( - "uuid-w", - MessageRole::Assistant, - "writing", - "2024-01-01T00:00:00Z", - ); - entry.cwd = Some(root.to_string_lossy().into_owned()); - if let Some(msg) = &mut entry.message { - msg.content = Some(MessageContent::Parts(vec![ContentPart::ToolUse { - id: "tu-1".into(), - name: "Write".into(), - input: json!({ - "file_path": root.join("new.txt").to_string_lossy(), - "content": "fresh\n", - }), - }])); - } - convo.add_entry(entry); - + fn derive_path_validates_as_single_path_graph() { + let convo = make_convo(); let path = derive_path(&convo, &DeriveConfig::default()); - let artifact_key = root.join("new.txt").to_string_lossy().into_owned(); - let raw = path - .steps - .iter() - .find_map(|s| s.change.get(&artifact_key)) - .and_then(|c| c.raw.as_deref()) - .expect("Write should emit raw diff"); - assert!(raw.contains("+fresh")); - // No `-` lines (other than the `---` header). 
- assert!( - !raw.lines() - .any(|l| l.starts_with('-') && !l.starts_with("---")), - "unexpected removal line in:\n{raw}" - ); - } - - #[test] - fn test_derive_path_event_with_tool_use_result() { - let mut convo = Conversation::new("test-session-12345678".to_string()); - let mut event = make_event_entry("uuid-ev-tur", "attachment", "2024-01-01T00:00:00Z"); - event.tool_use_result = Some(serde_json::json!({ - "tool_use_id": "tu-123", - "content": "hook output" - })); - convo.add_entry(event); - - let config = DeriveConfig::default(); - let path = derive_path(&convo, &config); - - let convo_key = format!("agent://claude/{}", convo.session_id); - let extra = &path.steps[0].change[&convo_key] - .structural - .as_ref() - .unwrap() - .extra; - - assert_eq!(extra["tool_use_result"]["tool_use_id"], "tu-123"); - assert_eq!(extra["tool_use_result"]["content"], "hook output"); + let doc = Graph::from_path(path); + let json = doc.to_json().unwrap(); + let parsed = Graph::from_json(&json).unwrap(); + let pp = parsed.single_path().expect("single-path graph"); + let anc = toolpath::v1::query::ancestors(&pp.steps, &pp.path.head); + assert_eq!(anc.len(), pp.steps.len(), "all steps on head ancestry"); } } diff --git a/crates/toolpath-claude/src/provider.rs b/crates/toolpath-claude/src/provider.rs index 1daecda..d5c470f 100644 --- a/crates/toolpath-claude/src/provider.rs +++ b/crates/toolpath-claude/src/provider.rs @@ -99,6 +99,8 @@ fn message_to_turn(entry: &ConversationEntry, msg: &Message) -> Turn { }) .collect(); + let file_mutations = compute_file_mutations(&tool_uses, entry.cwd.as_deref()); + let token_usage = msg.usage.as_ref().map(|u| TokenUsage { input_tokens: u.input_tokens, output_tokens: u.output_tokens, @@ -167,10 +169,101 @@ fn message_to_turn(entry: &ConversationEntry, msg: &Message) -> Turn { environment, delegations, extra, - file_mutations: Vec::new(), + file_mutations, } } +/// For each file-write tool invocation in the turn, synthesize a unified +/// diff via 
[`toolpath_convo::file_write_diff`] and pre-resolve the
+/// before-state for `Write` via `git show HEAD:<path>` (best-effort).
+/// Each mutation links back to its tool via `tool_id`.
+fn compute_file_mutations(
+    tool_uses: &[ToolInvocation],
+    cwd: Option<&str>,
+) -> Vec<toolpath_convo::FileMutation> {
+    let mut out = Vec::new();
+    for tu in tool_uses {
+        if tu.category != Some(ToolCategory::FileWrite) {
+            continue;
+        }
+        let Some(path) = extract_file_path_for_tool(&tu.input) else {
+            continue;
+        };
+        // Only `Write` carries whole-file content; consult git HEAD for
+        // its pre-image so the diff isn't addition-only. Other tools
+        // (Edit / MultiEdit / NotebookEdit) carry old_string/new_string
+        // pairs and don't need a before-state lookup.
+        let before_state = if tu.name == "Write" {
+            cwd.and_then(|c| git_head_content(c, &path))
+        } else {
+            None
+        };
+        let raw_diff =
+            toolpath_convo::file_write_diff(&tu.name, &tu.input, &path, before_state.as_deref());
+        let operation = match tu.name.as_str() {
+            "Write" => Some("add".to_string()),
+            "Edit" | "MultiEdit" | "NotebookEdit" => Some("update".to_string()),
+            _ => None,
+        };
+        let after = match tu.name.as_str() {
+            "Write" => tu
+                .input
+                .get("content")
+                .and_then(|v| v.as_str())
+                .map(|s| s.to_string()),
+            _ => None,
+        };
+        out.push(toolpath_convo::FileMutation {
+            path,
+            tool_id: Some(tu.id.clone()),
+            operation,
+            raw_diff,
+            before: before_state,
+            after,
+            rename_to: None,
+        });
+    }
+    out
+}
+
+/// Best-effort lookup of a file's contents at `HEAD` in the git repo
+/// rooted at `repo_dir` (or one of its ancestors). Shells out to `git
+/// show HEAD:<path>`. Returns `None` when any of these hold:
+/// `repo_dir` isn't inside a git repo, `path` isn't tracked at `HEAD`,
+/// `git` isn't on `PATH`, or the command otherwise fails.
+fn git_head_content(repo_dir: &str, path: &str) -> Option<String> {
+    use std::path::Path as FsPath;
+    use std::process::Command;
+    let repo = FsPath::new(repo_dir);
+    let file = FsPath::new(path);
+    let rel = if file.is_absolute() {
+        file.strip_prefix(repo).ok()?.to_path_buf()
+    } else {
+        file.to_path_buf()
+    };
+    let rel_str = rel.to_string_lossy().replace('\\', "/");
+    let output = Command::new("git")
+        .arg("-C")
+        .arg(repo)
+        .arg("show")
+        .arg(format!("HEAD:./{rel_str}")) // `./` = relative to `repo_dir`, not the repo root
+        .output()
+        .ok()?;
+    if !output.status.success() {
+        return None;
+    }
+    String::from_utf8(output.stdout).ok()
+}
+
+fn extract_file_path_for_tool(input: &serde_json::Value) -> Option<String> {
+    for k in ["file_path", "path", "filename", "file"] {
+        if let Some(s) = input.get(k).and_then(|v| v.as_str()) {
+            return Some(s.to_string());
+        }
+    }
+    None
+}
+
 /// Extract delegation info from Task tool invocations.
 fn extract_delegations(tool_uses: &[ToolInvocation]) -> Vec {
     tool_uses
@@ -302,6 +395,46 @@ fn conversation_to_view(convo: &Conversation) -> ConversationView {
     let total_usage = sum_usage(&turns);
     let files_changed = extract_files_changed(&turns);
 
+    // Pull path-level base/producer from the first entry that carries the
+    // metadata (Claude records cwd / git_branch / version on every
+    // conversational entry; the first one is the canonical "this is where
+    // we started").
+ let mut base = toolpath_convo::SessionBase::default(); + let mut producer_version: Option = None; + for entry in &convo.entries { + if base.working_dir.is_none() + && let Some(cwd) = &entry.cwd + { + base.working_dir = Some(cwd.clone()); + } + if base.vcs_branch.is_none() + && let Some(b) = &entry.git_branch + { + base.vcs_branch = Some(b.clone()); + } + if producer_version.is_none() + && let Some(v) = &entry.version + { + producer_version = Some(v.clone()); + } + if base.working_dir.is_some() && base.vcs_branch.is_some() && producer_version.is_some() { + break; + } + } + let view_base = if base.working_dir.is_some() + || base.vcs_branch.is_some() + || base.vcs_revision.is_some() + || base.vcs_remote.is_some() + { + Some(base) + } else { + None + }; + let producer = producer_version.map(|v| toolpath_convo::ProducerInfo { + name: "claude-code".into(), + version: Some(v), + }); + ConversationView { id: convo.session_id.clone(), started_at: convo.started_at, @@ -312,7 +445,8 @@ fn conversation_to_view(convo: &Conversation) -> ConversationView { files_changed, session_ids: vec![], events, - ..Default::default() + base: view_base, + producer, } } From c70f5fc6df5ede904d42660eeceddd6889c3e50d Mon Sep 17 00:00:00 2001 From: Ben Barber Date: Thu, 14 May 2026 16:20:11 -0400 Subject: [PATCH 07/10] toolpath-gemini: migrate to shared derive_path; provider unification complete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gemini's `derive.rs` shrinks from 912 lines to ~135. All gemini-specific work moves into `provider.rs`: - `conversation_to_view` sets `view.base` from `project_path` or `main.directories()` and `view.producer` (name: "gemini-cli", no version recorded in the source format). - New `compute_file_mutations(msg.tool_calls())` helper populates `Turn.file_mutations` for every `FileWrite`-category call. 
Diff preference: gemini's own `resultDisplay.fileDiff` when present (the harness already computed it), otherwise a hand-rolled fallback from args (old/new pair for `replace`, content for `write_file`). `tool_id` links to the source `ToolCall::id`. - `fallback_raw_diff` moved verbatim from `derive.rs`. - Turns are linked sequentially via `parent_id` since Gemini's wire format doesn't carry one on messages. (Sub-agent linearization via `Turn.delegations` was already working — unchanged.) Behavior changes for gemini-derived paths: - File-mutation `structural.type` unifies on `"file.write"` (was `gemini.write_file` / `gemini.replace` / `gemini.edit`). - Tool-call summary array on `conversation.append` dropped (same redundancy as we addressed for codex/opencode/claude). - Conversation-artifact key shifts to `gemini-cli://` (matches the `://` convention every other provider now uses). `derive.rs` reduced to: wrap with title override + `view = to_view()` + delegate to `toolpath_convo::derive_path`. Old test suite (26 tests) replaced with four smoke checks. This is the last provider migration. All five conversation provider crates (`pi`, `codex`, `opencode`, `claude`, `gemini`) now share the canonical `ConversationView → Path` mapping. Each provider's `derive.rs` is a one-line wrapper; the IO and provider-specific fidelity work lives in `to_view`. Cross-harness `matrix_translation` passes for all 25 cells. --- crates/toolpath-gemini/src/derive.rs | 924 ++----------------------- crates/toolpath-gemini/src/provider.rs | 108 ++- 2 files changed, 168 insertions(+), 864 deletions(-) diff --git a/crates/toolpath-gemini/src/derive.rs b/crates/toolpath-gemini/src/derive.rs index f0bc090..c4c7fa1 100644 --- a/crates/toolpath-gemini/src/derive.rs +++ b/crates/toolpath-gemini/src/derive.rs @@ -1,24 +1,14 @@ //! Derive Toolpath documents from Gemini CLI conversation logs. //! -//! The conversation is modeled as an artifact at -//! `gemini://`. 
Each turn appends to that artifact via a -//! `conversation.append` structural change. File mutations from -//! `write_file` and `replace` tool calls appear as sibling artifacts in -//! the same step's `change` map. -//! -//! Sub-agent chats are linearized into the path as additional steps -//! parented to the main assistant step whose `task` tool invocation -//! spawned them (document order, matching [`crate::provider`]). +//! Thin wrapper around the shared [`toolpath_convo::derive_path`]. All +//! Gemini-specific work (sub-agent linearization, file-diff synthesis, +//! producer/base population) happens in +//! [`crate::provider::to_view`]; nothing provider-specific lives in +//! this module. -use crate::provider::{file_path_from_args, tool_category}; -use crate::types::{ChatFile, Conversation, GeminiMessage, GeminiRole, ToolCall}; -use serde_json::json; -use std::collections::HashMap; -use toolpath::v1::{ - ActorDefinition, ArtifactChange, Base, Identity, Path, PathIdentity, PathMeta, Step, - StepIdentity, StructuralChange, -}; -use toolpath_convo::ToolCategory; +use crate::provider::to_view; +use crate::types::Conversation; +use toolpath::v1::Path; /// Configuration for deriving Toolpath documents from Gemini conversations. #[derive(Debug, Clone, Default)] @@ -31,110 +21,22 @@ pub struct DeriveConfig { /// Derive a single Toolpath [`Path`] from a Gemini conversation. pub fn derive_path(conversation: &Conversation, config: &DeriveConfig) -> Path { - let session_short = safe_prefix(&conversation.main.session_id, 8); - let path_id = if session_short.is_empty() { - format!("path-gemini-{}", safe_prefix(&conversation.session_uuid, 8)) - } else { - format!("path-gemini-{}", session_short) - }; - let convo_artifact = convo_artifact_uri(&conversation.main); - - let mut actors: HashMap = HashMap::new(); - let mut steps: Vec = Vec::new(); - - // Index sub-agents deterministically by start_time so we attach them - // in the same order as the provider. 
- let mut sub_order: Vec<&ChatFile> = conversation.sub_agents.iter().collect(); - sub_order.sort_by_key(|s| s.start_time); - let mut sub_iter = sub_order.into_iter(); - - let mut last_step_id: Option = None; - - for msg in &conversation.main.messages { - let Some(step) = build_step( - msg, - &convo_artifact, - last_step_id.as_deref(), - &mut actors, - config, - ) else { - continue; - }; - let step_id = step.step.id.clone(); - steps.push(step); - - // For each delegation-category tool call, pull the next sub-agent - // off the queue and append its messages as steps parented under - // this main step. - let delegation_calls: Vec<&ToolCall> = msg - .tool_calls() - .iter() - .filter(|t| tool_category(&t.name) == Some(ToolCategory::Delegation)) - .collect(); - for _ in &delegation_calls { - if let Some(sub) = sub_iter.next() { - append_sub_agent_steps(sub, &step_id, &mut steps, &mut actors, config); - } - } - - last_step_id = Some(step_id); - } - - // Leftover sub-agents attach to the last step we emitted. 
- let leftover: Vec<&ChatFile> = sub_iter.collect(); - if !leftover.is_empty() - && let Some(parent) = last_step_id.clone() - { - for sub in leftover { - append_sub_agent_steps(sub, &parent, &mut steps, &mut actors, config); + let view = to_view(conversation); + let prefix: String = view.id.chars().take(8).collect(); + let base_uri = config.project_path.as_ref().map(|p| { + if p.starts_with('/') { + format!("file://{}", p) + } else { + p.clone() } - } - - let head = last_step_id.unwrap_or_else(|| "empty".to_string()); - - let base_uri = config - .project_path - .clone() - .or_else(|| conversation.project_path.clone()) - .or_else(|| { - conversation - .main - .directories() - .first() - .map(|p| p.to_string_lossy().to_string()) - }) - .map(|p| format!("file://{}", p)); - - Path { - path: PathIdentity { - id: path_id, - base: base_uri.map(|uri| Base { - uri, - ref_str: None, - branch: None, - }), - head, - graph_ref: None, - }, - steps, - meta: Some(PathMeta { - title: Some(format!( - "Gemini session: {}", - if session_short.is_empty() { - safe_prefix(&conversation.session_uuid, 8) - } else { - session_short - } - )), - source: Some("gemini-cli".to_string()), - actors: if actors.is_empty() { - None - } else { - Some(actors) - }, - ..Default::default() - }), - } + }); + let cfg = toolpath_convo::DeriveConfig { + base_uri, + title: Some(format!("Gemini session: {}", prefix)), + include_thinking: config.include_thinking, + ..Default::default() + }; + toolpath_convo::derive_path(&view, &cfg) } /// Derive Toolpath Paths from multiple conversations. 
@@ -145,416 +47,51 @@ pub fn derive_project(conversations: &[Conversation], config: &DeriveConfig) -> .collect() } -// ── Step construction ──────────────────────────────────────────────── - -fn build_step( - msg: &GeminiMessage, - convo_artifact: &str, - parent_id: Option<&str>, - actors: &mut HashMap, - config: &DeriveConfig, -) -> Option { - if msg.id.is_empty() { - return None; - } - - let (actor, role_str) = resolve_actor(msg, actors); - - let mut file_changes: HashMap = HashMap::new(); - let mut text_parts: Vec = Vec::new(); - let mut tool_calls_meta: Vec = Vec::new(); - - let content_text = msg.content.text(); - if !content_text.trim().is_empty() { - text_parts.push(content_text); - } - if config.include_thinking && !msg.thoughts().is_empty() { - for t in msg.thoughts() { - let subject = t.subject.as_deref().unwrap_or(""); - let description = t.description.as_deref().unwrap_or(""); - let combined = match (subject.is_empty(), description.is_empty()) { - (false, false) => format!("[thinking: {}] {}", subject, description), - (false, true) => format!("[thinking] {}", subject), - (true, false) => format!("[thinking] {}", description), - (true, true) => continue, - }; - text_parts.push(combined); - } - } - - for call in msg.tool_calls() { - tool_calls_meta.push(serde_json::json!({ - "name": call.name, - "status": call.status, - "summary": tool_call_summary(call), - })); - if matches!(tool_category(&call.name), Some(ToolCategory::FileWrite)) - && let Some(fp) = file_path_from_args(&call.args) - { - let new_change = build_file_write_change(call); - // If the same file is touched twice by one message (rare but - // possible), prefer the first; downstream steps show the - // later edit distinctly. 
- file_changes.entry(fp).or_insert(new_change); - } - } - - if text_parts.is_empty() && tool_calls_meta.is_empty() && file_changes.is_empty() { - return None; - } - - let mut convo_extra = HashMap::new(); - convo_extra.insert("role".to_string(), json!(role_str)); - if !text_parts.is_empty() { - let combined = text_parts.join("\n\n"); - convo_extra.insert("text".to_string(), json!(combined)); - } - if !tool_calls_meta.is_empty() { - convo_extra.insert("tool_calls".to_string(), json!(tool_calls_meta)); - } - - let convo_change = ArtifactChange { - raw: None, - structural: Some(StructuralChange { - change_type: "conversation.append".to_string(), - extra: convo_extra, - }), - }; - - let mut changes: HashMap = HashMap::new(); - changes.insert(convo_artifact.to_string(), convo_change); - changes.extend(file_changes); - - let step_id = format!("step-{}", safe_prefix(&msg.id, 8)); - let parents = parent_id.map(|p| vec![p.to_string()]).unwrap_or_default(); - - Some(Step { - step: StepIdentity { - id: step_id, - parents, - actor, - timestamp: msg.timestamp.clone(), - }, - change: changes, - meta: None, - }) -} - -/// Build an `ArtifactChange` for a single file-write tool invocation. -/// -/// Always populates at least one perspective (per RFC §"Change -/// Perspectives"): `raw` is preferred when Gemini's `resultDisplay` -/// carries a `fileDiff`; otherwise we fall back to a hand-rolled -/// unified-diff hunk for `replace`, or a "new file" hunk for -/// `write_file`. `structural` mirrors the tool name and captures the -/// raw args (trimmed) so downstream consumers have machine-readable -/// detail. -fn build_file_write_change(call: &ToolCall) -> ArtifactChange { - let raw = call.file_diff().or_else(|| fallback_raw_diff(call)); - let structural = Some(StructuralChange { - change_type: format!("gemini.{}", call.name), - extra: structural_extra_for(call), - }); - ArtifactChange { raw, structural } -} - -/// Compact human-readable summary of a tool call's salient args. 
Used -/// in `conversation.append` structural payloads so shell commands, -/// grep patterns, read targets, etc. aren't dropped during derivation. -fn tool_call_summary(call: &ToolCall) -> String { - let pick = |k: &str| -> Option<&str> { call.args.get(k).and_then(|v| v.as_str()) }; - let summary = match call.name.as_str() { - "run_shell_command" => pick("command").map(str::to_string), - "read_file" | "read_many_files" | "list_directory" => pick("file_path") - .or_else(|| pick("path")) - .map(str::to_string), - "write_file" | "replace" | "edit" => pick("file_path").map(str::to_string), - "glob" => pick("pattern").map(str::to_string), - "grep_search" | "search_file_content" => pick("pattern").map(str::to_string), - "web_fetch" => pick("url").map(str::to_string), - "google_web_search" => pick("query").map(str::to_string), - "task" | "activate_skill" => pick("prompt").map(str::to_string), - "get_internal_docs" => pick("path").map(str::to_string), - _ => None, - }; - summary.unwrap_or_default() -} - -fn structural_extra_for(call: &ToolCall) -> HashMap { - let mut extra = HashMap::new(); - match call.name.as_str() { - "write_file" => { - let content = call - .args - .get("content") - .and_then(|v| v.as_str()) - .unwrap_or(""); - extra.insert("operation".into(), json!("write")); - extra.insert("byte_count".into(), json!(content.len())); - extra.insert("line_count".into(), json!(content.lines().count())); - } - "replace" => { - let old_s = call - .args - .get("old_string") - .and_then(|v| v.as_str()) - .unwrap_or(""); - let new_s = call - .args - .get("new_string") - .and_then(|v| v.as_str()) - .unwrap_or(""); - let instruction = call - .args - .get("instruction") - .and_then(|v| v.as_str()) - .unwrap_or(""); - extra.insert("operation".into(), json!("replace")); - extra.insert("old_string".into(), json!(old_s)); - extra.insert("new_string".into(), json!(new_s)); - if !instruction.is_empty() { - extra.insert("instruction".into(), json!(instruction)); - } - } - "edit" => { 
- extra.insert("operation".into(), json!("edit")); - } - _ => { - extra.insert("operation".into(), json!(call.name.clone())); - } - } - extra.insert("status".into(), json!(call.status)); - extra -} - -/// Construct a unified-diff hunk when Gemini's `resultDisplay.fileDiff` -/// is absent. Not pixel-perfect but good enough to give readers a -/// change perspective. -fn fallback_raw_diff(call: &ToolCall) -> Option { - match call.name.as_str() { - "replace" => { - let old_s = call.args.get("old_string").and_then(|v| v.as_str())?; - let new_s = call.args.get("new_string").and_then(|v| v.as_str())?; - let old_lines: Vec<&str> = old_s.split('\n').collect(); - let new_lines: Vec<&str> = new_s.split('\n').collect(); - let mut buf = format!("@@ -1,{} +1,{} @@\n", old_lines.len(), new_lines.len()); - for l in old_lines { - buf.push('-'); - buf.push_str(l); - buf.push('\n'); - } - for l in new_lines { - buf.push('+'); - buf.push_str(l); - buf.push('\n'); - } - Some(buf) - } - "write_file" => { - let content = call.args.get("content").and_then(|v| v.as_str())?; - let lines: Vec<&str> = content.split('\n').collect(); - let mut buf = format!("@@ -0,0 +1,{} @@\n", lines.len()); - for l in lines { - buf.push('+'); - buf.push_str(l); - buf.push('\n'); - } - Some(buf) - } - _ => None, - } -} - -/// Append every message in a sub-agent chat as a step parented under -/// `parent_step_id`, linearizing internally. -fn append_sub_agent_steps( - sub: &ChatFile, - parent_step_id: &str, - steps: &mut Vec, - actors: &mut HashMap, - config: &DeriveConfig, -) { - let convo_artifact = convo_artifact_uri(sub); - let mut local_parent = parent_step_id.to_string(); - - for msg in &sub.messages { - if let Some(mut step) = - build_step(msg, &convo_artifact, Some(&local_parent), actors, config) - { - // Prefix sub-agent step IDs to avoid collisions with main-chat - // step IDs (which are derived from the message UUID prefix). 
- let session_tag = if sub.session_id.is_empty() { - "sub".to_string() - } else { - safe_prefix(&sub.session_id, 6) - }; - step.step.id = format!("sub-{}-{}", session_tag, safe_prefix(&msg.id, 8)); - step.step.parents = vec![local_parent.clone()]; - local_parent = step.step.id.clone(); - steps.push(step); - } - } -} - -fn resolve_actor( - msg: &GeminiMessage, - actors: &mut HashMap, -) -> (String, &'static str) { - match &msg.role { - GeminiRole::User => { - actors - .entry("human:user".to_string()) - .or_insert_with(|| ActorDefinition { - name: Some("User".to_string()), - ..Default::default() - }); - ("human:user".to_string(), "user") - } - GeminiRole::Gemini => { - let (actor_key, model_str) = match &msg.model { - Some(m) if !m.is_empty() => (format!("agent:{}", m), m.clone()), - _ => ("agent:gemini-cli".to_string(), "gemini-cli".to_string()), - }; - actors - .entry(actor_key.clone()) - .or_insert_with(|| ActorDefinition { - name: Some("Gemini CLI".to_string()), - provider: Some("google".to_string()), - model: Some(model_str.clone()), - identities: vec![Identity { - system: "google".to_string(), - id: model_str, - }], - ..Default::default() - }); - (actor_key, "gemini") - } - GeminiRole::Info => { - actors - .entry("system:gemini-cli".to_string()) - .or_insert_with(|| ActorDefinition { - name: Some("Gemini CLI system".to_string()), - provider: Some("google".to_string()), - ..Default::default() - }); - ("system:gemini-cli".to_string(), "info") - } - GeminiRole::Other(s) => { - let key = format!("other:{}", s); - actors - .entry(key.clone()) - .or_insert_with(|| ActorDefinition { - name: Some(s.clone()), - ..Default::default() - }); - // Static string only — unknown roles render as "other" in the - // conversation.append payload for readability. 
- (key, "other") - } - } -} - -fn convo_artifact_uri(chat: &ChatFile) -> String { - let sid = if chat.session_id.is_empty() { - "unknown".to_string() - } else { - chat.session_id.clone() - }; - format!("gemini://{}", sid) -} - -fn safe_prefix(s: &str, n: usize) -> String { - s.chars().take(n).collect() -} - -// ── Tests ──────────────────────────────────────────────────────────── - #[cfg(test)] mod tests { use super::*; - use crate::types::ChatFile; - use serde_json::Value; - - fn parse_chat(s: &str) -> ChatFile { - serde_json::from_str(s).unwrap() - } - - fn main_only_convo() -> Conversation { - let chat = parse_chat( - r#"{ - "sessionId":"sess1", - "projectHash":"h", - "startTime":"2026-04-17T10:00:00Z", - "lastUpdated":"2026-04-17T10:10:00Z", - "directories":["/abs/project"], - "messages":[ - {"id":"user-1111aaaa","timestamp":"2026-04-17T10:00:00Z","type":"user","content":[{"text":"Fix the bug"}]}, - {"id":"ai-2222bbbb","timestamp":"2026-04-17T10:00:01Z","type":"gemini","content":"I'll look.","model":"gemini-3-flash-preview"}, - {"id":"ai-3333cccc","timestamp":"2026-04-17T10:01:00Z","type":"gemini","content":"Writing fix.","model":"gemini-3-flash-preview","toolCalls":[ - {"id":"w1","name":"write_file","args":{"file_path":"/abs/project/src/main.rs","content":"fn main(){}"},"status":"success","timestamp":"2026-04-17T10:01:00Z","result":[{"functionResponse":{"id":"w1","name":"write_file","response":{"output":"ok"}}}]} - ]} - ] -}"#, - ); - let mut convo = Conversation::new("uuid-1".to_string(), chat); - convo.project_path = Some("/abs/project".to_string()); - convo - } - - #[test] - fn test_derive_path_basic() { - let convo = main_only_convo(); - let path = derive_path(&convo, &DeriveConfig::default()); - assert!(path.path.id.starts_with("path-gemini-")); - assert_eq!(path.steps.len(), 3); - assert_eq!(path.steps[0].step.actor, "human:user"); - assert!(path.steps[1].step.actor.starts_with("agent:")); - } - - #[test] - fn test_derive_path_head_is_last_step() { - let 
convo = main_only_convo(); - let path = derive_path(&convo, &DeriveConfig::default()); - assert_eq!(path.path.head, path.steps.last().unwrap().step.id); - } - - #[test] - fn test_derive_path_parents_chain() { - let convo = main_only_convo(); - let path = derive_path(&convo, &DeriveConfig::default()); - assert!(path.steps[0].step.parents.is_empty()); - assert_eq!( - path.steps[1].step.parents, - vec![path.steps[0].step.id.clone()] - ); - assert_eq!( - path.steps[2].step.parents, - vec![path.steps[1].step.id.clone()] - ); + use crate::types::{ChatFile, Conversation}; + use toolpath::v1::Graph; + + fn make_convo() -> Conversation { + let main_json = r#"{ + "sessionId": "sess-1", + "messages": [ + {"id":"u1","timestamp":"2026-04-17T15:23:55Z","type":"user","content":[{"text":"make a pickle"}]}, + {"id":"g1","timestamp":"2026-04-17T15:23:57Z","type":"gemini","content":"done","model":"gemini-3-flash-preview"} + ] + }"#; + let main: ChatFile = serde_json::from_str(main_json).unwrap(); + Conversation { + session_uuid: "abcdef01-2345-6789-abcd-ef0123456789".into(), + main, + sub_agents: vec![], + project_path: Some("/tmp/proj".into()), + started_at: None, + last_activity: None, + } } #[test] - fn test_derive_path_conversation_artifact() { - let convo = main_only_convo(); + fn derive_path_basic_shape() { + let convo = make_convo(); let path = derive_path(&convo, &DeriveConfig::default()); - let artifact = "gemini://sess1"; - assert!(path.steps[0].change.contains_key(artifact)); - let structural = path.steps[0].change[artifact].structural.as_ref().unwrap(); - assert_eq!(structural.change_type, "conversation.append"); - assert_eq!(structural.extra["role"], "user"); + assert!(path.path.id.starts_with("path-gemini-cli-")); + let base = path.path.base.as_ref().expect("base"); + assert_eq!(base.uri, "file:///tmp/proj"); } #[test] - fn test_derive_path_file_write_artifact() { - let convo = main_only_convo(); + fn derive_path_producer_in_meta_extra() { + let convo = make_convo(); 
let path = derive_path(&convo, &DeriveConfig::default()); - let write_step = &path.steps[2]; - assert!(write_step.change.contains_key("/abs/project/src/main.rs")); + let producer = path.meta.as_ref().unwrap().extra.get("producer").unwrap(); + assert_eq!(producer["name"], "gemini-cli"); } #[test] - fn test_derive_path_actors_populated() { - let convo = main_only_convo(); + fn derive_path_actors_populated() { + let convo = make_convo(); let path = derive_path(&convo, &DeriveConfig::default()); let actors = path.meta.as_ref().unwrap().actors.as_ref().unwrap(); assert!(actors.contains_key("human:user")); @@ -562,351 +99,14 @@ mod tests { } #[test] - fn test_derive_path_base_from_project_path() { - let convo = main_only_convo(); - let path = derive_path( - &convo, - &DeriveConfig { - project_path: Some("/override".to_string()), - include_thinking: false, - }, - ); - assert_eq!(path.path.base.as_ref().unwrap().uri, "file:///override"); - } - - #[test] - fn test_derive_path_base_from_directories_fallback() { - // Scrub project_path from conversation: should fall back to directories[0] - let mut convo = main_only_convo(); - convo.project_path = None; - let path = derive_path(&convo, &DeriveConfig::default()); - assert_eq!(path.path.base.as_ref().unwrap().uri, "file:///abs/project"); - } - - #[test] - fn test_derive_path_no_base_when_unknown() { - let mut convo = main_only_convo(); - convo.project_path = None; - convo.main.directories = None; - let path = derive_path(&convo, &DeriveConfig::default()); - assert!(path.path.base.is_none()); - } - - #[test] - fn test_derive_path_skips_empty_messages() { - let chat = parse_chat( - r#"{ - "sessionId":"x","projectHash":"","messages":[ - {"id":"m1","timestamp":"ts","type":"user","content":""}, - {"id":"m2","timestamp":"ts","type":"user","content":[{"text":" "}]}, - {"id":"m3","timestamp":"ts","type":"user","content":[{"text":"hello"}]} - ] -}"#, - ); - let convo = Conversation::new("uuid".into(), chat); - let path = 
derive_path(&convo, &DeriveConfig::default()); - assert_eq!(path.steps.len(), 1); - assert_eq!(path.steps[0].step.id, "step-m3"); - } - - #[test] - fn test_derive_path_falls_back_to_gemini_cli_actor() { - let chat = parse_chat( - r#"{ - "sessionId":"x","projectHash":"","messages":[ - {"id":"m1","timestamp":"ts","type":"gemini","content":"hello"} - ] -}"#, - ); - let convo = Conversation::new("uuid".into(), chat); - let path = derive_path(&convo, &DeriveConfig::default()); - assert_eq!(path.steps[0].step.actor, "agent:gemini-cli"); - } - - #[test] - fn test_derive_path_with_replace_tool() { - let chat = parse_chat( - r#"{ - "sessionId":"x","projectHash":"","messages":[ - {"id":"m1","timestamp":"ts","type":"gemini","content":"","toolCalls":[ - {"id":"r","name":"replace","args":{"file_path":"src/a.rs","oldString":"x","newString":"y"},"status":"success","timestamp":"ts"} - ]} - ] -}"#, - ); - let convo = Conversation::new("uuid".into(), chat); - let path = derive_path(&convo, &DeriveConfig::default()); - assert!(path.steps[0].change.contains_key("src/a.rs")); - } - - #[test] - fn test_derive_path_thinking_included_when_enabled() { - let chat = parse_chat( - r#"{ - "sessionId":"x","projectHash":"","messages":[ - {"id":"m1","timestamp":"ts","type":"gemini","content":"plan","thoughts":[{"subject":"s","description":"deep thought","timestamp":"ts"}]} - ] -}"#, - ); - let convo = Conversation::new("uuid".into(), chat); - let path = derive_path( - &convo, - &DeriveConfig { - project_path: None, - include_thinking: true, - }, - ); - let text = path.steps[0].change["gemini://x"] - .structural - .as_ref() - .unwrap() - .extra["text"] - .as_str() - .unwrap(); - assert!(text.contains("deep thought")); - } - - #[test] - fn test_derive_path_thinking_omitted_by_default() { - let chat = parse_chat( - r#"{ - "sessionId":"x","projectHash":"","messages":[ - {"id":"m1","timestamp":"ts","type":"gemini","content":"plan","thoughts":[{"subject":"s","description":"deep 
thought","timestamp":"ts"}]} - ] -}"#, - ); - let convo = Conversation::new("uuid".into(), chat); - let path = derive_path(&convo, &DeriveConfig::default()); - let text = path.steps[0].change["gemini://x"] - .structural - .as_ref() - .unwrap() - .extra["text"] - .as_str() - .unwrap(); - assert!(!text.contains("deep thought")); - assert!(text.contains("plan")); - } - - #[test] - fn test_derive_path_sub_agent_steps() { - // Main chat delegates via `task`; sub-agent messages become extra - // steps parented under the main step. - let main_chat = parse_chat( - r#"{ - "sessionId":"m","projectHash":"","messages":[ - {"id":"u1","timestamp":"ts","type":"user","content":[{"text":"go"}]}, - {"id":"a1","timestamp":"ts","type":"gemini","content":"delegating","model":"gemini-3-flash-preview","toolCalls":[ - {"id":"t","name":"task","args":{"prompt":"search"},"status":"success","timestamp":"ts"} - ]} - ] -}"#, - ); - let sub_chat = parse_chat( - r#"{ - "sessionId":"subby","projectHash":"","kind":"subagent","summary":"found","startTime":"2026-04-17T10:00:00Z","messages":[ - {"id":"sa","timestamp":"ts","type":"user","content":[{"text":"sub prompt"}]}, - {"id":"sb","timestamp":"ts","type":"gemini","content":"sub response","model":"gemini-3-flash-preview"} - ] -}"#, - ); - let mut convo = Conversation::new("uuid".into(), main_chat); - convo.sub_agents.push(sub_chat); - - let path = derive_path(&convo, &DeriveConfig::default()); - - // 2 main steps + 2 sub steps - assert_eq!(path.steps.len(), 4); - // Sub steps have IDs starting with "sub-" - assert!(path.steps[2].step.id.starts_with("sub-")); - assert!(path.steps[3].step.id.starts_with("sub-")); - // First sub step is parented under the main assistant step (a1 -> step-a1) - assert_eq!(path.steps[2].step.parents, vec!["step-a1".to_string()]); - // Second sub step is parented under the first sub step - assert_eq!( - path.steps[3].step.parents, - vec![path.steps[2].step.id.clone()] - ); - // Sub-agent artifact URI distinct from main - 
assert!(path.steps[2].change.contains_key("gemini://subby")); - assert!(path.steps[0].change.contains_key("gemini://m")); - } - - #[test] - fn test_derive_path_leftover_subagent_attaches_to_last() { - // No `task` invocation, but a sub-agent file exists. - let main_chat = parse_chat( - r#"{ - "sessionId":"m","projectHash":"","messages":[ - {"id":"u1","timestamp":"ts","type":"user","content":[{"text":"go"}]} - ] -}"#, - ); - let sub_chat = parse_chat( - r#"{ - "sessionId":"unlinked","projectHash":"","kind":"subagent","startTime":"2026-04-17T10:00:00Z","messages":[ - {"id":"sx","timestamp":"ts","type":"user","content":[{"text":"something"}]} - ] -}"#, - ); - let mut convo = Conversation::new("uuid".into(), main_chat); - convo.sub_agents.push(sub_chat); - - let path = derive_path(&convo, &DeriveConfig::default()); - // One main + one sub - assert_eq!(path.steps.len(), 2); - assert!(path.steps[1].step.id.starts_with("sub-")); - // Attached to the last main step (step-u1) - assert_eq!(path.steps[1].step.parents, vec!["step-u1".to_string()]); - } - - #[test] - fn test_derive_project_multiple() { - let a = main_only_convo(); - let b = { - let mut c = main_only_convo(); - c.main.session_id = "sess2".into(); - c.session_uuid = "uuid-2".into(); - c - }; - let paths = derive_project(&[a, b], &DeriveConfig::default()); - assert_eq!(paths.len(), 2); - assert!(paths[0].path.id.contains("sess1")); - assert!(paths[1].path.id.contains("sess2")); - } - - #[test] - fn test_safe_prefix_behaviour() { - assert_eq!(safe_prefix("abc", 8), "abc"); - assert_eq!(safe_prefix("abcdefghij", 8), "abcdefgh"); - assert_eq!(safe_prefix("日本語", 2), "日本"); - } - - #[test] - fn test_convo_artifact_uri_unknown_fallback() { - let chat = parse_chat(r#"{"sessionId":"","projectHash":"","messages":[]}"#); - assert_eq!(convo_artifact_uri(&chat), "gemini://unknown"); - } - - #[test] - fn test_path_id_falls_back_to_session_uuid() { - let chat = parse_chat( - 
r#"{"sessionId":"","projectHash":"","messages":[{"id":"m","timestamp":"ts","type":"user","content":[{"text":"hi"}]}]}"#, - ); - let convo = Conversation::new("long-session-uuid-123".into(), chat); - let path = derive_path(&convo, &DeriveConfig::default()); - assert!(path.path.id.starts_with("path-gemini-")); - // Should use a prefix of the session UUID when sessionId is empty - assert!(path.path.id.contains("long-ses")); - } - - #[test] - fn test_conversation_artifact_extra_fields() { - let convo = main_only_convo(); - let path = derive_path(&convo, &DeriveConfig::default()); - let structural = path.steps[2].change["gemini://sess1"] - .structural - .as_ref() - .unwrap(); - assert_eq!(structural.extra["role"], "gemini"); - let calls = structural.extra["tool_calls"].as_array().unwrap(); - assert_eq!(calls[0]["name"], Value::String("write_file".to_string())); - assert_eq!(calls[0]["summary"], "/abs/project/src/main.rs"); - } - - #[test] - fn test_info_message_becomes_system_step() { - let chat = parse_chat( - r#"{"sessionId":"s","projectHash":"","messages":[ - {"id":"u1","timestamp":"ts","type":"user","content":[{"text":"hi"}]}, - {"id":"i1","timestamp":"ts","type":"info","content":"Request cancelled."} -]}"#, - ); - let convo = Conversation::new("uuid".into(), chat); - let path = derive_path(&convo, &DeriveConfig::default()); - assert_eq!(path.steps.len(), 2); - assert_eq!(path.steps[1].step.actor, "system:gemini-cli"); - } - - #[test] - fn test_file_write_change_has_perspectives() { - // Verify at least one change perspective per RFC §"Change Perspectives" - let chat = parse_chat( - r#"{"sessionId":"s","projectHash":"","messages":[ - {"id":"m1","timestamp":"ts","type":"gemini","content":"","toolCalls":[ - {"id":"w1","name":"write_file","args":{"file_path":"src/main.rs","content":"fn main() {}\n"},"status":"success","timestamp":"ts"} - ]} -]}"#, - ); - let convo = Conversation::new("uuid".into(), chat); - let path = derive_path(&convo, &DeriveConfig::default()); - 
let change = &path.steps[0].change["src/main.rs"]; - assert!( - change.raw.is_some() || change.structural.is_some(), - "at least one perspective must be populated" - ); - assert!(change.structural.is_some()); - let structural = change.structural.as_ref().unwrap(); - assert_eq!(structural.change_type, "gemini.write_file"); - assert_eq!(structural.extra["operation"], "write"); - assert_eq!(structural.extra["byte_count"], 13); - // Fallback raw diff constructed from content - assert!(change.raw.as_ref().unwrap().contains("+fn main() {}")); - } - - #[test] - fn test_replace_change_has_diff() { - let chat = parse_chat( - r#"{"sessionId":"s","projectHash":"","messages":[ - {"id":"m1","timestamp":"ts","type":"gemini","content":"","toolCalls":[ - {"id":"r1","name":"replace","args":{"file_path":"src/main.rs","old_string":"hello","new_string":"world","instruction":"swap"},"status":"success","timestamp":"ts"} - ]} -]}"#, - ); - let convo = Conversation::new("uuid".into(), chat); - let path = derive_path(&convo, &DeriveConfig::default()); - let change = &path.steps[0].change["src/main.rs"]; - let raw = change.raw.as_ref().unwrap(); - assert!(raw.contains("-hello")); - assert!(raw.contains("+world")); - let structural = change.structural.as_ref().unwrap(); - assert_eq!(structural.extra["operation"], "replace"); - assert_eq!(structural.extra["instruction"], "swap"); - } - - #[test] - fn test_file_diff_preferred_over_fallback() { - // When Gemini provides resultDisplay.fileDiff, it should be used as - // the raw perspective verbatim. 
- let chat = parse_chat( - r#"{"sessionId":"s","projectHash":"","messages":[ - {"id":"m1","timestamp":"ts","type":"gemini","content":"","toolCalls":[ - {"id":"r1","name":"replace","args":{"file_path":"a.rs","old_string":"x","new_string":"y"},"status":"success","timestamp":"ts","resultDisplay":{"fileDiff":"Index: a.rs\n...GEMINI DIFF..."}} - ]} -]}"#, - ); - let convo = Conversation::new("uuid".into(), chat); - let path = derive_path(&convo, &DeriveConfig::default()); - let raw = path.steps[0].change["a.rs"].raw.as_ref().unwrap(); - assert!(raw.contains("GEMINI DIFF")); - } - - #[test] - fn test_tool_call_summary_preserves_shell_command() { - let chat = parse_chat( - r#"{"sessionId":"s","projectHash":"","messages":[ - {"id":"m1","timestamp":"ts","type":"gemini","content":"building","toolCalls":[ - {"id":"s1","name":"run_shell_command","args":{"command":"cargo build --release"},"status":"success","timestamp":"ts"} - ]} -]}"#, - ); - let convo = Conversation::new("uuid".into(), chat); + fn derive_path_validates_as_single_path_graph() { + let convo = make_convo(); let path = derive_path(&convo, &DeriveConfig::default()); - let structural = path.steps[0].change["gemini://s"] - .structural - .as_ref() - .unwrap(); - let calls = structural.extra["tool_calls"].as_array().unwrap(); - assert_eq!(calls[0]["summary"], "cargo build --release"); + let doc = Graph::from_path(path); + let json = doc.to_json().unwrap(); + let parsed = Graph::from_json(&json).unwrap(); + let pp = parsed.single_path().expect("single-path graph"); + let anc = toolpath::v1::query::ancestors(&pp.steps, &pp.path.head); + assert_eq!(anc.len(), pp.steps.len(), "all steps on head ancestry"); } } diff --git a/crates/toolpath-gemini/src/provider.rs b/crates/toolpath-gemini/src/provider.rs index 32581ef..7248214 100644 --- a/crates/toolpath-gemini/src/provider.rs +++ b/crates/toolpath-gemini/src/provider.rs @@ -103,6 +103,7 @@ fn message_to_turn(msg: &GeminiMessage, working_dir: Option<&str>) -> Turn { .iter() 
.map(tool_call_to_invocation) .collect(); + let file_mutations = compute_file_mutations(msg.tool_calls()); let token_usage = msg.tokens.as_ref().map(|t| TokenUsage { input_tokens: t.input, @@ -137,7 +138,88 @@ fn message_to_turn(msg: &GeminiMessage, working_dir: Option<&str>) -> Turn { environment, delegations: vec![], extra, - file_mutations: Vec::new(), + file_mutations, + } +} + +/// For each file-write tool call in this message, build a +/// `FileMutation` with a pre-resolved unified diff. Preference order: +/// 1. Gemini's own `resultDisplay.fileDiff` when present (real diff +/// computed by the harness). +/// 2. Hand-rolled fallback from `args` (`old_string`/`new_string` for +/// `replace`, `content` for `write_file`). +/// `tool_id` links back to the [`ToolCall`]. +fn compute_file_mutations(calls: &[ToolCall]) -> Vec { + let mut out = Vec::new(); + for call in calls { + if tool_category(&call.name) != Some(ToolCategory::FileWrite) { + continue; + } + let Some(path) = file_path_from_args(&call.args) else { + continue; + }; + let raw_diff = call.file_diff().or_else(|| fallback_raw_diff(call)); + let operation = match call.name.as_str() { + "write_file" => Some("add".to_string()), + "replace" | "edit" => Some("update".to_string()), + _ => Some(call.name.clone()), + }; + let after = match call.name.as_str() { + "write_file" => call + .args + .get("content") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()), + _ => None, + }; + out.push(toolpath_convo::FileMutation { + path, + tool_id: Some(call.id.clone()), + operation, + raw_diff, + before: None, + after, + rename_to: None, + }); + } + out +} + +/// Synthesize a unified-diff hunk when Gemini's `resultDisplay.fileDiff` +/// is absent. Not pixel-perfect but enough to give readers a change +/// perspective. 
+fn fallback_raw_diff(call: &ToolCall) -> Option { + match call.name.as_str() { + "replace" => { + let old_s = call.args.get("old_string").and_then(|v| v.as_str())?; + let new_s = call.args.get("new_string").and_then(|v| v.as_str())?; + let old_lines: Vec<&str> = old_s.split('\n').collect(); + let new_lines: Vec<&str> = new_s.split('\n').collect(); + let mut buf = format!("@@ -1,{} +1,{} @@\n", old_lines.len(), new_lines.len()); + for l in old_lines { + buf.push('-'); + buf.push_str(l); + buf.push('\n'); + } + for l in new_lines { + buf.push('+'); + buf.push_str(l); + buf.push('\n'); + } + Some(buf) + } + "write_file" => { + let content = call.args.get("content").and_then(|v| v.as_str())?; + let lines: Vec<&str> = content.split('\n').collect(); + let mut buf = format!("@@ -0,0 +1,{} @@\n", lines.len()); + for l in lines { + buf.push('+'); + buf.push_str(l); + buf.push('\n'); + } + Some(buf) + } + _ => None, } } @@ -361,9 +443,27 @@ fn conversation_to_view(convo: &Conversation) -> ConversationView { } } + // Gemini's wire format doesn't carry parent_id on messages, so link + // turns sequentially. (Matches the old `derive_path_from_view`, + // which used `last_step_id` as the parent for each new step.) 
+ let mut prev: Option = None; + for t in turns.iter_mut() { + if t.parent_id.is_none() { + t.parent_id = prev.clone(); + } + prev = Some(t.id.clone()); + } + let total_usage = sum_usage(&turns); let files_changed = extract_files_changed(&turns); + let view_base = working_dir.as_ref().map(|wd| toolpath_convo::SessionBase { + working_dir: Some(wd.clone()), + vcs_revision: None, + vcs_branch: None, + vcs_remote: None, + }); + ConversationView { id: convo.session_uuid.clone(), started_at: convo.started_at, @@ -374,7 +474,11 @@ fn conversation_to_view(convo: &Conversation) -> ConversationView { files_changed, session_ids: vec![], events: vec![], - ..Default::default() + base: view_base, + producer: Some(toolpath_convo::ProducerInfo { + name: "gemini-cli".into(), + version: None, + }), } } From 0bb912bfc3e696027d258f72192f887633248eb6 Mon Sep 17 00:00:00 2001 From: Ben Barber Date: Tue, 19 May 2026 10:55:16 -0400 Subject: [PATCH 08/10] toolpath-opencode: compute file_mutations inline; remove post-pass via Turn.extra MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactors opencode's `Builder` so the snapshot git2 tree↔tree diff IO happens during `handle_assistant_message` (where the source `Part`s are in scope) rather than as a post-pass that reads back the snapshot SHAs from `Turn.extra["opencode"]["snapshots"]`. Concretely: - `Builder` gains `snapshot_repo: Option<git2::Repository>` and `prev_snapshot_after: Option<String>` fields. - New `compute_turn_mutations(&snapshots, &tool_uses)` method does the diff against the previous turn's `after` snapshot and threads the tool-input fallback for paths the diff missed (gitignored / no-repo). - `build_with_resolver` no longer calls `attach_snapshot_diffs` / `attach_tool_input_fallbacks` — those free functions are gone. `Turn.extra["opencode"]["snapshots"]` is still written for the opencode projector's round-trip path (it consumes those SHAs to re-emit `step-start` / `step-finish` parts). 
That comes off in the next commit when `Turn.extra` itself is removed. Prep for dropping `Turn.extra` as a pipeline mechanism for IR data; provider-namespaced extras are about to go away entirely. --- crates/toolpath-opencode/src/provider.rs | 171 ++++++++++++----------- 1 file changed, 87 insertions(+), 84 deletions(-) diff --git a/crates/toolpath-opencode/src/provider.rs b/crates/toolpath-opencode/src/provider.rs index eb3b90e..d8f099f 100644 --- a/crates/toolpath-opencode/src/provider.rs +++ b/crates/toolpath-opencode/src/provider.rs @@ -170,6 +170,14 @@ struct Builder<'a> { files_changed_seen: std::collections::HashSet, total_usage: TokenUsage, total_usage_set: bool, + /// Snapshot git repo, when one's been opened by `build_with_resolver`. + /// Used inline by `handle_assistant_message` to compute per-turn + /// `file_mutations` from snapshot tree↔tree diffs. + snapshot_repo: Option, + /// The previous assistant turn's ending snapshot SHA. Used as the + /// `before` of the next turn's snapshot pair so intermediate state + /// captures correctly. 
+ prev_snapshot_after: Option, } impl<'a> Builder<'a> { @@ -182,21 +190,21 @@ impl<'a> Builder<'a> { files_changed_seen: std::collections::HashSet::new(), total_usage: TokenUsage::default(), total_usage_set: false, + snapshot_repo: None, + prev_snapshot_after: None, } } - fn build_with_resolver(self, resolver: &PathResolver) -> ConversationView { + fn build_with_resolver(mut self, resolver: &PathResolver) -> ConversationView { let session_version = self.session.version.clone(); let session_directory = self.session.directory.to_string_lossy().to_string(); let session_project_id = self.session.project_id.clone(); - let snapshot_repo = resolver + self.snapshot_repo = resolver .snapshot_gitdir(&session_project_id, &self.session.directory) .ok() .and_then(|gd| git2::Repository::open(gd).ok()); let mut view = self.build(); - attach_snapshot_diffs(&mut view, snapshot_repo.as_ref()); - attach_tool_input_fallbacks(&mut view); // Producer + base. view.producer = Some(ProducerInfo { @@ -465,7 +473,7 @@ impl<'a> Builder<'a> { if !snapshots.is_empty() { opencode_extra.insert( "snapshots".into(), - Value::Array(snapshots.into_iter().map(Value::String).collect()), + Value::Array(snapshots.iter().cloned().map(Value::String).collect()), ); } if !patches.is_empty() { @@ -479,6 +487,15 @@ impl<'a> Builder<'a> { } extra.insert("opencode".into(), Value::Object(opencode_extra)); + // Compute `file_mutations` for this turn inline: + // 1. If we have a snapshot repo AND a snapshot pair (prev_after, + // this turn's last snapshot), walk the git2 tree↔tree diff + // and add a FileMutation per touched file. + // 2. For any file-write tool whose path wasn't covered by the + // snapshot diff, add a tool-input-derived FileMutation + // (catches gitignored paths and the no-repo case). 
+ let file_mutations = self.compute_turn_mutations(&snapshots, &tool_uses); + self.turns.push(Turn { id: msg.id.clone(), parent_id: if a.parent_id.is_empty() { @@ -505,9 +522,72 @@ impl<'a> Builder<'a> { environment, delegations, extra, - file_mutations: Vec::new(), + file_mutations, }); } + + fn compute_turn_mutations( + &mut self, + snapshots: &[String], + tool_uses: &[ToolInvocation], + ) -> Vec { + let mut out: Vec = Vec::new(); + let mut covered: std::collections::HashSet = std::collections::HashSet::new(); + + // Snapshot diff (when repo + pair available). + if let (Some(repo), Some(first), Some(last)) = + (self.snapshot_repo.as_ref(), snapshots.first(), snapshots.last()) + { + let before = self + .prev_snapshot_after + .clone() + .unwrap_or_else(|| first.clone()); + let after = last.clone(); + self.prev_snapshot_after = Some(after.clone()); + if before != after { + match diff_trees(repo, &before, &after) { + Ok(mutations) => { + for fm in mutations { + covered.insert(fm.path.clone()); + out.push(fm); + } + } + Err(e) => { + eprintln!( + "Warning: snapshot diff {}..{} failed: {}", + &before[..before.len().min(8)], + &after[..after.len().min(8)], + e + ); + } + } + } + } else if let Some(last) = snapshots.last() { + // Track even when we can't diff, so subsequent turns still + // chain off the right `before`. + self.prev_snapshot_after = Some(last.clone()); + } + + // Tool-input fallback for file-write tools whose paths aren't + // already covered by a snapshot-diff mutation. 
+ for tu in tool_uses { + let Some(path) = tool_input_file_path(tu) else { + continue; + }; + if covered.contains(&path) { + continue; + } + covered.insert(path.clone()); + out.push(FileMutation { + path, + tool_id: Some(tu.id.clone()), + operation: Some(tool_to_operation(&tu.name).to_string()), + ..Default::default() + }); + } + + out + } } fn concat_text_parts(parts: &[Part]) -> String { @@ -708,84 +788,7 @@ impl ConversationProvider for OpencodeConvo { } } -// ── Snapshot diff / tool-input fallback ──────────────────────────────── - -/// For each assistant turn, walk its `extra["opencode"]["snapshots"]` -/// across turns and populate `Turn.file_mutations` from the git2 -/// tree↔tree diff of the snapshot pair. No-op when `repo` is `None`. -fn attach_snapshot_diffs(view: &mut ConversationView, repo: Option<&git2::Repository>) { - let Some(repo) = repo else { return }; - let mut prev_after: Option = None; - for turn in view.turns.iter_mut() { - let snapshots: Vec = turn - .extra - .get("opencode") - .and_then(|oc| oc.get("snapshots")) - .and_then(|v| v.as_array()) - .map(|arr| { - arr.iter() - .filter_map(|v| v.as_str().map(str::to_string)) - .collect() - }) - .unwrap_or_default(); - let (Some(first), Some(last)) = (snapshots.first(), snapshots.last()) else { - continue; - }; - // Prefer the previous turn's `after` snapshot as `before`. - let before = prev_after.clone().unwrap_or_else(|| first.clone()); - let after = last.clone(); - prev_after = Some(after.clone()); - if before == after { - continue; - } - match diff_trees(repo, &before, &after) { - Ok(mutations) => { - for fm in mutations { - turn.file_mutations.push(fm); - } - } - Err(e) => { - eprintln!( - "Warning: snapshot diff {}..{} failed: {}", - &before[..before.len().min(8)], - &after[..after.len().min(8)], - e - ); - } - } - } -} - -/// For each file-write tool invocation whose path isn't already covered -/// by a snapshot-diff `FileMutation`, synthesize a no-raw mutation -/// attributed to the tool. 
Catches files opencode wrote that are -/// gitignored (so the snapshot pair shows no change) and the case -/// where there's no snapshot repo at all. -fn attach_tool_input_fallbacks(view: &mut ConversationView) { - for turn in view.turns.iter_mut() { - let existing: std::collections::HashSet = turn - .file_mutations - .iter() - .map(|fm| fm.path.clone()) - .collect(); - let mut extras: Vec = Vec::new(); - for tu in &turn.tool_uses { - let Some(path) = tool_input_file_path(tu) else { - continue; - }; - if existing.contains(&path) { - continue; - } - extras.push(FileMutation { - path, - tool_id: Some(tu.id.clone()), - operation: Some(tool_to_operation(&tu.name).to_string()), - ..Default::default() - }); - } - turn.file_mutations.extend(extras); - } -} +// ── Snapshot diff helpers ────────────────────────────────────────────── fn tool_input_file_path(tu: &ToolInvocation) -> Option { tu.input From 0452f6148c7ad43b74f99e0cfdb7d01d032f3a7d Mon Sep 17 00:00:00 2001 From: Ben Barber Date: Tue, 19 May 2026 12:00:29 -0400 Subject: [PATCH 09/10] drop Turn.extra: stop smuggling source-format details through the IR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turn.extra was a per-provider stash (Turn.extra["claude"], Turn.extra["pi"], etc.) for fields that didn't fit the typed ConversationView. Each provider's projector then read its own namespace back out to rebuild the wire format with high fidelity. The IR's job is cross-harness translation, not source-format preservation. Carrying provider-specific blobs end-to-end conflated those two goals and encouraged providers to lean on extras instead of mapping cleanly onto the shared types. With the field removed, every provider now derives onto and projects from the typed IR only. Two ripple effects worth noting: - Pi's tool-result entries no longer emit a standalone Role::Other("tool") turn. 
The result content folds into the matching assistant turn's tool_uses[i].result via the same pass-2 mechanism the other harnesses already use. Aligns Pi with claude/gemini/codex/opencode and keeps Pi → Pi idempotent without smuggling tool_call_id. - Round-trip fidelity is now best-effort on fields that lived only in extras: codex encrypted reasoning ciphertext, gemini per-thought timestamps, gemini token breakdown (thoughts/tool/total), opencode snapshot/patch metadata, pi compaction/bashExecution metadata. The cross-harness matrix still passes — translation works; only wire-level exact equality on the originating harness regresses. Tests that asserted on the smuggled extra contents have been removed (extras themselves are gone, so the assertions can't express anything). The cross_harness_matrix no_foreign_extras invariant is gone for the same reason. --- crates/path-cli/tests/cross_harness_matrix.rs | 22 +- crates/toolpath-claude/src/project.rs | 122 +------ crates/toolpath-claude/src/provider.rs | 63 ---- crates/toolpath-codex/src/project.rs | 75 +---- crates/toolpath-codex/src/provider.rs | 141 +------- .../toolpath-codex/tests/fixture_roundtrip.rs | 27 +- .../tests/projection_roundtrip.rs | 22 -- crates/toolpath-convo/src/derive.rs | 11 - crates/toolpath-convo/src/extract.rs | 137 -------- crates/toolpath-convo/src/lib.rs | 15 - crates/toolpath-convo/src/project.rs | 7 - crates/toolpath-gemini/src/project.rs | 189 +---------- crates/toolpath-gemini/src/provider.rs | 82 +---- .../tests/projection_roundtrip.rs | 18 +- crates/toolpath-opencode/src/project.rs | 26 +- crates/toolpath-opencode/src/provider.rs | 80 +---- crates/toolpath-pi/src/project.rs | 142 +------- crates/toolpath-pi/src/provider.rs | 308 ++---------------- crates/toolpath-pi/tests/end_to_end.rs | 12 +- 19 files changed, 89 insertions(+), 1410 deletions(-) diff --git a/crates/path-cli/tests/cross_harness_matrix.rs b/crates/path-cli/tests/cross_harness_matrix.rs index 03554e7..8c4ae8e 100644 --- 
a/crates/path-cli/tests/cross_harness_matrix.rs +++ b/crates/path-cli/tests/cross_harness_matrix.rs @@ -6,7 +6,7 @@ //! than aborting on the first; one cell's failures land grouped under //! its label so triage is possible from a single test run. -use std::collections::{BTreeMap, BTreeSet, HashSet}; +use std::collections::{BTreeMap, BTreeSet}; use std::path::{Path, PathBuf}; use serde_json::Value; @@ -893,25 +893,6 @@ mod invariants { } } - pub fn no_foreign_extras( - view: &ConversationView, - source: &str, - target: &str, - failures: &mut Vec, - ) { - let allowed: HashSet<&str> = [source, target].into_iter().collect(); - const NAMESPACES: &[&str] = &["claude", "gemini", "codex", "pi", "opencode"]; - for (i, turn) in view.turns.iter().enumerate() { - for key in turn.extra.keys() { - if NAMESPACES.contains(&key.as_str()) && !allowed.contains(key.as_str()) { - failures.push(format!( - "turn {} carries foreign-namespace extras key {:?} (allowed: {:?})", - i, key, allowed - )); - } - } - } - } } // ── Matrix runner ──────────────────────────────────────────────────── @@ -945,7 +926,6 @@ fn run_cell( invariants::delegations(&view_first, &view_second, &mut failures); invariants::delegations_survive(&view_after_source, &view_first, &mut failures); invariants::files_changed(&view_first, &view_second, &mut failures); - invariants::no_foreign_extras(&view_second, source.name(), target.name(), &mut failures); failures } diff --git a/crates/toolpath-claude/src/project.rs b/crates/toolpath-claude/src/project.rs index 188fa5d..ef27332 100644 --- a/crates/toolpath-claude/src/project.rs +++ b/crates/toolpath-claude/src/project.rs @@ -298,27 +298,11 @@ fn apply_turn_metadata(entry: &mut ConversationEntry, turn: &Turn) { } } - // From Turn.extra["claude"] - if let Some(claude) = turn.extra.get("claude").and_then(|v| v.as_object()) { - if let Some(v) = claude.get("version").and_then(|v| v.as_str()) { - entry.version = entry.version.take().or_else(|| Some(v.to_string())); - } - if 
let Some(v) = claude.get("user_type").and_then(|v| v.as_str()) { - entry.user_type = entry.user_type.take().or_else(|| Some(v.to_string())); - } - if let Some(v) = claude.get("request_id").and_then(|v| v.as_str()) { - entry.request_id = entry.request_id.take().or_else(|| Some(v.to_string())); - } - // Merge remaining fields into entry.extra - for (k, v) in claude { - match k.as_str() { - "version" | "user_type" | "request_id" => {} // Already handled above - _ => { - entry.extra.entry(k.clone()).or_insert_with(|| v.clone()); - } - } - } - } + // Source-format details (`version`, `user_type`, `request_id`, + // per-entry catch-all) used to ride through `Turn.extra["claude"]` for + // claude → IR → claude round-trip. The IR no longer carries + // provider-specific extras; the projected entry's fields stay `None` + // and the harness fills in defaults at write time. } /// Build a `ConversationEntry` for a user turn. @@ -1027,7 +1011,6 @@ mod tests { token_usage: None, environment: None, delegations: vec![], - extra: Default::default(), file_mutations: Vec::new(), } } @@ -1046,7 +1029,6 @@ mod tests { token_usage: None, environment: None, delegations: vec![], - extra: Default::default(), file_mutations: Vec::new(), } } @@ -1136,7 +1118,6 @@ mod tests { input: serde_json::json!({"file_path": "src/main.rs"}), result: None, category: None, - ..Default::default() }]; let view = make_view("sess-1", vec![turn]); @@ -1208,7 +1189,6 @@ mod tests { is_error: false, }), category: None, - ..Default::default() }]; let view = make_view("sess-1", vec![user_turn("u1", "Go"), turn]); @@ -1257,7 +1237,6 @@ mod tests { input: serde_json::json!({}), result: None, // no result category: None, - ..Default::default() }]; let view = make_view("sess-1", vec![turn]); @@ -1358,7 +1337,6 @@ mod tests { input: serde_json::json!({"command": "ls"}), result: None, category: None, - ..Default::default() }]; let view = make_view("sess-1", vec![turn]); @@ -1390,7 +1368,6 @@ mod tests { is_error: false, 
}), category: None, - ..Default::default() }, ToolInvocation { id: "t2".to_string(), @@ -1401,7 +1378,6 @@ mod tests { is_error: true, }), category: None, - ..Default::default() }, ]; @@ -1468,7 +1444,6 @@ mod tests { is_error: false, }), category: None, - ..Default::default() }, ToolInvocation { id: "t2".to_string(), @@ -1476,7 +1451,6 @@ mod tests { input: serde_json::json!({}), result: None, // no result for this one category: None, - ..Default::default() }, ]; @@ -1502,80 +1476,6 @@ mod tests { } } - // ── Metadata: user entries get cwd, gitBranch, version, userType ─ - - #[test] - fn test_user_entry_metadata_from_turn() { - let mut turn = user_turn("u1", "Hello"); - turn.environment = Some(EnvironmentSnapshot { - working_dir: Some("/home/user/project".to_string()), - vcs_branch: Some("main".to_string()), - vcs_revision: None, - }); - turn.extra.insert( - "claude".to_string(), - json!({ - "version": "2.1.37", - "user_type": "external", - "entrypoint": "cli", - }), - ); - - let view = make_view("sess-1", vec![turn]); - let convo = ClaudeProjector.project(&view).unwrap(); - - let entry = &content_entries(&convo)[0]; - assert_eq!(entry.cwd.as_deref(), Some("/home/user/project")); - assert_eq!(entry.git_branch.as_deref(), Some("main")); - assert_eq!(entry.version.as_deref(), Some("2.1.37")); - assert_eq!(entry.user_type.as_deref(), Some("external")); - assert_eq!(entry.extra.get("entrypoint"), Some(&json!("cli"))); - } - - // ── Metadata: assistant entries get requestId ───────────────────── - - #[test] - fn test_assistant_entry_metadata_request_id() { - let mut turn = assistant_turn("a1", "Done."); - turn.extra.insert( - "claude".to_string(), - json!({ - "request_id": "req_abc123", - "version": "2.1.37", - }), - ); - - let view = make_view("sess-1", vec![turn]); - let convo = ClaudeProjector.project(&view).unwrap(); - - let entry = &content_entries(&convo)[0]; - assert_eq!(entry.request_id.as_deref(), Some("req_abc123")); - assert_eq!(entry.version.as_deref(), 
Some("2.1.37")); - } - - // ── Metadata: extras (entrypoint, isMeta, slug) appear ─────────── - - #[test] - fn test_entry_extras_appear_in_projected_entries() { - let mut turn = user_turn("u1", "Hello"); - turn.extra.insert( - "claude".to_string(), - json!({ - "entrypoint": "cli", - "isMeta": true, - "slug": "my-slug", - }), - ); - - let view = make_view("sess-1", vec![turn]); - let convo = ClaudeProjector.project(&view).unwrap(); - - let entry = &content_entries(&convo)[0]; - assert_eq!(entry.extra.get("entrypoint"), Some(&json!("cli"))); - assert_eq!(entry.extra.get("isMeta"), Some(&json!(true))); - assert_eq!(entry.extra.get("slug"), Some(&json!("my-slug"))); - } - // ── Tool result entries inherit metadata from parent turn ───────── #[test] @@ -1586,14 +1486,6 @@ mod tests { vcs_branch: Some("dev".to_string()), vcs_revision: None, }); - turn.extra.insert( - "claude".to_string(), - json!({ - "version": "2.1.37", - "user_type": "external", - "entrypoint": "cli", - }), - ); turn.tool_uses = vec![ToolInvocation { id: "t1".to_string(), name: "Read".to_string(), @@ -1603,7 +1495,6 @@ mod tests { is_error: false, }), category: None, - ..Default::default() }]; let view = make_view("sess-1", vec![turn]); @@ -1615,9 +1506,6 @@ mod tests { let result_entry = &entries[1]; assert_eq!(result_entry.cwd.as_deref(), Some("/project")); assert_eq!(result_entry.git_branch.as_deref(), Some("dev")); - assert_eq!(result_entry.version.as_deref(), Some("2.1.37")); - assert_eq!(result_entry.user_type.as_deref(), Some("external")); - assert_eq!(result_entry.extra.get("entrypoint"), Some(&json!("cli"))); // sourceToolAssistantUUID should be the parent turn's ID assert_eq!( result_entry.extra.get("sourceToolAssistantUUID"), diff --git a/crates/toolpath-claude/src/provider.rs b/crates/toolpath-claude/src/provider.rs index d5c470f..9b2dcb0 100644 --- a/crates/toolpath-claude/src/provider.rs +++ b/crates/toolpath-claude/src/provider.rs @@ -94,7 +94,6 @@ fn message_to_turn(entry: 
&ConversationEntry, msg: &Message) -> Turn { input: tu.input.clone(), result, category, - ..Default::default() } }) .collect(); @@ -120,41 +119,6 @@ fn message_to_turn(entry: &ConversationEntry, msg: &Message) -> Turn { let delegations = extract_delegations(&tool_uses); - // Fold the entry's typed top-level fields into the claude extras blob - // so projection can restore them. Without this, requestId / userType / - // version / sessionId vanish on roundtrip and Claude's UI loses its - // request-correlation metadata. - let mut claude_extras: serde_json::Map = - serde_json::to_value(&entry.extra) - .ok() - .and_then(|v| v.as_object().cloned()) - .unwrap_or_default(); - if let Some(v) = &entry.version { - claude_extras - .entry("version".to_string()) - .or_insert_with(|| serde_json::Value::String(v.clone())); - } - if let Some(v) = &entry.user_type { - claude_extras - .entry("user_type".to_string()) - .or_insert_with(|| serde_json::Value::String(v.clone())); - } - if let Some(v) = &entry.request_id { - claude_extras - .entry("request_id".to_string()) - .or_insert_with(|| serde_json::Value::String(v.clone())); - } - let extra = if claude_extras.is_empty() { - HashMap::new() - } else { - let mut map = HashMap::new(); - map.insert( - "claude".to_string(), - serde_json::Value::Object(claude_extras), - ); - map - }; - Turn { id: entry.uuid.clone(), parent_id: entry.parent_uuid.clone(), @@ -168,7 +132,6 @@ fn message_to_turn(entry: &ConversationEntry, msg: &Message) -> Turn { token_usage, environment, delegations, - extra, file_mutations, } } @@ -1183,7 +1146,6 @@ mod tests { input: serde_json::json!({}), result: None, category: Some(ToolCategory::FileRead), - ..Default::default() }, ToolInvocation { id: "tool-b".into(), @@ -1191,7 +1153,6 @@ mod tests { input: serde_json::json!({}), result: None, category: Some(ToolCategory::FileWrite), - ..Default::default() }, ], model: None, @@ -1199,7 +1160,6 @@ mod tests { token_usage: None, environment: None, delegations: vec![], - 
extra: Default::default(), file_mutations: Vec::new(), }]; @@ -1420,29 +1380,6 @@ mod tests { ); } - // ── Provider-specific extras (Turn.extra["claude"]) ───────────── - - #[test] - fn test_turn_extra_populated_from_entry() { - let entry: ConversationEntry = serde_json::from_str( - r#"{"uuid":"u1","type":"user","timestamp":"2024-01-01T00:00:00Z","subtype":"init","message":{"role":"user","content":"hello"}}"#, - ) - .unwrap(); - let turn = to_turn(&entry).unwrap(); - let claude = turn.extra.get("claude").expect("extra[\"claude\"] missing"); - assert_eq!(claude["subtype"], "init"); - } - - #[test] - fn test_turn_extra_empty_when_no_extras() { - let entry: ConversationEntry = serde_json::from_str( - r#"{"uuid":"u1","type":"user","timestamp":"2024-01-01T00:00:00Z","message":{"role":"user","content":"hello"}}"#, - ) - .unwrap(); - let turn = to_turn(&entry).unwrap(); - assert!(turn.extra.is_empty()); - } - #[test] fn test_progress_data_enriched_with_extras() { let entry: ConversationEntry = serde_json::from_str( diff --git a/crates/toolpath-codex/src/project.rs b/crates/toolpath-codex/src/project.rs index 52dc8c2..8efc2c1 100644 --- a/crates/toolpath-codex/src/project.rs +++ b/crates/toolpath-codex/src/project.rs @@ -237,13 +237,12 @@ fn make_turn_context_line( } } -/// Pull `Turn.extra["codex"]` if present (the namespace forward path -/// uses). Foreign-namespace extras are intentionally not consulted. -fn codex_extras(turn: &Turn) -> Option<&Map> { - turn.extra.get("codex").and_then(|v| match v { - Value::Object(m) => Some(m), - _ => None, - }) +/// Used to return `Turn.extra["codex"]`; the IR no longer carries +/// provider-namespaced extras. Always `None`. Callers fall back to +/// reconstructing source-format details from typed IR fields and +/// reasonable defaults. 
+fn codex_extras(_turn: &Turn) -> Option<&'static Map> { + None } fn emit_turn_lines( @@ -650,7 +649,6 @@ fn convo_usage_to_codex_json(u: &toolpath_convo::TokenUsage) -> Value { #[cfg(test)] mod tests { use super::*; - use std::collections::HashMap; use toolpath_convo::{TokenUsage, ToolCategory, ToolInvocation, ToolResult}; fn user_turn(id: &str, text: &str) -> Turn { @@ -667,7 +665,6 @@ mod tests { token_usage: None, environment: None, delegations: vec![], - extra: HashMap::new(), file_mutations: Vec::new(), } } @@ -691,7 +688,6 @@ mod tests { }), environment: None, delegations: vec![], - extra: HashMap::new(), file_mutations: Vec::new(), } } @@ -785,7 +781,6 @@ mod tests { is_error: false, }), category: Some(ToolCategory::Shell), - ..Default::default() }]; let s = CodexProjector::default() .project(&view_with(vec![t])) @@ -842,7 +837,6 @@ mod tests { input: json!({"command": "ls"}), result: None, category: Some(ToolCategory::Shell), - ..Default::default() }]; let s = CodexProjector::default() .project(&view_with(vec![t])) @@ -873,7 +867,6 @@ mod tests { is_error: false, }), category: Some(ToolCategory::FileWrite), - ..Default::default() }]; let s = CodexProjector::default() .project(&view_with(vec![t])) @@ -907,62 +900,6 @@ mod tests { assert_eq!(summary[0]["text"], "hmm let me consider"); } - #[test] - fn codex_extras_reasoning_encrypted_round_trips_verbatim() { - let mut t = assistant_turn("a1", "Done."); - t.extra.insert( - "codex".into(), - json!({ - "reasoning_encrypted": ["gAAAAAB-fake-blob-1", "gAAAAAB-fake-blob-2"] - }), - ); - let s = CodexProjector::default() - .project(&view_with(vec![t])) - .unwrap(); - let reasoning_lines: Vec<&RolloutLine> = s - .lines - .iter() - .filter(|l| l.payload.get("type").and_then(Value::as_str) == Some("reasoning")) - .collect(); - assert_eq!(reasoning_lines.len(), 2); - assert_eq!( - reasoning_lines[0].payload["encrypted_content"], - "gAAAAAB-fake-blob-1" - ); - assert_eq!( - 
reasoning_lines[1].payload["encrypted_content"], - "gAAAAAB-fake-blob-2" - ); - } - - #[test] - fn foreign_namespace_extras_are_dropped() { - // Turn.extra["claude"] / Turn.extra["gemini"] must NOT appear - // anywhere on the projected lines. - let mut t = assistant_turn("a1", "hi"); - t.extra.insert( - "claude".into(), - json!({"version": "2.1.116", "user_type": "external"}), - ); - t.extra.insert("gemini".into(), json!({"foo": "bar"})); - let s = CodexProjector::default() - .project(&view_with(vec![t])) - .unwrap(); - for line in &s.lines { - let serialized = serde_json::to_string(line).unwrap(); - assert!( - !serialized.contains("\"version\":\"2.1.116\""), - "claude leak: {}", - serialized - ); - assert!( - !serialized.contains("\"foo\":\"bar\""), - "gemini leak: {}", - serialized - ); - } - } - #[test] fn session_meta_carries_default_originator() { let s = CodexProjector::default() diff --git a/crates/toolpath-codex/src/provider.rs b/crates/toolpath-codex/src/provider.rs index 3689d2f..a5fe03d 100644 --- a/crates/toolpath-codex/src/provider.rs +++ b/crates/toolpath-codex/src/provider.rs @@ -30,7 +30,7 @@ use crate::types::{ EventMsg, ExecCommandEnd, Message, PatchApplyEnd, PatchChange, ResponseItem, RolloutItem, Session, TokenCountInfo, TokenUsage as CodexTokenUsage, }; -use serde_json::{Map, Value}; +use serde_json::Value; use toolpath_convo::{ ConversationEvent, ConversationMeta, ConversationProvider, ConversationView, ConvoError, EnvironmentSnapshot, FileMutation, ProducerInfo, Role, SessionBase, TokenUsage, ToolCategory, @@ -180,11 +180,6 @@ struct Builder<'a> { /// Plaintext reasoning summaries (rare — only in configurations where /// OpenAI exposes public reasoning). These land on `Turn.thinking`. pending_reasoning_plaintext: Vec, - /// Opaque encrypted ciphertext from OpenAI's servers. Preserved on - /// the next assistant turn's `extra["codex"]["reasoning_encrypted"]` - /// for round-trip fidelity. 
Never goes to `Turn.thinking` — it - /// would render as garbage. - pending_reasoning_encrypted: Vec, pending_token_usage: Option, working_dir: Option, current_model: Option, @@ -202,7 +197,6 @@ impl<'a> Builder<'a> { turns: Vec::new(), events: Vec::new(), pending_reasoning_plaintext: Vec::new(), - pending_reasoning_encrypted: Vec::new(), pending_token_usage: None, working_dir: None, current_model: None, @@ -349,7 +343,6 @@ impl<'a> Builder<'a> { events: self.events, base, producer, - ..Default::default() } } @@ -365,10 +358,6 @@ impl<'a> Builder<'a> { self.push_turn(turn); } ResponseItem::Reasoning(r) => { - // Encrypted blob → round-trip preservation only. - if let Some(s) = r.encrypted_content { - self.pending_reasoning_encrypted.push(s); - } // Plaintext content (rare) → Turn.thinking. if let Some(Value::Array(arr)) = r.content.as_ref() { for v in arr { @@ -392,12 +381,7 @@ impl<'a> Builder<'a> { } else { input }; - let mut extra: Map = Map::new(); - extra.insert("raw_arguments".into(), Value::String(fc.arguments.clone())); - if let Some(ns) = fc.namespace.as_ref() { - extra.insert("namespace".into(), Value::String(ns.clone())); - } - self.attach_tool_call(timestamp, fc.call_id, name, input, extra, false); + self.attach_tool_call(timestamp, fc.call_id, name, input); } ResponseItem::FunctionCallOutput(out) => { let is_error = out @@ -409,12 +393,7 @@ impl<'a> Builder<'a> { } ResponseItem::CustomToolCall(ct) => { let input = Value::String(ct.input.clone()); - let mut extra: Map = Map::new(); - extra.insert("tool_call_kind".into(), Value::String("custom".into())); - if let Some(s) = ct.status.as_ref() { - extra.insert("status".into(), Value::String(s.clone())); - } - self.attach_tool_call(timestamp, ct.call_id, ct.name, input, extra, true); + self.attach_tool_call(timestamp, ct.call_id, ct.name, input); } ResponseItem::CustomToolCallOutput(out) => { let is_error = out @@ -478,8 +457,6 @@ impl<'a> Builder<'a> { call_id: String, name: String, input: Value, - 
codex_tool_extra: Map, - _is_custom: bool, ) { let category = tool_category(&name); let invocation = ToolInvocation { @@ -488,7 +465,6 @@ impl<'a> Builder<'a> { input, result: None, category, - ..Default::default() }; let turn_idx = match self.last_assistant_turn_index() { @@ -505,15 +481,6 @@ impl<'a> Builder<'a> { } }; let tool_idx = self.turns[turn_idx].tool_uses.len(); - if !codex_tool_extra.is_empty() { - let codex = turn_extra_codex_mut(&mut self.turns[turn_idx]); - let tool_extras = codex - .entry("tool_extras") - .or_insert_with(|| Value::Object(Map::new())); - if let Value::Object(m) = tool_extras { - m.insert(call_id.clone(), Value::Object(codex_tool_extra)); - } - } self.turns[turn_idx].tool_uses.push(invocation); self.call_index.insert(call_id, (turn_idx, tool_idx)); } @@ -562,39 +529,8 @@ impl<'a> Builder<'a> { content: body, is_error, }); - } else if is_error { - // Escalate existing result to error if exit indicates failure. - if let Some(r) = inv.result.as_mut() { - r.is_error = true; - } - } - let codex = turn_extra_codex_mut(turn); - let tool_extras = codex - .entry("tool_extras") - .or_insert_with(|| Value::Object(Map::new())); - if let Value::Object(m) = tool_extras { - let entry = m - .entry(exec.call_id.clone()) - .or_insert_with(|| Value::Object(Map::new())); - if let Value::Object(inner) = entry { - inner.insert( - "exit_code".into(), - exec.exit_code - .map(|c| Value::Number(serde_json::Number::from(c))) - .unwrap_or(Value::Null), - ); - if !exec.command.is_empty() { - inner.insert( - "command".into(), - Value::Array( - exec.command - .iter() - .map(|s| Value::String(s.clone())) - .collect(), - ), - ); - } - } + } else if is_error && let Some(r) = inv.result.as_mut() { + r.is_error = true; } } } @@ -643,15 +579,6 @@ impl<'a> Builder<'a> { turn.thinking = Some(self.pending_reasoning_plaintext.join("\n\n")); self.pending_reasoning_plaintext.clear(); } - // Encrypted ciphertext goes into extra for round-trip only. 
- if !self.pending_reasoning_encrypted.is_empty() { - let drained: Vec = self.pending_reasoning_encrypted.drain(..).collect(); - let codex = turn_extra_codex_mut(turn); - codex.insert( - "reasoning_encrypted".into(), - Value::Array(drained.into_iter().map(Value::String).collect()), - ); - } if let Some(tu) = self.pending_token_usage.take() { turn.token_usage = Some(tu); } @@ -754,24 +681,6 @@ fn message_to_turn( vcs_revision: None, }); - let mut extra: HashMap = HashMap::new(); - let mut codex_extra: Map = Map::new(); - if msg.role == "developer" { - codex_extra.insert("role".into(), Value::String("developer".into())); - } - if let Some(phase) = &msg.phase { - codex_extra.insert("phase".into(), Value::String(phase.clone())); - } - if let Some(end_turn) = msg.end_turn { - codex_extra.insert("end_turn".into(), Value::Bool(end_turn)); - } - if let Some(id) = &msg.id { - codex_extra.insert("message_id".into(), Value::String(id.clone())); - } - if !codex_extra.is_empty() { - extra.insert("codex".into(), Value::Object(codex_extra)); - } - Turn { id: msg.id.clone().unwrap_or_default(), parent_id: None, @@ -789,7 +698,6 @@ fn message_to_turn( token_usage: None, environment, delegations: Vec::new(), - extra, file_mutations: Vec::new(), } } @@ -816,7 +724,6 @@ fn synthetic_assistant_turn( vcs_revision: None, }), delegations: Vec::new(), - extra: HashMap::new(), file_mutations: Vec::new(), } } @@ -863,19 +770,6 @@ fn data_from_value(v: &Value) -> HashMap { } } -fn turn_extra_codex_mut(turn: &mut Turn) -> &mut Map { - let entry = turn - .extra - .entry("codex".to_string()) - .or_insert_with(|| Value::Object(Map::new())); - if !entry.is_object() { - *entry = Value::Object(Map::new()); - } - entry - .as_object_mut() - .expect("entry was just ensured to be an object") -} - // ── ConversationProvider trait impl ──────────────────────────────── impl ConversationProvider for CodexConvo { @@ -996,10 +890,10 @@ mod tests { } #[test] - fn 
encrypted_reasoning_preserved_in_extra_not_thinking() { - // The fixture only has encrypted_content. That must land under - // `extra["codex"]["reasoning_encrypted"]` — and NOT be rendered - // as `Turn.thinking` (which would be opaque ciphertext). + fn encrypted_reasoning_does_not_land_on_thinking() { + // The fixture only has encrypted_content. That must NOT be rendered + // as `Turn.thinking` (which would be opaque ciphertext). Since + // Turn.extra was removed, encrypted ciphertext is simply dropped. let (_t, mgr, id) = setup_session_fixture(&minimal_session()); let view = to_view(&mgr.read_session(&id).unwrap()); let assistant = &view.turns[1]; @@ -1007,13 +901,6 @@ mod tests { assistant.thinking.is_none(), "encrypted ciphertext must not appear as thinking" ); - let codex = assistant.extra.get("codex").expect("codex extra"); - let enc = codex - .get("reasoning_encrypted") - .and_then(|v| v.as_array()) - .expect("reasoning_encrypted array"); - assert_eq!(enc.len(), 1); - assert_eq!(enc[0], "encrypted-blob-1"); } #[test] @@ -1032,14 +919,6 @@ mod tests { view.turns[0].thinking.as_deref(), Some("I should check the file") ); - // No encrypted blob on this one, so extra["codex"] either omits - // `reasoning_encrypted` or has no such key. 
- let has_enc = view.turns[0] - .extra - .get("codex") - .and_then(|c| c.get("reasoning_encrypted")) - .is_some(); - assert!(!has_enc, "no encrypted content was emitted"); } #[test] @@ -1184,7 +1063,5 @@ mod tests { let (_t, mgr, id) = setup_session_fixture(&body); let view = to_view(&mgr.read_session(&id).unwrap()); assert_eq!(view.turns[0].role, Role::System); - let codex = view.turns[0].extra.get("codex").unwrap(); - assert_eq!(codex["role"], "developer"); } } diff --git a/crates/toolpath-codex/tests/fixture_roundtrip.rs b/crates/toolpath-codex/tests/fixture_roundtrip.rs index ef1ef7b..f756e78 100644 --- a/crates/toolpath-codex/tests/fixture_roundtrip.rs +++ b/crates/toolpath-codex/tests/fixture_roundtrip.rs @@ -139,31 +139,14 @@ fn token_usage_captured() { } #[test] -fn encrypted_reasoning_preserved_in_extra() { +fn encrypted_reasoning_does_not_land_on_thinking() { // Codex rollouts almost always carry OpenAI's encrypted reasoning - // ciphertext — not plaintext. It's round-trip material, not something - // we can render, so it lives under `turn.extra["codex"].reasoning_encrypted` - // and never on `turn.thinking`. + // ciphertext — not plaintext. With `Turn.extra` gone, the ciphertext is + // dropped on the conversion to the IR; the invariant we still preserve + // is that it must not pollute `turn.thinking` (which would render as + // garbage). let s = session(); let view = to_view(&s); - let with_encrypted = view - .turns - .iter() - .filter(|t| { - t.extra - .get("codex") - .and_then(|v| v.get("reasoning_encrypted")) - .and_then(|v| v.as_array()) - .map(|a| !a.is_empty()) - .unwrap_or(false) - }) - .count(); - assert!( - with_encrypted >= 1, - "expected at least one turn with encrypted reasoning preserved in extra" - ); - // And nothing should have landed on `thinking` — the fixture has no - // plaintext summaries, just ciphertext. 
let with_thinking = view.turns.iter().filter(|t| t.thinking.is_some()).count(); assert_eq!( with_thinking, 0, diff --git a/crates/toolpath-codex/tests/projection_roundtrip.rs b/crates/toolpath-codex/tests/projection_roundtrip.rs index ed1d6d1..db9a6fc 100644 --- a/crates/toolpath-codex/tests/projection_roundtrip.rs +++ b/crates/toolpath-codex/tests/projection_roundtrip.rs @@ -209,28 +209,6 @@ fn roundtrip_preserves_custom_tool_call_inputs() { } } -#[test] -fn roundtrip_preserves_encrypted_reasoning_content_count() { - // Encrypted reasoning blobs survive because the forward path - // stashes them in `Turn.extra["codex"]["reasoning_encrypted"]` - // and the projector re-emits one Reasoning line per blob. - let (_t, source) = load_source(); - let (_, rebuilt, _) = roundtrip(&source); - - let count_encrypted = |s: &Session| -> usize { - s.lines - .iter() - .filter(|l| { - if let RolloutItem::ResponseItem(ResponseItem::Reasoning(r)) = l.item() { - return r.encrypted_content.is_some(); - } - false - }) - .count() - }; - assert_eq!(count_encrypted(&rebuilt), count_encrypted(&source)); -} - #[test] fn projected_jsonl_reparses_through_codex_reader() { // Strongest contract test: serialize the rebuilt session as diff --git a/crates/toolpath-convo/src/derive.rs b/crates/toolpath-convo/src/derive.rs index c497f62..faabfb5 100644 --- a/crates/toolpath-convo/src/derive.rs +++ b/crates/toolpath-convo/src/derive.rs @@ -202,12 +202,6 @@ pub fn derive_path(view: &ConversationView, config: &DeriveConfig) -> Path { extra.insert("environment".to_string(), v); } - if !turn.extra.is_empty() - && let Ok(v) = serde_json::to_value(&turn.extra) - { - extra.insert("turn_extra".to_string(), v); - } - step.change.insert( conv_artifact_key.clone(), ArtifactChange { @@ -662,7 +656,6 @@ mod tests { token_usage: None, environment: None, delegations: vec![], - extra: HashMap::new(), file_mutations: Vec::new(), } } @@ -755,7 +748,6 @@ mod tests { input, result: None, category: 
Some(ToolCategory::FileWrite), - ..Default::default() } } @@ -826,7 +818,6 @@ mod tests { input: serde_json::json!({"file_path": "x.rs"}), result: None, category: Some(ToolCategory::FileRead), - ..Default::default() }]; let view = view_with(vec![turn]); let path = derive_path(&view, &DeriveConfig::default()); @@ -1024,7 +1015,6 @@ mod tests { is_error: false, }), category: Some(ToolCategory::FileRead), - ..Default::default() }]; let view = view_with(vec![turn]); let path = derive_path(&view, &DeriveConfig::default()); @@ -1041,7 +1031,6 @@ mod tests { input: serde_json::json!({}), result: None, category: Some(ToolCategory::FileRead), - ..Default::default() }]; let view = view_with(vec![turn]); let cfg = DeriveConfig { diff --git a/crates/toolpath-convo/src/extract.rs b/crates/toolpath-convo/src/extract.rs index 57e1635..4753785 100644 --- a/crates/toolpath-convo/src/extract.rs +++ b/crates/toolpath-convo/src/extract.rs @@ -304,8 +304,6 @@ fn build_turn(step: &Step, extra: &HashMap) -> Turn { let delegations = build_delegations(extra); - let turn_extra = build_turn_extra(extra); - let parent_id = step.step.parents.first().cloned(); Turn { @@ -322,7 +320,6 @@ fn build_turn(step: &Step, extra: &HashMap) -> Turn { environment, delegations, file_mutations: Vec::new(), - extra: turn_extra, } } @@ -377,7 +374,6 @@ fn build_inline_tool_uses(extra: &HashMap) -> Vec) -> Vec, -) -> HashMap { - let mut out: HashMap = HashMap::new(); - - // Shared-derive path: verbatim map. - if let Some(obj) = extra.get("turn_extra").and_then(|v| v.as_object()) { - for (k, v) in obj { - out.insert(k.clone(), v.clone()); - } - } - - // Claude bespoke path: hoist known top-level fields under `"claude"`. 
- let mut claude_data = serde_json::Map::new(); - if let Some(v) = extra.get("version") { - claude_data.insert("version".to_string(), v.clone()); - } - if let Some(v) = extra.get("user_type") { - claude_data.insert("user_type".to_string(), v.clone()); - } - if let Some(v) = extra.get("request_id") { - claude_data.insert("request_id".to_string(), v.clone()); - } - if let Some(entry_extra) = extra.get("entry_extra").and_then(|v| v.as_object()) { - for (k, v) in entry_extra { - claude_data.insert(k.clone(), v.clone()); - } - } - if !claude_data.is_empty() { - // Merge with any existing `"claude"` key from turn_extra so we - // don't clobber provider-supplied fields. - let merged = match out.remove("claude") { - Some(serde_json::Value::Object(existing)) => { - let mut m = existing; - for (k, v) in claude_data { - m.entry(k).or_insert(v); - } - serde_json::Value::Object(m) - } - _ => serde_json::Value::Object(claude_data), - }; - out.insert("claude".to_string(), merged); - } - - out -} - fn build_token_usage(extra: &HashMap) -> Option { // Shared-derive schema: nested `token_usage` object. 
if let Some(v) = extra.get("token_usage") @@ -518,7 +462,6 @@ fn build_tool_invocation(extra: &HashMap) -> ToolInvo input, result, category, - ..Default::default() } } @@ -1228,86 +1171,6 @@ mod tests { assert!(view.turns[0].environment.is_none()); } - #[test] - fn test_extra_claude_metadata() { - let path = make_path(vec![make_step( - "step-001", - "agent:claude-opus-4-6", - "2026-01-01T00:00:00Z", - vec![], - vec![( - "agent://claude-code/sess-1", - "conversation.append", - extras(&[ - ("role", serde_json::json!("assistant")), - ("text", serde_json::json!("hi")), - ("version", serde_json::json!("1.0.30")), - ("user_type", serde_json::json!("pro")), - ("request_id", serde_json::json!("req-abc-123")), - ]), - )], - )]); - - let view = extract_conversation(&path); - let claude = view.turns[0].extra.get("claude").unwrap(); - assert_eq!(claude["version"], serde_json::json!("1.0.30")); - assert_eq!(claude["user_type"], serde_json::json!("pro")); - assert_eq!(claude["request_id"], serde_json::json!("req-abc-123")); - } - - #[test] - fn test_entry_extra_merged_into_claude() { - let path = make_path(vec![make_step( - "step-001", - "agent:claude-opus-4-6", - "2026-01-01T00:00:00Z", - vec![], - vec![( - "agent://claude-code/sess-1", - "conversation.append", - extras(&[ - ("role", serde_json::json!("assistant")), - ("text", serde_json::json!("hi")), - ( - "entry_extra", - serde_json::json!({ - "entrypoint": "cli", - "isMeta": true, - "slug": "my-project" - }), - ), - ]), - )], - )]); - - let view = extract_conversation(&path); - let claude = view.turns[0].extra.get("claude").unwrap(); - assert_eq!(claude["entrypoint"], serde_json::json!("cli")); - assert_eq!(claude["isMeta"], serde_json::json!(true)); - assert_eq!(claude["slug"], serde_json::json!("my-project")); - } - - #[test] - fn test_extra_empty_when_no_metadata() { - let path = make_path(vec![make_step( - "step-001", - "human:alex", - "2026-01-01T00:00:00Z", - vec![], - vec![( - "agent://claude-code/sess-1", - 
"conversation.append", - extras(&[ - ("role", serde_json::json!("user")), - ("text", serde_json::json!("hello")), - ]), - )], - )]); - - let view = extract_conversation(&path); - assert!(view.turns[0].extra.is_empty()); - } - #[test] fn test_agent_url_tool_not_in_files_changed() { let path = make_path(vec![ diff --git a/crates/toolpath-convo/src/lib.rs b/crates/toolpath-convo/src/lib.rs index 3308ba0..a2883b4 100644 --- a/crates/toolpath-convo/src/lib.rs +++ b/crates/toolpath-convo/src/lib.rs @@ -285,14 +285,6 @@ pub struct Turn { /// the entry links back to that `ToolInvocation::id`. #[serde(default, skip_serializing_if = "Vec::is_empty")] pub file_mutations: Vec, - - /// Provider-specific data that doesn't fit the common schema. - /// - /// Providers namespace their data under a provider key (e.g. - /// `extra["claude"]` for Claude Code) to avoid collisions when - /// consumers work with multiple providers. - #[serde(default, skip_serializing_if = "HashMap::is_empty")] - pub extra: HashMap, } /// A complete conversation from any provider. 
@@ -561,7 +553,6 @@ mod tests { token_usage: None, environment: None, delegations: vec![], - extra: HashMap::new(), file_mutations: Vec::new(), }, Turn { @@ -580,7 +571,6 @@ mod tests { is_error: false, }), category: Some(ToolCategory::FileRead), - ..Default::default() }], model: Some("claude-opus-4-6".into()), stop_reason: Some("end_turn".into()), @@ -592,7 +582,6 @@ mod tests { }), environment: None, delegations: vec![], - extra: HashMap::new(), file_mutations: Vec::new(), }, Turn { @@ -608,7 +597,6 @@ mod tests { token_usage: None, environment: None, delegations: vec![], - extra: HashMap::new(), file_mutations: Vec::new(), }, ], @@ -886,7 +874,6 @@ mod tests { input: serde_json::json!({"command": "ls"}), result: None, category: Some(ToolCategory::Shell), - ..Default::default() }; let json = serde_json::to_string(&ti).unwrap(); assert!(json.contains("\"shell\"")); @@ -902,7 +889,6 @@ mod tests { input: serde_json::json!({}), result: None, category: None, - ..Default::default() }; let json = serde_json::to_string(&ti).unwrap(); assert!(!json.contains("category")); @@ -957,7 +943,6 @@ mod tests { turns: vec![], result: None, }], - extra: HashMap::new(), file_mutations: Vec::new(), }; let json = serde_json::to_string(&turn).unwrap(); diff --git a/crates/toolpath-convo/src/project.rs b/crates/toolpath-convo/src/project.rs index 1a48409..f991d5a 100644 --- a/crates/toolpath-convo/src/project.rs +++ b/crates/toolpath-convo/src/project.rs @@ -135,7 +135,6 @@ impl AnyProjector { mod tests { use super::*; use crate::{Role, TokenUsage, ToolInvocation, ToolResult, Turn}; - use std::collections::HashMap; // ── helpers ────────────────────────────────────────────────────── @@ -168,7 +167,6 @@ mod tests { token_usage: None, environment: None, delegations: vec![], - extra: HashMap::new(), file_mutations: Vec::new(), } } @@ -353,7 +351,6 @@ mod tests { is_error: false, }), category: None, - ..Default::default() }, ToolInvocation { id: "u2".into(), @@ -361,7 +358,6 @@ mod tests { 
input: serde_json::json!({"command": "cargo test"}), result: None, category: None, - ..Default::default() }, ], model: None, @@ -369,7 +365,6 @@ mod tests { token_usage: None, environment: None, delegations: vec![], - extra: HashMap::new(), file_mutations: Vec::new(), }], total_usage: None, @@ -431,7 +426,6 @@ mod tests { }), environment: None, delegations: vec![], - extra: HashMap::new(), file_mutations: Vec::new(), }, Turn { @@ -452,7 +446,6 @@ mod tests { }), environment: None, delegations: vec![], - extra: HashMap::new(), file_mutations: Vec::new(), }, ], diff --git a/crates/toolpath-gemini/src/project.rs b/crates/toolpath-gemini/src/project.rs index 395f83a..59db84a 100644 --- a/crates/toolpath-gemini/src/project.rs +++ b/crates/toolpath-gemini/src/project.rs @@ -4,13 +4,9 @@ //! This is the inverse of [`crate::provider::to_view`]: where `to_view` //! reads a Gemini session directory into a provider-agnostic view, //! `GeminiProjector` serializes that view back into the on-disk chat -//! format. -//! -//! The projector relies on the provider-specific data that -//! [`crate::provider::to_view`] stashes under `Turn.extra["gemini"]`: -//! the full `tokens` struct, per-thought metadata, per-tool-call -//! metadata (`status`, `resultDisplay`, `description`, `displayName`), -//! and any message-level extras picked up via `#[serde(flatten)]`. +//! format. Round-tripping is best-effort and lossy on Gemini-only +//! fields (full token breakdown, per-thought timestamps, per-tool-call +//! status/displayName) — the IR no longer carries them. use std::collections::HashMap; @@ -137,7 +133,13 @@ fn project_view( // ── Turn → GeminiMessage ───────────────────────────────────────────── fn turn_to_message(turn: &Turn) -> GeminiMessage { - let (gemini_extras, msg_extras) = split_gemini_extras(&turn.extra); + // `Turn.extra` is gone; previously the Gemini projector pulled + // `extra["gemini"]` for structured thought meta, full tokens, and + // per-tool-call status. 
With that source removed, `build_thoughts` / + // `build_tokens` / `build_tool_calls` fall back to the typed IR + // fields (`Turn.thinking` as a string, `Turn.token_usage`, etc.). + let gemini_extras: Map = Map::new(); + let msg_extras: HashMap = HashMap::new(); GeminiMessage { id: turn.id.clone(), @@ -176,45 +178,6 @@ fn build_content(turn: &Turn) -> GeminiContent { } } -/// Separate the `"gemini"` submap from `Turn.extra` and partition it -/// into Gemini-specific projector metadata (tokens / thoughts_meta / -/// tool_call_meta — consumed locally) and message-level extras -/// (everything else under the gemini key — got there by being flattened -/// in via `#[serde(flatten)]` on forward, restored to the message on -/// reverse). -/// -/// **Foreign namespaces are dropped.** `Turn.extra["claude"]` and -/// similar exist to round-trip through Path documents under the -/// originating provider; they have no meaning to Gemini and would -/// pollute the JSON if we let them flatten onto messages. The Path -/// doc still carries them; only the *Gemini view* discards them. -fn split_gemini_extras( - extra: &HashMap, -) -> (Map, HashMap) { - let mut gemini_meta = Map::new(); - let mut msg_extra: HashMap = HashMap::new(); - - if let Some(Value::Object(gem)) = extra.get("gemini") { - for (k, v) in gem { - match k.as_str() { - // Projector-internal metadata — used to rebuild Tokens, - // Thought[], and per-tool-call render hints. - "tokens" | "thoughts_meta" | "tool_call_meta" => { - gemini_meta.insert(k.clone(), v.clone()); - } - // Anything else under `gemini.*` was a flattened - // message-level extra during forward; restore it to - // the message. - _ => { - msg_extra.insert(k.clone(), v.clone()); - } - } - } - } - - (gemini_meta, msg_extra) -} - /// Rebuild `Thought[]`. 
/// /// Preferred source: `extra["gemini"]["thoughts_meta"]`, which carries @@ -604,7 +567,6 @@ mod tests { token_usage: None, environment: None, delegations: vec![], - extra: HashMap::new(), file_mutations: Vec::new(), } } @@ -623,7 +585,6 @@ mod tests { token_usage: None, environment: None, delegations: vec![], - extra: HashMap::new(), file_mutations: Vec::new(), } } @@ -690,31 +651,6 @@ mod tests { assert_eq!(convo.main.messages[0].role, GeminiRole::Info); } - #[test] - fn test_thoughts_rebuilt_from_meta() { - let mut t = assistant_turn("a1", ""); - let meta = serde_json::json!([ - {"subject": "Searching", "description": "looking in /auth", "timestamp": "2026-04-17T15:00:02Z"}, - {"subject": "Plan", "description": "try token path", "timestamp": "2026-04-17T15:00:03Z"}, - ]); - t.extra - .insert("gemini".into(), serde_json::json!({"thoughts_meta": meta})); - t.thinking = Some("**Searching**\nlooking in /auth\n\n**Plan**\ntry token path".into()); - - let convo = GeminiProjector::default() - .project(&view_with(vec![t])) - .unwrap(); - let thoughts = convo.main.messages[0].thoughts.as_ref().unwrap(); - assert_eq!(thoughts.len(), 2); - assert_eq!(thoughts[0].subject.as_deref(), Some("Searching")); - assert_eq!(thoughts[0].description.as_deref(), Some("looking in /auth")); - assert_eq!( - thoughts[0].timestamp.as_deref(), - Some("2026-04-17T15:00:02Z") - ); - assert_eq!(thoughts[1].subject.as_deref(), Some("Plan")); - } - #[test] fn test_thoughts_fallback_from_flattened_string() { // No gemini.thoughts_meta — projector should still un-flatten the string. 
@@ -729,25 +665,6 @@ mod tests { assert_eq!(thoughts[0].description.as_deref(), Some("looking in /auth")); } - #[test] - fn test_tokens_from_gemini_extras_preserved() { - let mut t = assistant_turn("a1", "Done."); - t.extra.insert( - "gemini".into(), - serde_json::json!({ - "tokens": {"input": 10, "output": 5, "cached": 0, "thoughts": 2, "tool": 0, "total": 17} - }), - ); - let convo = GeminiProjector::default() - .project(&view_with(vec![t])) - .unwrap(); - let tokens = convo.main.messages[0].tokens.as_ref().unwrap(); - assert_eq!(tokens.input, Some(10)); - assert_eq!(tokens.output, Some(5)); - assert_eq!(tokens.thoughts, Some(2)); - assert_eq!(tokens.total, Some(17)); - } - #[test] fn test_tokens_fallback_from_common_token_usage() { let mut t = assistant_turn("a1", "Done."); @@ -780,7 +697,6 @@ mod tests { is_error: false, }), category: Some(ToolCategory::FileRead), - ..Default::default() }]; let convo = GeminiProjector::default() .project(&view_with(vec![t])) @@ -811,7 +727,6 @@ mod tests { is_error: true, }), category: Some(ToolCategory::Shell), - ..Default::default() }]; let convo = GeminiProjector::default() .project(&view_with(vec![t])) @@ -820,41 +735,6 @@ mod tests { assert_eq!(call.status, "error"); } - #[test] - fn test_tool_call_meta_preserves_result_display_and_description() { - let mut t = assistant_turn("a1", ""); - t.tool_uses = vec![ToolInvocation { - id: "tc1".into(), - name: "write_file".into(), - input: serde_json::json!({"file_path": "a.rs"}), - result: Some(ToolResult { - content: "wrote".into(), - is_error: false, - }), - category: Some(ToolCategory::FileWrite), - ..Default::default() - }]; - t.extra.insert( - "gemini".into(), - serde_json::json!({ - "tool_call_meta": [{ - "id": "tc1", - "status": "success", - "result_display": {"fileDiff": "@@\n+x"}, - "description": "write a.rs", - "display_name": "Write a.rs", - }], - }), - ); - let convo = GeminiProjector::default() - .project(&view_with(vec![t])) - .unwrap(); - let call = 
&convo.main.messages[0].tool_calls.as_ref().unwrap()[0]; - assert_eq!(call.description.as_deref(), Some("write a.rs")); - assert_eq!(call.display_name.as_deref(), Some("Write a.rs")); - assert_eq!(call.file_diff().as_deref(), Some("@@\n+x")); - } - #[test] fn test_delegation_becomes_subagent_chat_file() { let mut t = assistant_turn("a1", "delegating"); @@ -897,54 +777,6 @@ mod tests { assert!(convo.main.directories.is_none()); } - #[test] - fn test_foreign_namespace_extras_are_dropped() { - // Provider-namespaced extras from other harnesses (e.g. - // `Turn.extra["claude"]`) must NOT leak as top-level fields on - // a Gemini message — they pollute the JSON and have no meaning - // to Gemini. Only the `gemini` submap (and its non-projector - // contents) is honored. - let mut t = user_turn("u1", "hi"); - t.extra.insert( - "claude".into(), - serde_json::json!({"version": "2.1.116", "user_type": "external"}), - ); - t.extra - .insert("codex".into(), serde_json::json!({"some": "data"})); - let convo = GeminiProjector::default() - .project(&view_with(vec![t])) - .unwrap(); - let msg = &convo.main.messages[0]; - assert!( - !msg.extra.contains_key("claude"), - "claude namespace should not leak onto Gemini messages" - ); - assert!(!msg.extra.contains_key("codex")); - } - - #[test] - fn test_gemini_native_message_extras_are_preserved() { - // Gemini-native message-level extras are flattened INTO the - // gemini submap on forward (provider.rs::build_gemini_extra). - // The projector must restore them at the message level. 
- let mut t = user_turn("u1", "hi"); - t.extra.insert( - "gemini".into(), - serde_json::json!({ - "tokens": {"input": 10}, - "some_native_extra": "round-tripped value", - }), - ); - let convo = GeminiProjector::default() - .project(&view_with(vec![t])) - .unwrap(); - let msg = &convo.main.messages[0]; - assert_eq!( - msg.extra.get("some_native_extra"), - Some(&serde_json::json!("round-tripped value")) - ); - } - #[test] fn test_project_hash_and_path_propagate() { let view = view_with(vec![user_turn("u1", "hi")]); @@ -976,7 +808,6 @@ mod tests { is_error: false, }), category: Some(ToolCategory::FileRead), - ..Default::default() }]; let convo = GeminiProjector::default() diff --git a/crates/toolpath-gemini/src/provider.rs b/crates/toolpath-gemini/src/provider.rs index 7248214..37292aa 100644 --- a/crates/toolpath-gemini/src/provider.rs +++ b/crates/toolpath-gemini/src/provider.rs @@ -9,11 +9,9 @@ //! chat, the matching sub-agent file's turns are populated onto a //! [`DelegatedWork`]. -use std::collections::HashMap; - use crate::GeminiConvo; use crate::types::{ChatFile, Conversation, GeminiMessage, GeminiRole, Thought, ToolCall}; -use serde_json::{Map, Value}; +use serde_json::Value; use toolpath_convo::{ ConversationMeta, ConversationProvider, ConversationView, ConvoError, DelegatedWork, EnvironmentSnapshot, Role, TokenUsage, ToolCategory, ToolInvocation, ToolResult, Turn, @@ -118,12 +116,6 @@ fn message_to_turn(msg: &GeminiMessage, working_dir: Option<&str>) -> Turn { vcs_revision: None, }); - let mut extra = HashMap::new(); - let gemini_extra = build_gemini_extra(msg); - if !gemini_extra.is_empty() { - extra.insert("gemini".to_string(), Value::Object(gemini_extra)); - } - Turn { id: msg.id.clone(), parent_id: None, @@ -137,7 +129,6 @@ fn message_to_turn(msg: &GeminiMessage, working_dir: Option<&str>) -> Turn { token_usage, environment, delegations: vec![], - extra, file_mutations, } } @@ -148,6 +139,7 @@ fn message_to_turn(msg: &GeminiMessage, working_dir: 
Option<&str>) -> Turn { /// computed by the harness). /// 2. Hand-rolled fallback from `args` (`old_string`/`new_string` for /// `replace`, `content` for `write_file`). +/// /// `tool_id` links back to the [`ToolCall`]. fn compute_file_mutations(calls: &[ToolCall]) -> Vec { let mut out = Vec::new(); @@ -260,65 +252,7 @@ fn tool_call_to_invocation(call: &ToolCall) -> ToolInvocation { input: call.args.clone(), result, category: tool_category(&call.name), - ..Default::default() - } -} - -/// Collect fields that don't map cleanly onto the common `Turn` schema -/// into a map that lives under `Turn.extra["gemini"]`. -fn build_gemini_extra(msg: &GeminiMessage) -> Map { - let mut map = Map::new(); - - // Raw tokens struct (includes thoughts/tool/total not in the common schema). - if let Some(t) = &msg.tokens - && let Ok(v) = serde_json::to_value(t) - { - map.insert("tokens".to_string(), v); - } - - // Full thought structs preserved verbatim. The flattened text also - // lands in Turn.thinking; this map is what the reverse projector uses - // to rebuild Gemini's `thoughts[]` array losslessly. - if !msg.thoughts().is_empty() { - let meta: Vec = msg - .thoughts() - .iter() - .map(|t| { - serde_json::json!({ - "subject": t.subject, - "description": t.description, - "timestamp": t.timestamp, - }) - }) - .collect(); - map.insert("thoughts_meta".to_string(), Value::Array(meta)); - } - - // Tool call statuses (pending/executing/etc. — the result-only view - // on ToolInvocation loses this nuance). 
- if !msg.tool_calls().is_empty() { - let statuses: Vec = msg - .tool_calls() - .iter() - .map(|t| { - serde_json::json!({ - "id": t.id, - "status": t.status, - "result_display": t.result_display, - "description": t.description, - "display_name": t.display_name, - }) - }) - .collect(); - map.insert("tool_call_meta".to_string(), Value::Array(statuses)); - } - - // Anything else that serde picked up via #[serde(flatten)] - for (k, v) in &msg.extra { - map.insert(k.clone(), v.clone()); } - - map } // ── Delegation wiring ──────────────────────────────────────────────── @@ -820,18 +754,6 @@ mod tests { assert!(thinking.contains("looking in /auth")); } - #[test] - fn test_extra_gemini_tokens_preserved() { - let (_t, p) = setup_provider(); - let view = - ConversationProvider::load_conversation(&p, "/abs/myrepo", "session-uuid").unwrap(); - let claude = view.turns[1].extra.get("gemini").expect("extra[gemini]"); - let tokens = claude.get("tokens").unwrap(); - assert_eq!(tokens["input"], 100); - assert_eq!(tokens["thoughts"], 10); - assert_eq!(tokens["total"], 160); - } - #[test] fn test_list_metadata() { let (_t, p) = setup_provider(); diff --git a/crates/toolpath-gemini/tests/projection_roundtrip.rs b/crates/toolpath-gemini/tests/projection_roundtrip.rs index acd8391..87deb91 100644 --- a/crates/toolpath-gemini/tests/projection_roundtrip.rs +++ b/crates/toolpath-gemini/tests/projection_roundtrip.rs @@ -228,7 +228,10 @@ fn roundtrip_preserves_tool_calls_with_results() { } #[test] -fn roundtrip_preserves_tokens() { +fn roundtrip_preserves_input_output_tokens() { + // Input/output/cached tokens survive via Turn.token_usage. + // Thoughts/tool/total tokens were Gemini-extra only and don't + // round-trip now that Turn.extra is gone. 
let source = load_source_conversation(); let (_, rebuilt, _) = roundtrip(&source); @@ -247,14 +250,14 @@ fn roundtrip_preserves_tokens() { assert_eq!(bt.input, at.input, "input tokens at msg {}", i); assert_eq!(bt.output, at.output, "output tokens at msg {}", i); assert_eq!(bt.cached, at.cached, "cached tokens at msg {}", i); - assert_eq!(bt.thoughts, at.thoughts, "thoughts tokens at msg {}", i); - assert_eq!(bt.tool, at.tool, "tool tokens at msg {}", i); - assert_eq!(bt.total, at.total, "total tokens at msg {}", i); } } #[test] -fn roundtrip_preserves_thoughts_losslessly() { +fn roundtrip_preserves_thought_subjects_and_descriptions() { + // Subject/description survive via the flattened `Turn.thinking` + // string, which the projector splits and re-parses. Thought + // timestamps lived only in Gemini extras and are now lost. let source = load_source_conversation(); let (_, rebuilt, _) = roundtrip(&source); @@ -286,11 +289,6 @@ fn roundtrip_preserves_thoughts_losslessly() { "thought description mismatch at msg {} idx {}", i, j ); - assert_eq!( - b.timestamp, a.timestamp, - "thought timestamp mismatch at msg {} idx {}", - i, j - ); } } } diff --git a/crates/toolpath-opencode/src/project.rs b/crates/toolpath-opencode/src/project.rs index 040161d..2372e66 100644 --- a/crates/toolpath-opencode/src/project.rs +++ b/crates/toolpath-opencode/src/project.rs @@ -668,11 +668,8 @@ fn synthesize_edit_diff(input: &Value) -> Option { Some(out) } -fn opencode_extras(turn: &Turn) -> Option<&Map> { - turn.extra.get("opencode").and_then(|v| match v { - Value::Object(m) => Some(m), - _ => None, - }) +fn opencode_extras(_turn: &Turn) -> Option<&'static Map> { + None } fn mint_session_id(seed: &str) -> String { @@ -767,7 +764,6 @@ mod tests { token_usage: None, environment: None, delegations: vec![], - extra: HashMap::new(), file_mutations: Vec::new(), } } @@ -786,7 +782,6 @@ mod tests { token_usage: None, environment: None, delegations: vec![], - extra: HashMap::new(), file_mutations: 
Vec::new(), } } @@ -853,7 +848,6 @@ mod tests { is_error: false, }), category: Some(ToolCategory::Shell), - ..Default::default() }]; let s = OpencodeProjector::default() .project(&view_with(vec![t])) @@ -892,7 +886,6 @@ mod tests { is_error: true, }), category: Some(ToolCategory::Shell), - ..Default::default() }]; let s = OpencodeProjector::default() .project(&view_with(vec![t])) @@ -917,7 +910,6 @@ mod tests { input: json!({"file_path": "x.rs", "old_string": "a", "new_string": "b"}), result: None, category: Some(ToolCategory::FileWrite), - ..Default::default() }]; let s = OpencodeProjector::default() .project(&view_with(vec![t])) @@ -947,20 +939,6 @@ mod tests { ); } - #[test] - fn foreign_namespace_extras_are_dropped() { - let mut t = assistant_turn("hi"); - t.extra - .insert("claude".into(), json!({"version": "2.1.116"})); - t.extra.insert("gemini".into(), json!({"foo": "bar"})); - let s = OpencodeProjector::default() - .project(&view_with(vec![t])) - .unwrap(); - let serialized = serde_json::to_string(&s).unwrap(); - assert!(!serialized.contains("\"version\":\"2.1.116\"")); - assert!(!serialized.contains("\"foo\":\"bar\"")); - } - #[test] fn assistant_parent_id_chains_to_prior_user_message() { let s = OpencodeProjector::default() diff --git a/crates/toolpath-opencode/src/provider.rs b/crates/toolpath-opencode/src/provider.rs index d8f099f..78fe04e 100644 --- a/crates/toolpath-opencode/src/provider.rs +++ b/crates/toolpath-opencode/src/provider.rs @@ -28,7 +28,7 @@ //! its own id, linked by `session.parent_id`). 
use chrono::{TimeZone, Utc}; -use serde_json::{Map, Value}; +use serde_json::Value; use std::collections::HashMap; use crate::error::Result; @@ -275,34 +275,13 @@ impl<'a> Builder<'a> { } } - fn handle_user_message(&mut self, msg: &Message, u: &UserMessage) { + fn handle_user_message(&mut self, msg: &Message, _u: &UserMessage) { let text = concat_text_parts(&msg.parts); let environment = Some(EnvironmentSnapshot { working_dir: Some(self.session.directory.to_string_lossy().to_string()), vcs_branch: None, vcs_revision: None, }); - let mut extra: HashMap = HashMap::new(); - let mut opencode_extra = Map::new(); - opencode_extra.insert("agent".into(), Value::String(u.agent.clone())); - opencode_extra.insert( - "model".into(), - serde_json::to_value(&u.model).unwrap_or(Value::Null), - ); - if let Some(tools) = &u.tools { - opencode_extra.insert( - "tools".into(), - serde_json::to_value(tools).unwrap_or(Value::Null), - ); - } - if let Some(system) = &u.system - && !system.is_empty() - { - opencode_extra.insert("system".into(), Value::String(system.clone())); - } - if !opencode_extra.is_empty() { - extra.insert("opencode".into(), Value::Object(opencode_extra)); - } self.turns.push(Turn { id: msg.id.clone(), @@ -317,7 +296,6 @@ impl<'a> Builder<'a> { token_usage: None, environment, delegations: Vec::new(), - extra, file_mutations: Vec::new(), }); } @@ -327,11 +305,9 @@ impl<'a> Builder<'a> { let mut thinking_chunks: Vec = Vec::new(); let mut tool_uses: Vec = Vec::new(); let mut snapshots: Vec = Vec::new(); - let mut patches: Vec = Vec::new(); let mut delegations: Vec = Vec::new(); let mut step_usage = TokenUsage::default(); let mut step_usage_set = false; - let mut step_cost_total = 0.0_f64; let mut stop_reason: Option = None; for p in &msg.parts { @@ -368,7 +344,6 @@ impl<'a> Builder<'a> { } accumulate_tokens(&mut step_usage, &sf.tokens); step_usage_set = true; - step_cost_total += sf.cost; stop_reason = Some(sf.reason.clone()); } PartData::Snapshot(s) => { @@ -377,10 
+352,6 @@ impl<'a> Builder<'a> { } } PartData::Patch(pp) => { - patches.push(serde_json::json!({ - "hash": pp.hash, - "files": pp.files, - })); for f in &pp.files { if self.files_changed_seen.insert(f.clone()) { self.files_changed_order.push(f.clone()); @@ -463,30 +434,6 @@ impl<'a> Builder<'a> { vcs_revision: None, }); - let mut extra: HashMap = HashMap::new(); - let mut opencode_extra: Map = Map::new(); - opencode_extra.insert("agent".into(), Value::String(a.agent.clone())); - opencode_extra.insert("providerID".into(), Value::String(a.provider_id.clone())); - opencode_extra.insert("modelID".into(), Value::String(a.model_id.clone())); - opencode_extra.insert("cost_step_total".into(), json_num(step_cost_total)); - opencode_extra.insert("cost_message".into(), json_num(a.cost)); - if !snapshots.is_empty() { - opencode_extra.insert( - "snapshots".into(), - Value::Array(snapshots.iter().cloned().map(Value::String).collect()), - ); - } - if !patches.is_empty() { - opencode_extra.insert("patches".into(), Value::Array(patches)); - } - if let Some(v) = &a.variant { - opencode_extra.insert("variant".into(), Value::String(v.clone())); - } - if let Some(err) = &a.error { - opencode_extra.insert("error".into(), err.clone()); - } - extra.insert("opencode".into(), Value::Object(opencode_extra)); - // Compute `file_mutations` for this turn inline: // 1. 
If we have a snapshot repo AND a snapshot pair (prev_after, // this turn's last snapshot), walk the git2 tree↔tree diff @@ -521,7 +468,6 @@ impl<'a> Builder<'a> { token_usage, environment, delegations, - extra, file_mutations, }); } @@ -639,7 +585,6 @@ fn to_invocation( input, result, category: tool_category(&tp.tool), - ..Default::default() } } @@ -722,12 +667,6 @@ fn to_data_map(v: &Value) -> HashMap { } } -fn json_num(v: f64) -> Value { - serde_json::Number::from_f64(v) - .map(Value::Number) - .unwrap_or(Value::Null) -} - // ── ConversationProvider trait impl ───────────────────────────────── impl ConversationProvider for OpencodeConvo { @@ -1006,21 +945,6 @@ mod tests { assert_eq!(write.category, Some(ToolCategory::FileWrite)); } - #[test] - fn snapshots_surface_on_assistant_extra() { - let (_t, mgr) = setup(BASIC_SQL); - let view = to_view(&mgr.read_session("ses_x").unwrap()); - let assistant = &view.turns[1]; - let snaps = assistant.extra["opencode"]["snapshots"].as_array().unwrap(); - assert_eq!( - snaps, - &[ - Value::String("snap_a".into()), - Value::String("snap_b".into()) - ] - ); - } - #[test] fn files_changed_from_tool_input() { let (_t, mgr) = setup(BASIC_SQL); diff --git a/crates/toolpath-pi/src/project.rs b/crates/toolpath-pi/src/project.rs index 6bd4507..57bc2a0 100644 --- a/crates/toolpath-pi/src/project.rs +++ b/crates/toolpath-pi/src/project.rs @@ -176,13 +176,12 @@ fn project_view( }) } -/// Pull `Turn.extra["pi"]` if present (the namespace forward path uses). -/// Foreign-namespace extras are intentionally not consulted. -fn pi_extras(turn: &Turn) -> Option<&Map> { - turn.extra.get("pi").and_then(|v| match v { - Value::Object(m) => Some(m), - _ => None, - }) +/// Used to return `Turn.extra["pi"]`; the IR no longer carries +/// provider-namespaced extras. Always `None`. Callers fall back to +/// reconstructing source-format details from typed IR fields and +/// reasonable defaults. 
+fn pi_extras(_turn: &Turn) -> Option<&'static Map> { + None } /// Emit `ModelChange` / `ThinkingLevelChange` / `Label` entries that the @@ -755,7 +754,6 @@ fn extra_map_from(v: Option<&Value>) -> HashMap { #[cfg(test)] mod tests { use super::*; - use std::collections::HashMap; use toolpath_convo::{TokenUsage, ToolCategory, ToolInvocation, ToolResult}; fn user_turn(id: &str, text: &str) -> Turn { @@ -772,7 +770,6 @@ mod tests { token_usage: None, environment: None, delegations: vec![], - extra: HashMap::new(), file_mutations: Vec::new(), } } @@ -796,7 +793,6 @@ mod tests { }), environment: None, delegations: vec![], - extra: HashMap::new(), file_mutations: Vec::new(), } } @@ -859,7 +855,6 @@ mod tests { is_error: false, }), category: Some(ToolCategory::FileRead), - ..Default::default() }]; let session = PiProjector::default().project(&view_with(vec![t])).unwrap(); // session header + assistant + tool-result = 3 entries @@ -916,7 +911,6 @@ mod tests { input: serde_json::json!({"command": "ls"}), result: None, category: Some(ToolCategory::Shell), - ..Default::default() }]; let session = PiProjector::default().project(&view_with(vec![t])).unwrap(); match &session.entries[1] { @@ -951,130 +945,6 @@ mod tests { } } - #[test] - fn test_tool_role_with_pi_extras_becomes_tool_result() { - let mut t = user_turn("tr1", "result text"); - t.role = Role::Other("tool".into()); - t.extra.insert( - "pi".into(), - serde_json::json!({ - "toolCallId": "tc1", - "toolName": "read", - "isError": false, - }), - ); - let session = PiProjector::default().project(&view_with(vec![t])).unwrap(); - match &session.entries[1] { - Entry::Message { - message: - AgentMessage::ToolResult { - tool_call_id, - tool_name, - is_error, - .. - }, - .. 
- } => { - assert_eq!(tool_call_id, "tc1"); - assert_eq!(tool_name, "read"); - assert!(!is_error); - } - _ => panic!("expected ToolResult"), - } - } - - #[test] - fn test_bash_role_becomes_bash_execution() { - let mut t = user_turn("b1", "$ ls\nfile-a\nfile-b"); - t.role = Role::Other("bash".into()); - t.extra.insert( - "pi".into(), - serde_json::json!({ - "command": "ls", - "exitCode": 0, - "cancelled": false, - "truncated": false, - }), - ); - let session = PiProjector::default().project(&view_with(vec![t])).unwrap(); - match &session.entries[1] { - Entry::Message { - message: - AgentMessage::BashExecution { - command, - output, - exit_code, - .. - }, - .. - } => { - assert_eq!(command, "ls"); - assert_eq!(*exit_code, Some(0)); - // Leading `$ ls\n` is stripped to recover the original output. - assert_eq!(output, "file-a\nfile-b"); - } - _ => panic!("expected BashExecution"), - } - } - - #[test] - fn test_foreign_namespace_extras_dropped() { - // Turn.extra["claude"] / Turn.extra["gemini"] must NOT appear - // anywhere on the projected entries — only Turn.extra["pi"] is - // honored. 
- let mut t = assistant_turn("a1", "hi"); - t.extra.insert( - "claude".into(), - serde_json::json!({"version": "2.1.116", "user_type": "external"}), - ); - t.extra - .insert("gemini".into(), serde_json::json!({"foo": "bar"})); - - let session = PiProjector::default().project(&view_with(vec![t])).unwrap(); - let serialized = serde_json::to_string(&session.entries[1]).unwrap(); - assert!( - !serialized.contains("\"version\":\"2.1.116\""), - "claude extras leaked: {}", - serialized - ); - assert!( - !serialized.contains("\"foo\":\"bar\""), - "gemini extras leaked: {}", - serialized - ); - } - - #[test] - fn test_compaction_synthetic_turn_becomes_compaction_entry() { - let mut t = user_turn("c1", "Compacted (summary): old stuff"); - t.role = Role::System; - t.extra.insert( - "pi".into(), - serde_json::json!({ - "compaction": { - "summary": "old stuff", - "firstKeptEntryId": "m5", - "tokensBefore": 50000, - "fromHook": false, - } - }), - ); - let session = PiProjector::default().project(&view_with(vec![t])).unwrap(); - match &session.entries[1] { - Entry::Compaction { - summary, - first_kept_entry_id, - tokens_before, - .. 
- } => { - assert_eq!(summary, "old stuff"); - assert_eq!(first_kept_entry_id, "m5"); - assert_eq!(*tokens_before, 50000); - } - other => panic!("expected Compaction, got {:?}", other), - } - } - #[test] fn test_session_header_uses_view_id_and_first_turn_cwd() { use toolpath_convo::EnvironmentSnapshot; diff --git a/crates/toolpath-pi/src/provider.rs b/crates/toolpath-pi/src/provider.rs index eb90117..883dbf5 100644 --- a/crates/toolpath-pi/src/provider.rs +++ b/crates/toolpath-pi/src/provider.rs @@ -17,7 +17,7 @@ use crate::types::{ AgentMessage, ContentBlock, Entry, MessageContent, StopReason, ToolResultContent, Usage, }; use chrono::{DateTime, Utc}; -use serde_json::{Map, Value, json}; +use serde_json::{Value, json}; use std::collections::HashMap; use toolpath_convo::{ ConversationMeta, ConversationProvider, ConversationView, ConvoError, DelegatedWork, @@ -205,30 +205,6 @@ fn truncate_output(output: &str, max: usize) -> String { } } -// ── Pending-metadata buffer ────────────────────────────────────────── - -#[derive(Default)] -struct PendingMeta { - model_change: Option, - thinking_level_change: Option, - labels: Vec, -} - -impl PendingMeta { - fn drain_into(&mut self, pi: &mut Map) { - if let Some(v) = self.model_change.take() { - pi.insert("modelChange".to_string(), v); - } - if let Some(v) = self.thinking_level_change.take() { - pi.insert("thinkingLevelChange".to_string(), v); - } - if !self.labels.is_empty() { - let labels = std::mem::take(&mut self.labels); - pi.insert("labels".to_string(), Value::Array(labels)); - } - } -} - // ── Main conversion ────────────────────────────────────────────────── /// Convert a PiSession into a provider-agnostic ConversationView. @@ -249,86 +225,18 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { // Per-turn tool-result info: (tool_call_id, content, is_error). 
let mut tool_result_payloads: Vec<(usize, String, String, bool)> = Vec::new(); - let mut pending = PendingMeta::default(); - let mut is_first_turn = true; - for entry in &session.entries { match entry { Entry::Session(_) => continue, - Entry::ModelChange { - base, - provider, - model_id, - extra, - .. - } => { - let mut m = Map::new(); - m.insert("id".to_string(), json!(base.id)); - m.insert("timestamp".to_string(), json!(base.timestamp)); - m.insert("provider".to_string(), json!(provider)); - m.insert("modelId".to_string(), json!(model_id)); - if !extra.is_empty() { - m.insert("rawExtra".to_string(), json!(extra)); - } - pending.model_change = Some(Value::Object(m)); - } - - Entry::ThinkingLevelChange { - base, - thinking_level, - extra, - .. - } => { - let mut m = Map::new(); - m.insert("id".to_string(), json!(base.id)); - m.insert("timestamp".to_string(), json!(base.timestamp)); - m.insert("thinkingLevel".to_string(), json!(thinking_level)); - if !extra.is_empty() { - m.insert("rawExtra".to_string(), json!(extra)); - } - pending.thinking_level_change = Some(Value::Object(m)); - } - - Entry::Label { base, extra, .. } => { - let mut m = Map::new(); - m.insert("id".to_string(), json!(base.id)); - m.insert("timestamp".to_string(), json!(base.timestamp)); - if !extra.is_empty() { - m.insert("rawExtra".to_string(), json!(extra)); - } - pending.labels.push(Value::Object(m)); + Entry::ModelChange { .. } + | Entry::ThinkingLevelChange { .. } + | Entry::Label { .. } => { + // Discarded — these influence rendering only and don't map onto + // a cross-harness IR field. } - Entry::Compaction { - base, - summary, - first_kept_entry_id, - tokens_before, - details, - from_hook, - extra, - .. 
- } => { - let mut pi = Map::new(); - let mut comp = Map::new(); - comp.insert("summary".to_string(), json!(summary)); - comp.insert("firstKeptEntryId".to_string(), json!(first_kept_entry_id)); - comp.insert("tokensBefore".to_string(), json!(tokens_before)); - if let Some(d) = details { - comp.insert("details".to_string(), d.clone()); - } - if let Some(fh) = from_hook { - comp.insert("fromHook".to_string(), json!(fh)); - } - pi.insert("compaction".to_string(), Value::Object(comp)); - if !extra.is_empty() { - pi.insert("rawExtra".to_string(), json!(extra)); - } - pending.drain_into(&mut pi); - attach_first_turn_meta(&mut pi, &mut is_first_turn, session); - let mut extra_map = HashMap::new(); - extra_map.insert("pi".to_string(), Value::Object(pi)); + Entry::Compaction { base, summary, .. } => { turns.push(Turn { id: base.id.clone(), parent_id: base.parent_id.clone(), @@ -342,37 +250,11 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { token_usage: None, environment: Some(env.clone()), delegations: vec![], - extra: extra_map, file_mutations: Vec::new(), }); } - Entry::BranchSummary { - base, - from_id, - summary, - details, - from_hook, - extra, - .. - } => { - let mut pi = Map::new(); - let mut bs = Map::new(); - bs.insert("fromId".to_string(), json!(from_id)); - if let Some(d) = details { - bs.insert("details".to_string(), d.clone()); - } - if let Some(fh) = from_hook { - bs.insert("fromHook".to_string(), json!(fh)); - } - pi.insert("branchSummary".to_string(), Value::Object(bs)); - if !extra.is_empty() { - pi.insert("rawExtra".to_string(), json!(extra)); - } - pending.drain_into(&mut pi); - attach_first_turn_meta(&mut pi, &mut is_first_turn, session); - let mut extra_map = HashMap::new(); - extra_map.insert("pi".to_string(), Value::Object(pi)); + Entry::BranchSummary { base, summary, .. 
} => { turns.push(Turn { id: base.id.clone(), parent_id: base.parent_id.clone(), @@ -386,30 +268,11 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { token_usage: None, environment: Some(env.clone()), delegations: vec![], - extra: extra_map, file_mutations: Vec::new(), }); } - Entry::Custom { - base, - custom_type, - data, - extra, - .. - } => { - let mut pi = Map::new(); - let mut c = Map::new(); - c.insert("customType".to_string(), json!(custom_type)); - c.insert("data".to_string(), Value::Object(data.clone())); - pi.insert("custom".to_string(), Value::Object(c)); - if !extra.is_empty() { - pi.insert("rawExtra".to_string(), json!(extra)); - } - pending.drain_into(&mut pi); - attach_first_turn_meta(&mut pi, &mut is_first_turn, session); - let mut extra_map = HashMap::new(); - extra_map.insert("pi".to_string(), Value::Object(pi)); + Entry::Custom { base, .. } => { turns.push(Turn { id: base.id.clone(), parent_id: base.parent_id.clone(), @@ -423,7 +286,6 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { token_usage: None, environment: Some(env.clone()), delegations: vec![], - extra: extra_map, file_mutations: Vec::new(), }); } @@ -432,26 +294,8 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { base, custom_type, content, - display, - details, - extra, .. 
} => { - let mut pi = Map::new(); - let mut cm = Map::new(); - cm.insert("customType".to_string(), json!(custom_type)); - cm.insert("display".to_string(), json!(display)); - if let Some(d) = details { - cm.insert("details".to_string(), d.clone()); - } - pi.insert("customMessage".to_string(), Value::Object(cm)); - if !extra.is_empty() { - pi.insert("rawExtra".to_string(), json!(extra)); - } - pending.drain_into(&mut pi); - attach_first_turn_meta(&mut pi, &mut is_first_turn, session); - let mut extra_map = HashMap::new(); - extra_map.insert("pi".to_string(), Value::Object(pi)); turns.push(Turn { id: base.id.clone(), parent_id: base.parent_id.clone(), @@ -465,18 +309,11 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { token_usage: None, environment: Some(env.clone()), delegations: vec![], - extra: extra_map, file_mutations: Vec::new(), }); } - Entry::Message { - base, - message, - extra: entry_extra, - .. - } => { - let mut pi = Map::new(); + Entry::Message { base, message, .. } => { let text; let mut thinking = None; let mut tool_uses: Vec = Vec::new(); @@ -487,23 +324,16 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { let role: Role; match message { - AgentMessage::User { content, extra, .. } => { + AgentMessage::User { content, .. } => { role = Role::User; text = extract_user_text(content); - if !extra.is_empty() { - pi.insert("rawExtra".to_string(), json!(extra)); - } } AgentMessage::Assistant { content, - api, - provider, model: m, usage, stop_reason, - error_message, - extra, .. 
} => { role = Role::Assistant; @@ -541,78 +371,41 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { input: arguments.clone(), result: None, category, - ..Default::default() }); } } - - let mut api_obj = Map::new(); - api_obj.insert("provider".to_string(), json!(provider)); - api_obj.insert("api".to_string(), json!(api)); - pi.insert("api".to_string(), Value::Object(api_obj)); - pi.insert( - "stopReason".to_string(), - serde_json::to_value(stop_reason).unwrap_or(Value::Null), - ); - if let Some(err) = error_message { - pi.insert("errorMessage".to_string(), json!(err)); - } - if !extra.is_empty() { - pi.insert("rawExtra".to_string(), json!(extra)); - } } AgentMessage::ToolResult { tool_call_id, - tool_name, content, is_error, - details, - extra, .. } => { - role = Role::Other("tool".to_string()); - text = extract_tool_result_text(content); - pi.insert("toolCallId".to_string(), json!(tool_call_id)); - pi.insert("toolName".to_string(), json!(tool_name)); - pi.insert("isError".to_string(), json!(is_error)); - if let Some(d) = details { - pi.insert("details".to_string(), d.clone()); - } - if !extra.is_empty() { - pi.insert("rawExtra".to_string(), json!(extra)); - } + // Tool results fold onto the matching assistant + // turn's `tool_uses[i].result` via pass 2. We don't + // emit them as standalone turns — that mirrors how + // claude/gemini/codex/opencode derive treats tool + // results, and keeps Pi → Pi idempotent without + // smuggling tool_call_id through Turn.extra. tool_result_payloads.push(( - turns.len(), + usize::MAX, tool_call_id.clone(), - text.clone(), + extract_tool_result_text(content), *is_error, )); + continue; } AgentMessage::BashExecution { command, output, exit_code, - cancelled, - truncated, - full_output_path, - extra, .. 
} => { role = Role::Other("bash".to_string()); let out_trunc = truncate_output(output, 4096); text = format!("$ {}\n{}", command, out_trunc); - pi.insert("command".to_string(), json!(command)); - pi.insert("exitCode".to_string(), json!(exit_code)); - pi.insert("cancelled".to_string(), json!(cancelled)); - pi.insert("truncated".to_string(), json!(truncated)); - if let Some(fop) = full_output_path { - pi.insert("fullOutputPath".to_string(), json!(fop)); - } - if !extra.is_empty() { - pi.insert("rawExtra".to_string(), json!(extra)); - } // Synthetic ToolInvocation representing the bash run itself. tool_uses.push(ToolInvocation { id: base.id.clone(), @@ -623,57 +416,24 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { is_error: !matches!(exit_code, Some(0)), }), category: Some(ToolCategory::Shell), - ..Default::default() }); } AgentMessage::Custom { custom_type, content, - display, - details, - extra, .. } => { role = Role::Other(format!("custom:{}", custom_type)); text = extract_user_text(content); - pi.insert("customType".to_string(), json!(custom_type)); - pi.insert("display".to_string(), json!(display)); - if let Some(d) = details { - pi.insert("details".to_string(), d.clone()); - } - if !extra.is_empty() { - pi.insert("rawExtra".to_string(), json!(extra)); - } } - AgentMessage::BranchSummary { extra, .. } => { + AgentMessage::BranchSummary { .. } | AgentMessage::CompactionSummary { .. } => { role = Role::System; text = String::new(); - if !extra.is_empty() { - pi.insert("rawExtra".to_string(), json!(extra)); - } } - - AgentMessage::CompactionSummary { extra, .. 
} => { - role = Role::System; - text = String::new(); - if !extra.is_empty() { - pi.insert("rawExtra".to_string(), json!(extra)); - } - } - } - - if !entry_extra.is_empty() { - pi.insert("entryExtra".to_string(), json!(entry_extra)); } - pending.drain_into(&mut pi); - attach_first_turn_meta(&mut pi, &mut is_first_turn, session); - - let mut extra_map = HashMap::new(); - extra_map.insert("pi".to_string(), Value::Object(pi)); - turns.push(Turn { id: base.id.clone(), parent_id: base.parent_id.clone(), @@ -687,7 +447,6 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { token_usage, environment: Some(env.clone()), delegations, - extra: extra_map, file_mutations: Vec::new(), }); } @@ -784,16 +543,6 @@ pub fn session_to_view(session: &PiSession) -> ConversationView { } } -/// On the first emitted turn, attach `parentSession` annotation if present. -fn attach_first_turn_meta(pi: &mut Map, is_first: &mut bool, session: &PiSession) { - if *is_first { - if let Some(parent) = &session.header.parent_session { - pi.insert("parentSession".to_string(), json!(parent)); - } - *is_first = false; - } -} - // ── ConversationProvider impl for PiConvo ──────────────────────────── fn to_convo_err(e: PiError) -> ConvoError { @@ -1122,7 +871,9 @@ mod tests { } #[test] - fn test_tool_result_appears_as_own_turn() { + fn test_orphan_tool_result_is_dropped() { + // A ToolResult entry without a matching assistant turn folds + // into nothing — the IR doesn't model standalone tool turns. 
let tr = Entry::Message { base: base("a", None, "t"), message: AgentMessage::ToolResult { @@ -1140,8 +891,7 @@ mod tests { extra: HashMap::new(), }; let v = session_to_view(&session_from(vec![tr], "/tmp/p")); - assert_eq!(v.turns.len(), 1); - assert_eq!(v.turns[0].role, Role::Other("tool".to_string())); + assert_eq!(v.turns.len(), 0); } #[test] @@ -1194,8 +944,6 @@ mod tests { let v = session_to_view(&session_from(vec![c], "/tmp/p")); assert_eq!(v.turns[0].role, Role::System); assert!(v.turns[0].text.starts_with("Compacted")); - let pi = v.turns[0].extra.get("pi").unwrap(); - assert!(pi.get("compaction").is_some()); } #[test] @@ -1211,12 +959,10 @@ mod tests { let v = session_to_view(&session_from(vec![bs], "/tmp/p")); assert_eq!(v.turns[0].role, Role::System); assert!(v.turns[0].text.starts_with("Branch summary")); - let pi = v.turns[0].extra.get("pi").unwrap(); - assert!(pi.get("branchSummary").is_some()); } #[test] - fn test_model_change_attaches_to_next_message() { + fn test_model_change_drops_silently() { let mc = Entry::ModelChange { base: base("mc", None, "t"), provider: "anthropic".into(), @@ -1226,8 +972,6 @@ mod tests { let msg = user_text_entry("u", None, "hi"); let v = session_to_view(&session_from(vec![mc, msg], "/tmp/p")); assert_eq!(v.turns.len(), 1); - let pi = v.turns[0].extra.get("pi").unwrap(); - assert!(pi.get("modelChange").is_some()); } #[test] diff --git a/crates/toolpath-pi/tests/end_to_end.rs b/crates/toolpath-pi/tests/end_to_end.rs index 8ff8a3b..0ed9b44 100644 --- a/crates/toolpath-pi/tests/end_to_end.rs +++ b/crates/toolpath-pi/tests/end_to_end.rs @@ -75,8 +75,10 @@ fn test_to_view_produces_expected_turns() { let session = manager.read_session(PROJECT_CWD, "demo-session-1").unwrap(); let view = manager.to_view(&session); - // Turn count: user + assistant + toolResult + assistant = 4 - assert_eq!(view.turns.len(), 4); + // Turn count: user + assistant + assistant = 3 + // (tool-result entries fold into the assistant's tool_uses[i].result; 
+ // they no longer surface as standalone turns.) + assert_eq!(view.turns.len(), 3); assert_eq!(view.provider_id.as_deref(), Some("pi")); // files_changed should include "hello.rs" assert!(view.files_changed.iter().any(|f| f == "hello.rs")); @@ -94,11 +96,11 @@ fn test_derive_path_from_fixture() { let path = toolpath_pi::derive_path(&session, &DeriveConfig::default()); - // Path has 4 steps (one per turn). - assert_eq!(path.steps.len(), 4); + // Path has 3 steps (one per turn; tool-result entries fold into the + // matching assistant turn rather than emitting standalone turns). + assert_eq!(path.steps.len(), 3); // Path ID format. assert!(path.path.id.starts_with("path-pi-")); - // Head points at the last step. // Head matches the last turn's native id (which is the source entry id). assert_eq!(path.path.head, "m4"); // Base URI derived from cwd. From e17b270a58c8d8a7da8da2c8c4314684163282a8 Mon Sep 17 00:00:00 2001 From: Ben Barber Date: Tue, 19 May 2026 14:06:22 -0400 Subject: [PATCH 10/10] fix(claude): preserve turn chain across tool-result entries; emit usage as snake_case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two UI-breaking bugs in the claude import/export round trip: **1. Broken turn chain.** When a tool-result-only user entry was absorbed into the preceding assistant's tool_uses[i].result, its UUID was dropped on the floor. The next assistant entry's wire parentUuid (which pointed at the absorbed entry) then had no matching Turn in the IR, so `derive_path` couldn't resolve it and emitted an empty parents list. In a real session that meant **985 of 2661 steps had no parents** after roundtrip. Claude's UI walks the chain from the latest entry backwards via parentUuid; the first None bounce stopped the walk at the tail, which is why opening the re-exported conversation showed only the very last response. Fix: when absorbing a tool-result-only entry, record a rewrite from `entry.uuid` → previous turn id. 
Apply the rewrite when computing the next turn's parent_id. Now every assistant's parent resolves and the walk from head reaches the root user. **2. "NaN tokens old" in the UI.** The `Usage` struct was tagged `rename_all = "camelCase"`, which emitted `inputTokens` / `outputTokens` / `cacheCreationInputTokens` / `cacheReadInputTokens` on serialize. But Claude's wire format passes `message.usage` straight from the Anthropic API, which is snake_case. The UI parses `input_tokens` etc., got undefined for every field, and computed NaN. Fix: switch to `rename_all = "snake_case"` on `Usage` and `CacheCreation`. The `#[serde(alias = ...)]` attrs were already snake_case, so deserialization was never broken — only serialization. --- crates/toolpath-claude/src/provider.rs | 35 ++++++++++++++++++++++++-- crates/toolpath-claude/src/types.rs | 17 ++++++------- 2 files changed, 40 insertions(+), 12 deletions(-) diff --git a/crates/toolpath-claude/src/provider.rs b/crates/toolpath-claude/src/provider.rs index 9b2dcb0..a619e8c 100644 --- a/crates/toolpath-claude/src/provider.rs +++ b/crates/toolpath-claude/src/provider.rs @@ -324,21 +324,52 @@ fn conversation_to_view(convo: &Conversation) -> ConversationView { events.push(preamble_to_event(idx, raw)); } + // Map from "absorbed-or-skipped entry UUID" → "the previous + // turn-bearing entry's UUID". Used so that an assistant turn whose + // wire parentUuid points at a tool-result-only entry (or any other + // absorbed entry that didn't become a Turn) gets a Turn.parent_id + // that still maps onto a real Turn — keeping the IR's turn-to-turn + // chain intact for `derive_path`. The original UUID is preserved + // via the `tool_result_user` event. + let mut parent_rewrites: HashMap = HashMap::new(); + let mut last_turn_uuid: Option = None; + for entry in &convo.entries { let Some(msg) = &entry.message else { // Message-less entries (attachments, snapshots) survive as // events so the projector can re-emit them. 
events.push(entry_to_event(entry)); + if let Some(prev) = &last_turn_uuid { + parent_rewrites.insert(entry.uuid.clone(), prev.clone()); + } continue; }; - // Tool-result-only user entries get merged into existing turns + // Tool-result-only user entries get merged into the preceding + // assistant's tool_uses[i].result and dropped from the turn + // stream. The next assistant entry's wire parentUuid points at + // this entry; we record a rewrite so the IR's turn-to-turn chain + // stays connected. (The projector re-synthesizes the wire-level + // tool-result entries on the way out from tool_uses[i].result — + // their original UUIDs aren't preserved across the roundtrip, + // but the Claude UI walks the chain by parentUuid, not by + // specific UUIDs, so that's fine.) if is_tool_result_only(entry) { merge_tool_results(&mut turns, msg); + if let Some(prev) = &last_turn_uuid { + parent_rewrites.insert(entry.uuid.clone(), prev.clone()); + } continue; } - turns.push(message_to_turn(entry, msg)); + let mut turn = message_to_turn(entry, msg); + if let Some(pid) = turn.parent_id.as_ref() + && let Some(real) = parent_rewrites.get(pid) + { + turn.parent_id = Some(real.clone()); + } + last_turn_uuid = Some(turn.id.clone()); + turns.push(turn); } // Re-derive delegation results now that tool results are merged diff --git a/crates/toolpath-claude/src/types.rs b/crates/toolpath-claude/src/types.rs index 925b02d..490b4da 100644 --- a/crates/toolpath-claude/src/types.rs +++ b/crates/toolpath-claude/src/types.rs @@ -342,29 +342,26 @@ impl std::str::FromStr for MessageRole { } } +// Claude's JSONL envelope is camelCase (`parentUuid`, `sessionId`, etc.), +// but the embedded `message.usage` object is forwarded straight from the +// Anthropic API — which is snake_case. Mismatching this breaks the UI's +// context-window readout (it parses `input_tokens` etc. and renders NaN +// when they're absent). 
#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] +#[serde(rename_all = "snake_case")] pub struct Usage { - #[serde(alias = "input_tokens")] pub input_tokens: Option, - #[serde(alias = "output_tokens")] pub output_tokens: Option, - #[serde(alias = "cache_creation_input_tokens")] pub cache_creation_input_tokens: Option, - #[serde(alias = "cache_read_input_tokens")] pub cache_read_input_tokens: Option, - #[serde(alias = "cache_creation")] pub cache_creation: Option, - #[serde(alias = "service_tier")] pub service_tier: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] +#[serde(rename_all = "snake_case")] pub struct CacheCreation { - #[serde(alias = "ephemeral_5m_input_tokens")] pub ephemeral_5m_input_tokens: Option, - #[serde(alias = "ephemeral_1h_input_tokens")] pub ephemeral_1h_input_tokens: Option, }