From 9bfc50aa9de340056caf9ec69767601d1a7e4f48 Mon Sep 17 00:00:00 2001 From: Junichi Kato Date: Fri, 5 Jun 2026 23:54:00 +0900 Subject: [PATCH 1/9] feat(observability): add phase usage events exporter --- docs/configuration.ja.md | 4 +- docs/configuration.md | 4 +- src/__tests__/otelFoundation.test.ts | 78 ++++- src/__tests__/phaseUsageEvent.test.ts | 165 +++++++++ .../usageEventsSpanProcessor.test.ts | 122 +++++++ src/__tests__/workflowSpans.test.ts | 32 ++ src/agents/judge-status-usecase.ts | 9 + .../prompt-based-structured-caller.ts | 6 + src/core/logging/contracts.ts | 1 + src/core/logging/phaseUsageEvent.ts | 312 ++++++++++++++++++ src/core/workflow/arpeggio/types.ts | 3 + src/core/workflow/engine/ArpeggioRunner.ts | 4 + src/core/workflow/engine/ParallelRunner.ts | 1 + src/core/workflow/engine/StepExecutor.ts | 1 + .../engine/team-leader-part-runner.ts | 1 + .../workflow/observability/workflowSpans.ts | 39 ++- src/core/workflow/report-phase-runner.ts | 1 + src/core/workflow/status-judgment-phase.ts | 1 + src/core/workflow/types.ts | 2 + .../execute/workflowExecutionBootstrap.ts | 16 +- src/infra/observability/otelFoundation.ts | 22 +- .../observability/usageEventsSpanProcessor.ts | 94 ++++++ 22 files changed, 906 insertions(+), 12 deletions(-) create mode 100644 src/__tests__/phaseUsageEvent.test.ts create mode 100644 src/__tests__/usageEventsSpanProcessor.test.ts create mode 100644 src/core/logging/phaseUsageEvent.ts create mode 100644 src/infra/observability/usageEventsSpanProcessor.ts diff --git a/docs/configuration.ja.md b/docs/configuration.ja.md index 1e0865c87..7394913a5 100644 --- a/docs/configuration.ja.md +++ b/docs/configuration.ja.md @@ -172,7 +172,7 @@ interactive_preview_steps: 3 # インタラクティブモードでの step プ | `workflow_runtime_prepare` | object | `{ custom_scripts: false }` | ランタイム prepare ポリシー(ビルトインプリセットは常に許可) | | `workflow_command_gates` | object | `{ custom_scripts: false }` | workflow YAML command quality gate ポリシー | | `sync_conflict_resolver` | object | `{ auto_approve_tools: false }` | sync conflict resolver ポリシー | -| `observability` | object | 無効 | OpenTelemetry foundation の opt-in 設定。`enabled` で SDK を初期化し、`monitor` は workflow metric を `.takt/runs//monitor.json` に出力し、`session_log_exporter` は span 由来の shadow session log を出力します。`usage_events_phase` は後続変更向けの予約フラグです。 | +| `observability` | object | 無効 | OpenTelemetry foundation の opt-in 設定。`enabled` で SDK を初期化し、`monitor` は workflow metric を `.takt/runs//monitor.json` に出力し、`session_log_exporter` は span 由来の shadow session log を出力します。`usage_events_phase` は phase 粒度の usage events を `.takt/runs//logs/-usage-events.phase.jsonl` に出力します。 | ## プロジェクト設定 @@ -235,7 +235,7 @@ concurrency: 2 # このプロジェクトでの takt run 並列 | `workflow_runtime_prepare` | object | - | ランタイム prepare ポリシー(グローバルを上書き) | | `workflow_command_gates` | object | - | workflow YAML command quality gate ポリシー(グローバルを上書き) | | `sync_conflict_resolver` | object | - | sync conflict resolver ポリシー(グローバルを上書き) | -| `observability` | object | - | プロジェクトレベルの OpenTelemetry opt-in 上書き。`enabled` で SDK を初期化し、`monitor` は workflow metric を `.takt/runs//monitor.json` に出力し、`session_log_exporter` は span 由来の shadow session log を出力します。`usage_events_phase` は後続変更向けの予約フラグです。 | +| `observability` | object | - | プロジェクトレベルの OpenTelemetry opt-in 上書き。`enabled` で SDK を初期化し、`monitor` は workflow metric を `.takt/runs//monitor.json` に出力し、`session_log_exporter` は span 由来の shadow session log を出力します。`usage_events_phase` は phase 粒度の usage events を `.takt/runs//logs/-usage-events.phase.jsonl` に出力します。 | プロジェクト設定の値は、両方が設定されている場合にグローバル設定を上書きします。 diff --git a/docs/configuration.md b/docs/configuration.md index a40958b8b..a543d1335 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -172,7 +172,7 @@ interactive_preview_steps: 3 # Step previews in interactive mode (0-10, default | `workflow_runtime_prepare` | object | `{ custom_scripts: false }` | Runtime prepare policy (builtin presets always allowed) | | `workflow_command_gates` | object | `{ custom_scripts: false }` | Workflow YAML command quality gate policy | | `sync_conflict_resolver` | object | `{ auto_approve_tools: false }` | Sync conflict resolver policy | -| `observability` | object | disabled | Opt-in OpenTelemetry foundation. `enabled` initializes the SDK, `monitor` writes workflow metrics to `.takt/runs//monitor.json`, `session_log_exporter` writes a shadow session log from spans, and `usage_events_phase` is reserved for a later change. | +| `observability` | object | disabled | Opt-in OpenTelemetry foundation. `enabled` initializes the SDK, `monitor` writes workflow metrics to `.takt/runs//monitor.json`, `session_log_exporter` writes a shadow session log from spans, and `usage_events_phase` writes phase-level usage events to `.takt/runs//logs/-usage-events.phase.jsonl`. | ## Project Configuration @@ -235,7 +235,7 @@ concurrency: 2 # Parallel task count for takt run in this project | `workflow_runtime_prepare` | object | - | Runtime prepare policy (overrides global) | | `workflow_command_gates` | object | - | Workflow YAML command quality gate policy (overrides global) | | `sync_conflict_resolver` | object | - | Sync conflict resolver policy (overrides global) | -| `observability` | object | - | Project-level OpenTelemetry opt-in override. `enabled` initializes the SDK, `monitor` writes workflow metrics to `.takt/runs//monitor.json`, `session_log_exporter` writes a shadow session log from spans, and `usage_events_phase` is reserved for a later change. | +| `observability` | object | - | Project-level OpenTelemetry opt-in override. `enabled` initializes the SDK, `monitor` writes workflow metrics to `.takt/runs//monitor.json`, `session_log_exporter` writes a shadow session log from spans, and `usage_events_phase` writes phase-level usage events to `.takt/runs//logs/-usage-events.phase.jsonl`. | Project config values override global config when both are set. diff --git a/src/__tests__/otelFoundation.test.ts b/src/__tests__/otelFoundation.test.ts index 990971272..01855ca56 100644 --- a/src/__tests__/otelFoundation.test.ts +++ b/src/__tests__/otelFoundation.test.ts @@ -47,11 +47,18 @@ const enabledMonitorObservability: ObservabilityConfigForTest = { usageEventsPhase: false, }; +const enabledUsageEventsPhaseObservability: ObservabilityConfigForTest = { + enabled: true, + monitor: false, + sessionLogExporter: false, + usageEventsPhase: true, +}; + const enabledAllObservability: ObservabilityConfigForTest = { enabled: true, monitor: true, sessionLogExporter: true, - usageEventsPhase: false, + usageEventsPhase: true, }; async function loadFoundationWithMockedSdk(): Promise<{ @@ -68,6 +75,11 @@ async function loadFoundationWithMockedSdk(): Promise<{ runId: string; monitorPath: string; }; + usageEventsExporter?: { + runId: string; + sessionId: string; + phaseUsageLogPath: string; + }; }, ) => Promise<{ shutdown(): Promise }>; sdkImportCount: () => number; @@ -147,6 +159,11 @@ async function loadFoundationWithMockedSdk(): Promise<{ runId: string; monitorPath: string; }; + usageEventsExporter?: { + runId: string; + sessionId: string; + phaseUsageLogPath: string; + }; }, ) => Promise<{ shutdown(): Promise }>; }; @@ -198,7 +215,7 @@ describe('otel foundation', () => { 'service.name': 'takt', 'service.version': packageJson.version, }); - expect(foundation.constructedOptions[0]?.spanProcessors).toHaveLength(1); + expect(foundation.constructedOptions[0]?.spanProcessors).toHaveLength(2); expect(foundation.constructedOptions[0]?.metricReaders).toHaveLength(1); expect(foundation.constructedOptions[0]).not.toHaveProperty('traceExporter'); expect(foundation.shutdownMock).toHaveBeenCalledOnce(); @@ -223,7 +240,7 @@ describe('otel foundation', () => { ); await handle.shutdown(); - expect(foundation.constructedOptions[0]?.spanProcessors).toHaveLength(1); + expect(foundation.constructedOptions[0]?.spanProcessors).toHaveLength(2); const records = readFileSync(shadowLogPath, 'utf-8') .trim() .split('\n') @@ -267,6 +284,61 @@ describe('otel foundation', () => { } }); + it('should attach the phase usage events span processor when usage events phase is enabled', async () => { + const foundation = await loadFoundationWithMockedSdk(); + const tempDir = mkdtempSync(join(tmpdir(), 'takt-otel-usage-events-')); + const phaseUsageLogPath = join(tempDir, 'session-usage-events.phase.jsonl'); + + try { + const handle = await foundation.initializeOtelFoundation( + enabledUsageEventsPhaseObservability, + { + usageEventsExporter: { + runId: 'run-1', + sessionId: 'session-1', + phaseUsageLogPath, + }, + }, + ); + + const processor = foundation.constructedOptions[0]?.spanProcessors?.[1] as { + onEnd(span: unknown): void; + }; + processor.onEnd({ + name: 'phase.implement.execute', + endTime: [1_778_777_205, 0], + attributes: { + 'takt.run.id': 'run-1', + 'takt.provider.name': 'mock', + 'takt.model.name': 'mock-model', + 'takt.step.name': 'implement', + 'takt.step.type': 'normal', + 'takt.phase.number': 1, + 'takt.phase.name': 'execute', + 'takt.phase.status': 'done', + 'gen_ai.usage.input_tokens': 3, + 'gen_ai.usage.output_tokens': 2, + 'gen_ai.usage.total_tokens': 5, + }, + }); + await handle.shutdown(); + + const records = readFileSync(phaseUsageLogPath, 'utf-8') + .trim() + .split('\n') + .map((line) => JSON.parse(line) as Record); + expect(records).toEqual([ + expect.objectContaining({ + run_id: 'run-1', + session_id: 'session-1', + phase: 'phase1_execute', + }), + ]); + } finally { + rmSync(tempDir, { recursive: true, force: true }); + } + }); + it('should route run-local session logs and monitor files through the shared SDK', async () => { const foundation = await loadFoundationWithMockedSdk(); const tempDir = mkdtempSync(join(tmpdir(), 'takt-otel-run-routing-')); diff --git a/src/__tests__/phaseUsageEvent.test.ts b/src/__tests__/phaseUsageEvent.test.ts new file mode 100644 index 000000000..ddc2ea0b0 --- /dev/null +++ b/src/__tests__/phaseUsageEvent.test.ts @@ -0,0 +1,165 @@ +import { describe, expect, it } from 'vitest'; +import { + mapSpanEndToPhaseUsageEvent, + type PhaseUsageEventContext, +} from '../core/logging/phaseUsageEvent.js'; +import type { SpanSnapshot } from '../core/logging/span-to-ndjson-mapper.js'; + +const context: PhaseUsageEventContext = { + runId: 'run-1', + sessionId: 'session-1', +}; + +describe('phase usage event mapper', () => { + it('maps phase 1 spans into phase usage event records', () => { + const span: SpanSnapshot = { + name: 'phase.implement.execute', + endTime: [1_778_777_205, 0], + attributes: { + 'takt.provider.name': 'codex', + 'takt.model.name': 'gpt-5', + 'takt.step.name': 'implement', + 'takt.step.type': 'normal', + 'takt.phase.number': 1, + 'takt.phase.name': 'execute', + 'takt.phase.execution_id': 'implement:1:1:1', + 'takt.phase.status': 'done', + 'takt.usage.missing': false, + 'gen_ai.usage.input_tokens': 11, + 'gen_ai.usage.output_tokens': 7, + 'gen_ai.usage.total_tokens': 18, + 'gen_ai.usage.cached_input_tokens': 3, + 'gen_ai.usage.cache_creation_input_tokens': 2, + 'gen_ai.usage.cache_read_input_tokens': 1, + }, + }; + + expect(mapSpanEndToPhaseUsageEvent(span, context)).toEqual({ + run_id: 'run-1', + session_id: 'session-1', + provider: 'codex', + provider_model: 'gpt-5', + step: 'implement', + step_type: 'normal', + phase: 'phase1_execute', + phase_name: 'execute', + phase_execution_id: 'implement:1:1:1', + timestamp: '2026-05-14T16:46:45.000Z', + success: true, + usage_missing: false, + usage: { + input_tokens: 11, + output_tokens: 7, + total_tokens: 18, + cached_input_tokens: 3, + cache_creation_input_tokens: 2, + cache_read_input_tokens: 1, + }, + }); + }); + + it('maps phase 2 missing usage with a normalized reason', () => { + const span: SpanSnapshot = { + name: 'phase.review.report', + endTime: [1_778_777_210, 0], + attributes: { + 'takt.provider.name': 'claude', + 'takt.step.name': 'review', + 'takt.step.type': 'normal', + 'takt.phase.number': 2, + 'takt.phase.name': 'report', + 'takt.phase.status': 'error', + 'takt.usage.missing': true, + 'takt.usage.missing_reason': 'unexpected', + }, + }; + + expect(mapSpanEndToPhaseUsageEvent(span, context)).toEqual(expect.objectContaining({ + provider: 'claude', + provider_model: '(default)', + phase: 'phase2_report', + phase_name: 'report', + success: false, + usage_missing: true, + reason: 'usage_not_available', + usage: {}, + })); + }); + + it('maps judge stage spans into stage-specific phase usage records', () => { + const span: SpanSnapshot = { + name: 'judge_stage.implement.3.ai_judge', + endTime: [1_778_777_215, 0], + attributes: { + 'takt.provider.name': 'claude-sdk', + 'takt.model.name': 'claude-sonnet-4', + 'takt.step.name': 'implement', + 'takt.step.type': 'normal', + 'takt.phase.execution_id': 'implement:3:1:1', + 'takt.judge.stage': 3, + 'takt.judge.method': 'ai_judge', + 'takt.judge.status': 'done', + 'gen_ai.usage.input_tokens': 5, + 'gen_ai.usage.output_tokens': 4, + }, + }; + + expect(mapSpanEndToPhaseUsageEvent(span, context)).toEqual(expect.objectContaining({ + phase: 'phase3_fallback', + phase_name: 'judge', + phase_execution_id: 'implement:3:1:1', + judge_stage: 3, + judge_method: 'ai_judge', + usage_missing: false, + usage: { + input_tokens: 5, + output_tokens: 4, + total_tokens: 9, + }, + })); + }); + + it('skips phase 3 parent spans and spans missing required provider or step metadata', () => { + expect(mapSpanEndToPhaseUsageEvent({ + name: 'phase.implement.judge', + attributes: { + 'takt.provider.name': 'codex', + 'takt.step.name': 'implement', + 'takt.step.type': 'normal', + 'takt.phase.number': 3, + 'takt.phase.name': 'judge', + }, + }, context)).toBeUndefined(); + + expect(mapSpanEndToPhaseUsageEvent({ + name: 'phase.implement.execute', + attributes: { + 'takt.step.name': 'implement', + 'takt.step.type': 'normal', + 'takt.phase.number': 1, + 'takt.phase.name': 'execute', + }, + }, context)).toBeUndefined(); + }); + + it('turns partial token attributes into usage_tokens_missing', () => { + const record = mapSpanEndToPhaseUsageEvent({ + name: 'phase.implement.execute', + attributes: { + 'takt.provider.name': 'mock', + 'takt.step.name': 'implement', + 'takt.step.type': 'normal', + 'takt.phase.number': 1, + 'takt.phase.name': 'execute', + 'takt.phase.status': 'done', + 'gen_ai.usage.input_tokens': 5, + }, + }, context); + + expect(record).toMatchObject({ + usage_missing: true, + reason: 'usage_tokens_missing', + usage: {}, + }); + }); +}); diff --git a/src/__tests__/usageEventsSpanProcessor.test.ts b/src/__tests__/usageEventsSpanProcessor.test.ts new file mode 100644 index 000000000..444d8f354 --- /dev/null +++ b/src/__tests__/usageEventsSpanProcessor.test.ts @@ -0,0 +1,122 @@ +import { existsSync, mkdtempSync, readFileSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, describe, expect, it } from 'vitest'; +import type { ReadableSpan } from '@opentelemetry/sdk-trace-base'; +import { UsageEventsSpanProcessor } from '../infra/observability/usageEventsSpanProcessor.js'; + +const tempDirs = new Set(); + +function createTempLogPath(name = 'session-usage-events.phase.jsonl'): string { + const dir = mkdtempSync(join(tmpdir(), 'takt-phase-usage-events-')); + tempDirs.add(dir); + return join(dir, name); +} + +function readRecords(path: string): Array> { + return readFileSync(path, 'utf-8') + .trim() + .split('\n') + .map((line) => JSON.parse(line) as Record); +} + +afterEach(() => { + for (const dir of tempDirs) { + rmSync(dir, { recursive: true, force: true }); + } + tempDirs.clear(); +}); + +describe('UsageEventsSpanProcessor', () => { + it('routes phase usage records to the matching registered run', () => { + const firstLogPath = createTempLogPath('first-usage-events.phase.jsonl'); + const secondLogPath = createTempLogPath('second-usage-events.phase.jsonl'); + const processor = new UsageEventsSpanProcessor(); + + processor.register({ + runId: 'run-1', + sessionId: 'session-1', + phaseUsageLogPath: firstLogPath, + }); + processor.register({ + runId: 'run-2', + sessionId: 'session-2', + phaseUsageLogPath: secondLogPath, + }); + + processor.onEnd(makePhaseSpan('run-2') as unknown as ReadableSpan); + processor.onEnd(makePhaseSpan('run-1') as unknown as ReadableSpan); + + expect(readRecords(firstLogPath)).toEqual([ + expect.objectContaining({ + run_id: 'run-1', + session_id: 'session-1', + phase: 'phase1_execute', + }), + ]); + expect(readRecords(secondLogPath)).toEqual([ + expect.objectContaining({ + run_id: 'run-2', + session_id: 'session-2', + phase: 'phase1_execute', + }), + ]); + }); + + it('ignores duplicate runId registrations', () => { + const firstLogPath = createTempLogPath('first-usage-events.phase.jsonl'); + const secondLogPath = createTempLogPath('second-usage-events.phase.jsonl'); + const processor = new UsageEventsSpanProcessor(); + + processor.register({ + runId: 'run-1', + sessionId: 'session-1', + phaseUsageLogPath: firstLogPath, + }); + processor.register({ + runId: 'run-1', + sessionId: 'session-duplicate', + phaseUsageLogPath: secondLogPath, + }); + + processor.onEnd(makePhaseSpan('run-1') as unknown as ReadableSpan); + + expect(readRecords(firstLogPath)[0]).toEqual(expect.objectContaining({ + session_id: 'session-1', + })); + expect(existsSync(secondLogPath)).toBe(false); + }); + + it('does not throw when appends fail', () => { + const phaseUsageLogPath = join(createTempLogPath(), 'missing', 'usage-events.phase.jsonl'); + const processor = new UsageEventsSpanProcessor({ + runId: 'run-1', + sessionId: 'session-1', + phaseUsageLogPath, + }); + + expect(() => { + processor.onEnd(makePhaseSpan('run-1') as unknown as ReadableSpan); + }).not.toThrow(); + }); +}); + +function makePhaseSpan(runId: string): Record { + return { + name: 'phase.implement.execute', + endTime: [1_778_777_205, 0], + attributes: { + 'takt.run.id': runId, + 'takt.provider.name': 'mock', + 'takt.model.name': 'mock-model', + 'takt.step.name': 'implement', + 'takt.step.type': 'normal', + 'takt.phase.number': 1, + 'takt.phase.name': 'execute', + 'takt.phase.status': 'done', + 'gen_ai.usage.input_tokens': 3, + 'gen_ai.usage.output_tokens': 2, + 'gen_ai.usage.total_tokens': 5, + }, + }; +} diff --git a/src/__tests__/workflowSpans.test.ts b/src/__tests__/workflowSpans.test.ts index 098f87d63..ad87e965c 100644 --- a/src/__tests__/workflowSpans.test.ts +++ b/src/__tests__/workflowSpans.test.ts @@ -395,6 +395,15 @@ describe('workflow OpenTelemetry spans', () => { }, async () => ({ status: 'done', content: 'secret content' }), (result: { status: string; content: string }) => ({ status: result.status, content: result.content, + providerUsage: { + usageMissing: false, + inputTokens: 11, + outputTokens: 7, + totalTokens: 18, + cachedInputTokens: 3, + cacheCreationInputTokens: 2, + cacheReadInputTokens: 1, + }, })); return makeDoneResult(); }); @@ -419,6 +428,13 @@ describe('workflow OpenTelemetry spans', () => { 'takt.phase.result.content': '[REDACTED] content', 'takt.provider.name': 'codex', 'takt.model.name': 'gpt-5', + 'takt.usage.missing': false, + 'gen_ai.usage.input_tokens': 11, + 'gen_ai.usage.output_tokens': 7, + 'gen_ai.usage.total_tokens': 18, + 'gen_ai.usage.cached_input_tokens': 3, + 'gen_ai.usage.cache_creation_input_tokens': 2, + 'gen_ai.usage.cache_read_input_tokens': 1, }); expect(spans[1]?.ended).toBe(true); expect(metricRecords).toContainEqual(expect.objectContaining({ @@ -514,6 +530,16 @@ describe('workflow OpenTelemetry spans', () => { status: 'done', instruction: 'judge instruction', response: 'judge response', + providerUsage: { + usageMissing: false, + inputTokens: 5, + outputTokens: 4, + totalTokens: 9, + }, + }, + providerInfo: { + provider: 'claude-sdk', + model: 'claude-sonnet-4', }, }); return { ruleIndex: 1, method: 'structured_output' }; @@ -535,6 +561,12 @@ describe('workflow OpenTelemetry spans', () => { 'takt.judge.status': 'done', 'takt.judge.instruction': 'judge instruction', 'takt.judge.response': 'judge response', + 'takt.provider.name': 'claude-sdk', + 'takt.model.name': 'claude-sonnet-4', + 'takt.usage.missing': false, + 'gen_ai.usage.input_tokens': 5, + 'gen_ai.usage.output_tokens': 4, + 'gen_ai.usage.total_tokens': 9, }); expect(spans[0]?.attributes).toMatchObject({ 'takt.phase.status': 'done', diff --git a/src/agents/judge-status-usecase.ts b/src/agents/judge-status-usecase.ts index d2726c311..867f9519d 100644 --- a/src/agents/judge-status-usecase.ts +++ b/src/agents/judge-status-usecase.ts @@ -1,4 +1,5 @@ import type { WorkflowRule, RuleMatchMethod, Language } from '../core/models/types.js'; +import type { ProviderUsageSnapshot } from '../core/models/response.js'; import type { ProviderType } from '../core/workflow/types.js'; import { runAgent, type StreamCallback } from './runner.js'; import { detectJudgeIndex, buildJudgePrompt, isValidRuleIndex, buildJudgeConditions } from './judge-utils.js'; @@ -21,6 +22,7 @@ export interface JudgeStatusOptions { status: 'done' | 'error' | 'skipped'; instruction: string; response: string; + providerUsage?: ProviderUsageSnapshot; }) => void; onStructuredPromptResolved?: (promptParts: { systemPrompt: string; @@ -62,6 +64,7 @@ export async function runTagJudgeStage( status: tagResponse.status === 'done' ? 'done' : 'error', instruction: tagInstruction, response: tagResponse.content, + providerUsage: tagResponse.providerUsage, }); if (tagResponse.status === 'done') { @@ -88,6 +91,7 @@ export interface EvaluateConditionOptions { instruction: string; status: 'done' | 'error'; response: string; + providerUsage?: ProviderUsageSnapshot; }) => void; } @@ -111,6 +115,7 @@ export async function evaluateCondition( instruction: prompt, status: response.status === 'done' ? 'done' : 'error', response: response.content, + providerUsage: response.providerUsage, }); if (response.status !== 'done') { @@ -167,6 +172,7 @@ export async function judgeStatus( status: structuredResponse.status === 'done' ? 'done' : 'error', instruction: structuredInstruction, response: structuredResponse.content, + providerUsage: structuredResponse.providerUsage, }); if (structuredResponse.status === 'done') { @@ -204,6 +210,7 @@ export async function judgeStatus( let stage3Status: 'done' | 'error' | 'skipped' = 'skipped'; let stage3Instruction = ''; let stage3Response = ''; + let stage3ProviderUsage: ProviderUsageSnapshot | undefined; const normalizedConditions = conditions.map((c, pos) => ({ index: pos, text: c.text })); const fallbackPosition = await evaluateCondition(structuredInstruction, normalizedConditions, { cwd: options.cwd, @@ -214,6 +221,7 @@ export async function judgeStatus( stage3Status = entry.status; stage3Instruction = entry.instruction; stage3Response = entry.response; + stage3ProviderUsage = entry.providerUsage; }, }); @@ -223,6 +231,7 @@ export async function judgeStatus( status: stage3Status, instruction: stage3Instruction, response: stage3Response, + providerUsage: stage3ProviderUsage, }); if (fallbackPosition >= 0 && fallbackPosition < conditions.length) { diff --git a/src/agents/structured-caller/prompt-based-structured-caller.ts b/src/agents/structured-caller/prompt-based-structured-caller.ts index 4281cfd8c..e3ecaf369 100644 --- a/src/agents/structured-caller/prompt-based-structured-caller.ts +++ b/src/agents/structured-caller/prompt-based-structured-caller.ts @@ -1,4 +1,5 @@ import type { WorkflowRule, PartDefinition } from '../../core/models/types.js'; +import type { ProviderUsageSnapshot } from '../../core/models/response.js'; import { buildPromptBasedDecomposePrompt, buildPromptBasedMorePartsPrompt, @@ -64,6 +65,7 @@ export class PromptBasedStructuredCaller implements StructuredCaller { status: structuredResponse.status === 'done' ? 'done' : 'error', instruction: structuredInstruction, response: structuredResponse.content, + providerUsage: structuredResponse.providerUsage, }); let structuredParseError: string | undefined; @@ -103,6 +105,7 @@ export class PromptBasedStructuredCaller implements StructuredCaller { let stage3Status: 'done' | 'error' | 'skipped' = 'skipped'; let stage3Instruction = ''; let stage3Response = ''; + let stage3ProviderUsage: ProviderUsageSnapshot | undefined; const fallbackIndex = await this.evaluateCondition(structuredInstruction, conditions, { cwd: options.cwd, provider: options.provider, @@ -112,6 +115,7 @@ export class PromptBasedStructuredCaller implements StructuredCaller { stage3Status = entry.status; stage3Instruction = entry.instruction; stage3Response = entry.response; + stage3ProviderUsage = entry.providerUsage; }, }); @@ -121,6 +125,7 @@ export class PromptBasedStructuredCaller implements StructuredCaller { status: stage3Status, instruction: stage3Instruction, response: stage3Response, + providerUsage: stage3ProviderUsage, }); if (isValidRuleIndex(fallbackIndex, rules, interactiveEnabled)) { @@ -153,6 +158,7 @@ export class PromptBasedStructuredCaller implements StructuredCaller { instruction: prompt, status: response.status === 'done' ? 'done' : 'error', response: response.content, + providerUsage: response.providerUsage, }); if (response.status !== 'done') { diff --git a/src/core/logging/contracts.ts b/src/core/logging/contracts.ts index 6e6f1197f..53b62d520 100644 --- a/src/core/logging/contracts.ts +++ b/src/core/logging/contracts.ts @@ -1,5 +1,6 @@ export const PROVIDER_EVENTS_LOG_FILE_SUFFIX = '-provider-events.jsonl'; export const USAGE_EVENTS_LOG_FILE_SUFFIX = '-usage-events.jsonl'; +export const PHASE_USAGE_EVENTS_LOG_FILE_SUFFIX = '-usage-events.phase.jsonl'; export const USAGE_MISSING_REASONS = { NOT_AVAILABLE: 'usage_not_available', diff --git a/src/core/logging/phaseUsageEvent.ts b/src/core/logging/phaseUsageEvent.ts new file mode 100644 index 000000000..6cec10ff8 --- /dev/null +++ b/src/core/logging/phaseUsageEvent.ts @@ -0,0 +1,312 @@ +import type { SpanSnapshot } from './span-to-ndjson-mapper.js'; +import { + USAGE_MISSING_REASONS, + type UsageMissingReason, +} from './contracts.js'; +import type { StepType } from './providerEvent.js'; +import type { ProviderType } from '../../shared/types/provider.js'; + +export type PhaseUsageType = + | 'phase1_execute' + | 'phase2_report' + | 'phase3_structured' + | 'phase3_tag' + | 'phase3_fallback'; + +type PhaseName = 'execute' | 'report' | 'judge'; +type JudgeMethod = 'structured_output' | 'phase3_tag' | 'ai_judge'; +type JudgeStage = 1 | 2 | 3; + +export interface PhaseUsageEventLogRecord { + run_id: string; + session_id: string; + provider: ProviderType; + provider_model: string; + step: string; + step_type: StepType; + phase: PhaseUsageType; + phase_name: PhaseName; + phase_execution_id?: string; + judge_stage?: JudgeStage; + judge_method?: JudgeMethod; + timestamp: string; + success: boolean; + usage_missing: boolean; + reason?: UsageMissingReason; + usage: { + input_tokens?: number; + output_tokens?: number; + total_tokens?: number; + cached_input_tokens?: number; + cache_creation_input_tokens?: number; + cache_read_input_tokens?: number; + }; +} + +export interface PhaseUsageEventContext { + runId: string; + sessionId: string; +} + +interface PhaseUsageMeta { + provider: ProviderType; + providerModel: string; + step: string; + stepType: StepType; + phase: PhaseUsageType; + phaseName: PhaseName; + phaseExecutionId?: string; + judgeStage?: JudgeStage; + judgeMethod?: JudgeMethod; + success: boolean; +} + +export function mapSpanEndToPhaseUsageEvent( + span: SpanSnapshot, + context: PhaseUsageEventContext, +): PhaseUsageEventLogRecord | undefined { + if (span.name.startsWith('phase.')) { + return mapPhaseSpan(span, context); + } + if (span.name.startsWith('judge_stage.')) { + return mapJudgeStageSpan(span, context); + } + return undefined; +} + +function mapPhaseSpan( + span: SpanSnapshot, + context: PhaseUsageEventContext, +): PhaseUsageEventLogRecord | undefined { + const phaseNumber = getNumber(span.attributes, 'takt.phase.number'); + const phaseName = getPhaseName(span.attributes, 'takt.phase.name'); + const phase = phaseLabelForPhaseSpan(phaseNumber, phaseName); + if (!phase || !phaseName) { + return undefined; + } + + const common = buildCommonMeta(span); + if (!common) { + return undefined; + } + + return buildRecord(span, context, { + ...common, + phase, + phaseName, + phaseExecutionId: getString(span.attributes, 'takt.phase.execution_id'), + success: getString(span.attributes, 'takt.phase.status') === 'done', + }); +} + +function mapJudgeStageSpan( + span: SpanSnapshot, + context: PhaseUsageEventContext, +): PhaseUsageEventLogRecord | undefined { + const judgeStage = getJudgeStage(span.attributes, 'takt.judge.stage'); + const judgeMethod = getJudgeMethod(span.attributes, 'takt.judge.method'); + const phase = phaseLabelForJudgeStage(judgeStage); + if (!phase || !judgeStage || !judgeMethod) { + return undefined; + } + + const common = buildCommonMeta(span); + if (!common) { + return undefined; + } + + return buildRecord(span, context, { + ...common, + phase, + phaseName: 'judge', + phaseExecutionId: getString(span.attributes, 'takt.phase.execution_id'), + judgeStage, + judgeMethod, + success: getString(span.attributes, 'takt.judge.status') === 'done', + }); +} + +function buildCommonMeta(span: SpanSnapshot): Pick | undefined { + const provider = getProvider(span.attributes, 'takt.provider.name'); + const step = getString(span.attributes, 'takt.step.name'); + const stepType = getStepType(span.attributes, 'takt.step.type'); + if (!provider || !step || !stepType) { + return undefined; + } + + return { + provider, + providerModel: getString(span.attributes, 'takt.model.name') ?? '(default)', + step, + stepType, + }; +} + +function buildRecord( + span: SpanSnapshot, + context: PhaseUsageEventContext, + meta: PhaseUsageMeta, +): PhaseUsageEventLogRecord { + const usage = extractUsage(span.attributes); + return { + run_id: context.runId, + session_id: context.sessionId, + provider: meta.provider, + provider_model: meta.providerModel, + step: meta.step, + step_type: meta.stepType, + phase: meta.phase, + phase_name: meta.phaseName, + ...(meta.phaseExecutionId ? { phase_execution_id: meta.phaseExecutionId } : {}), + ...(meta.judgeStage ? { judge_stage: meta.judgeStage } : {}), + ...(meta.judgeMethod ? { judge_method: meta.judgeMethod } : {}), + timestamp: hrTimeToIso(span.endTime), + success: meta.success, + usage_missing: usage.missing, + ...(usage.reason ? { reason: usage.reason } : {}), + usage: usage.usage, + }; +} + +function extractUsage(attributes: Record): Pick & { + missing: boolean; +} { + if (attributes['takt.usage.missing'] === true) { + return { + missing: true, + usage_missing: true, + reason: getUsageMissingReason(attributes['takt.usage.missing_reason']), + usage: {}, + }; + } + + const inputTokens = getNumber(attributes, 'gen_ai.usage.input_tokens'); + const outputTokens = getNumber(attributes, 'gen_ai.usage.output_tokens'); + const totalTokens = getNumber(attributes, 'gen_ai.usage.total_tokens') + ?? (inputTokens !== undefined && outputTokens !== undefined ? inputTokens + outputTokens : undefined); + + if (inputTokens === undefined || outputTokens === undefined || totalTokens === undefined) { + return { + missing: true, + usage_missing: true, + reason: hasAnyUsageAttribute(attributes) + ? USAGE_MISSING_REASONS.TOKENS_MISSING + : USAGE_MISSING_REASONS.NOT_AVAILABLE, + usage: {}, + }; + } + + return { + missing: false, + usage_missing: false, + usage: { + input_tokens: inputTokens, + output_tokens: outputTokens, + total_tokens: totalTokens, + ...optionalNumber('cached_input_tokens', getNumber(attributes, 'gen_ai.usage.cached_input_tokens')), + ...optionalNumber('cache_creation_input_tokens', getNumber(attributes, 'gen_ai.usage.cache_creation_input_tokens')), + ...optionalNumber('cache_read_input_tokens', getNumber(attributes, 'gen_ai.usage.cache_read_input_tokens')), + }, + }; +} + +function hasAnyUsageAttribute(attributes: Record): boolean { + return Object.keys(attributes).some((key) => key.startsWith('gen_ai.usage.')); +} + +function phaseLabelForPhaseSpan( + phaseNumber: number | undefined, + phaseName: PhaseName | undefined, +): PhaseUsageType | undefined { + if (phaseNumber === 1 && phaseName === 'execute') { + return 'phase1_execute'; + } + if (phaseNumber === 2 && phaseName === 'report') { + return 'phase2_report'; + } + return undefined; +} + +function phaseLabelForJudgeStage(stage: JudgeStage | undefined): PhaseUsageType | undefined { + switch (stage) { + case 1: + return 'phase3_structured'; + case 2: + return 'phase3_tag'; + case 3: + return 'phase3_fallback'; + default: + return undefined; + } +} + +function getString(attributes: Record, key: string): string | undefined { + const value = attributes[key]; + return typeof value === 'string' && value.length > 0 ? value : undefined; +} + +function getNumber(attributes: Record, key: string): number | undefined { + const value = attributes[key]; + return typeof value === 'number' && Number.isFinite(value) ? value : undefined; +} + +function getProvider(attributes: Record, key: string): ProviderType | undefined { + const value = getString(attributes, key); + return value === 'claude' + || value === 'claude-sdk' + || value === 'claude-terminal' + || value === 'codex' + || value === 'opencode' + || value === 'cursor' + || value === 'copilot' + || value === 'kiro' + || value === 'mock' + ? value + : undefined; +} + +function getStepType(attributes: Record, key: string): StepType | undefined { + const value = getString(attributes, key); + return value === 'normal' + || value === 'parallel' + || value === 'arpeggio' + || value === 'team_leader' + || value === 'workflow_call' + ? value + : undefined; +} + +function getPhaseName(attributes: Record, key: string): PhaseName | undefined { + const value = getString(attributes, key); + return value === 'execute' || value === 'report' || value === 'judge' ? value : undefined; +} + +function getJudgeStage(attributes: Record, key: string): JudgeStage | undefined { + const value = getNumber(attributes, key); + return value === 1 || value === 2 || value === 3 ? value : undefined; +} + +function getJudgeMethod(attributes: Record, key: string): JudgeMethod | undefined { + const value = getString(attributes, key); + return value === 'structured_output' || value === 'phase3_tag' || value === 'ai_judge' ? value : undefined; +} + +function getUsageMissingReason(value: unknown): UsageMissingReason { + return value === USAGE_MISSING_REASONS.NOT_AVAILABLE + || value === USAGE_MISSING_REASONS.TOKENS_MISSING + || value === USAGE_MISSING_REASONS.NOT_SUPPORTED_BY_PROVIDER + ? value + : USAGE_MISSING_REASONS.NOT_AVAILABLE; +} + +function optionalNumber(key: K, value: number | undefined): Partial> { + return value !== undefined ? { [key]: value } as Partial> : {}; +} + +function hrTimeToIso(time: readonly [number, number] | undefined): string { + if (!time) { + return new Date().toISOString(); + } + const [seconds, nanoseconds] = time; + return new Date((seconds * 1000) + Math.floor(nanoseconds / 1_000_000)).toISOString(); +} diff --git a/src/core/workflow/arpeggio/types.ts b/src/core/workflow/arpeggio/types.ts index 298972652..d4ff8f7ad 100644 --- a/src/core/workflow/arpeggio/types.ts +++ b/src/core/workflow/arpeggio/types.ts @@ -11,6 +11,7 @@ export type { ArpeggioMergeStepConfig, } from '../../models/workflow-types.js'; import type { AgentResponse } from '../../models/types.js'; +import type { ProviderUsageSnapshot } from '../../models/response.js'; /** A single row of data from a data source (column name → value) */ export type DataRow = Record; @@ -43,6 +44,8 @@ export interface BatchResult { readonly error?: string; /** Rate limit response to propagate to the workflow run loop */ readonly rateLimitedResponse?: AgentResponse; + /** Provider usage captured for phase-level observability */ + readonly providerUsage?: ProviderUsageSnapshot; } /** Merge function signature: takes all batch results, returns merged string */ diff --git a/src/core/workflow/engine/ArpeggioRunner.ts b/src/core/workflow/engine/ArpeggioRunner.ts index a6457b91f..7083d6502 100644 --- a/src/core/workflow/engine/ArpeggioRunner.ts +++ b/src/core/workflow/engine/ArpeggioRunner.ts @@ -163,6 +163,7 @@ async function executeBatchWithRetry( content: '', success: false, error: lastError, + providerUsage: response.providerUsage, }; } if (response.status === 'rate_limited') { @@ -172,12 +173,14 @@ async function executeBatchWithRetry( success: false, error: response.error ?? response.content, rateLimitedResponse: response, + providerUsage: response.providerUsage, }; } return { batchIndex: batch.batchIndex, content: response.content, success: true, + providerUsage: response.providerUsage, }; } catch (error) { lastError = error instanceof Error ? error.message : String(error); @@ -204,6 +207,7 @@ async function executeBatchWithRetry( status: getBatchResultStatus(result), content: result.content, error: result.error, + providerUsage: result.providerUsage, })); } diff --git a/src/core/workflow/engine/ParallelRunner.ts b/src/core/workflow/engine/ParallelRunner.ts index 1f63676fa..de40af9d6 100644 --- a/src/core/workflow/engine/ParallelRunner.ts +++ b/src/core/workflow/engine/ParallelRunner.ts @@ -248,6 +248,7 @@ export class ParallelRunner { status: result.status, content: result.content, error: result.error, + providerUsage: result.providerUsage, })); if (!didEmitPhaseStart) { throw new Error(`Missing prompt parts for phase start: ${subStep.name}:1`); diff --git a/src/core/workflow/engine/StepExecutor.ts b/src/core/workflow/engine/StepExecutor.ts index 2a58f615c..7a09c5094 100644 --- a/src/core/workflow/engine/StepExecutor.ts +++ b/src/core/workflow/engine/StepExecutor.ts @@ -573,6 +573,7 @@ export class StepExecutor { status: result.status, content: result.content, error: result.error, + providerUsage: result.providerUsage, })); response = this.normalizeStructuredOutput(step, response, runtime); if (!didEmitPhaseStart) { diff --git a/src/core/workflow/engine/team-leader-part-runner.ts b/src/core/workflow/engine/team-leader-part-runner.ts index 73c954009..41ede203c 100644 --- a/src/core/workflow/engine/team-leader-part-runner.ts +++ b/src/core/workflow/engine/team-leader-part-runner.ts @@ -96,6 +96,7 @@ export async function runTeamLeaderPart( status: result.status, content: result.content, error: result.error, + providerUsage: result.providerUsage, })); updatePersonaSession(buildSessionKey(partStep, partProviderInfo.provider), response.sessionId); return { diff --git a/src/core/workflow/observability/workflowSpans.ts b/src/core/workflow/observability/workflowSpans.ts index d2ca0bc91..914a23b98 100644 --- a/src/core/workflow/observability/workflowSpans.ts +++ b/src/core/workflow/observability/workflowSpans.ts @@ -1,8 +1,10 @@ import { context, metrics, SpanStatusCode, trace, type Attributes, type Span } from '@opentelemetry/api'; import { getErrorMessage } from '../../../shared/utils/index.js'; +import type { ProviderUsageSnapshot } from '../../models/response.js'; import type { WorkflowMaxSteps, WorkflowResumePointEntry, WorkflowStep } from '../../models/types.js'; import type { JudgeStageEntry, PhaseName, PhasePromptParts, StepProviderInfo, StepRunResult } from '../types.js'; import { getWorkflowStepKind } from '../step-kind.js'; +import { USAGE_MISSING_REASONS } from '../../logging/contracts.js'; const tracer = trace.getTracer('takt.workflow'); const WORKFLOW_RUN_COUNTER_OPTIONS = { @@ -88,6 +90,7 @@ export interface PhaseSpanOutcome { error?: string; matchedRuleIndex?: number; matchedRuleMethod?: string; + providerUsage?: ProviderUsageSnapshot; } export interface JudgeStageSpanParams { @@ -100,6 +103,7 @@ export interface JudgeStageSpanParams { workflowStack?: WorkflowResumePointEntry[]; entry: JudgeStageEntry; sanitizeText?: (text: string) => string; + providerInfo?: StepProviderInfo; } export async function runWithWorkflowSpan( @@ -180,7 +184,14 @@ export async function runWithPhaseSpan( recordPhaseMetrics(params, outcome, Date.now() - startedAt); return result; } catch (error) { - const outcome = { status: 'error', error: getErrorMessage(error) }; + const outcome = { + status: 'error', + error: getErrorMessage(error), + providerUsage: { + usageMissing: true, + reason: USAGE_MISSING_REASONS.NOT_AVAILABLE, + }, + }; recordPhaseOutcome(span, params, outcome); recordPhaseMetrics(params, outcome, Date.now() - startedAt); throw error; @@ -199,6 +210,10 @@ export function recordJudgeStageSpan(params: JudgeStageSpanParams): void { }); try { recordJudgeStageMetrics(params); + span.setAttributes(compactAttributes({ + ...providerAttributes(params.providerInfo), + ...usageAttributes(params.entry.providerUsage), + })); if (params.entry.status === 'error') { span.setStatus({ code: SpanStatusCode.ERROR, message: `judge stage ${params.entry.status}` }); } @@ -374,6 +389,7 @@ function recordPhaseOutcome(span: Span, params: PhaseSpanParams, outcome: PhaseS 'takt.phase.result.error': sanitizeSpanText(params.sanitizeText, outcome.error), 'takt.phase.result.matched_rule_index': outcome.matchedRuleIndex, 'takt.phase.result.matched_rule_method': outcome.matchedRuleMethod, + ...usageAttributes(outcome.providerUsage), })); if (outcome.status === 'error' || outcome.status === 'rate_limited') { @@ -454,6 +470,27 @@ function providerAttributes(providerInfo: StepProviderInfo | undefined): Attribu }; } +function usageAttributes(usage: ProviderUsageSnapshot | undefined): AttributeInput { + if (!usage) { + return {}; + } + if (usage.usageMissing) { + return { + 'takt.usage.missing': true, + 'takt.usage.missing_reason': usage.reason, + }; + } + return { + 'takt.usage.missing': false, + 'gen_ai.usage.input_tokens': usage.inputTokens, + 'gen_ai.usage.output_tokens': usage.outputTokens, + 'gen_ai.usage.total_tokens': usage.totalTokens, + 'gen_ai.usage.cached_input_tokens': usage.cachedInputTokens, + 'gen_ai.usage.cache_creation_input_tokens': usage.cacheCreationInputTokens, + 'gen_ai.usage.cache_read_input_tokens': usage.cacheReadInputTokens, + }; +} + // Step-span only: the canonical step_start record carries providerOptions / // providerOptionsSources. Span attributes cannot hold objects, so serialize // them as JSON and parse back in the mapper. Kept out of providerAttributes() diff --git a/src/core/workflow/report-phase-runner.ts b/src/core/workflow/report-phase-runner.ts index a5f3efe1e..7b7ccb633 100644 --- a/src/core/workflow/report-phase-runner.ts +++ b/src/core/workflow/report-phase-runner.ts @@ -246,6 +246,7 @@ async function runSingleReportAttempt( status: result.status, content: result.content, error: result.error, + providerUsage: result.providerUsage, })); if (!didEmitPhaseStart) { throw new Error(`Missing prompt parts for phase start: ${step.name}:2`); diff --git a/src/core/workflow/status-judgment-phase.ts b/src/core/workflow/status-judgment-phase.ts index 2bc0b680e..29773732c 100644 --- a/src/core/workflow/status-judgment-phase.ts +++ b/src/core/workflow/status-judgment-phase.ts @@ -163,6 +163,7 @@ export async function runStatusJudgmentPhase( workflowStack: ctx.getCurrentWorkflowStack?.(), entry, sanitizeText: ctx.sanitizeObservabilityText, + providerInfo: stepProvider, }); ctx.onJudgeStage?.(step, 3, 'judge', entry, phaseExecutionId, ctx.iteration); }, diff --git a/src/core/workflow/types.ts b/src/core/workflow/types.ts index a199b7bbd..008913a2d 100644 --- a/src/core/workflow/types.ts +++ b/src/core/workflow/types.ts @@ -14,6 +14,7 @@ import type { } from '../models/types.js'; import type { PersonaProviderEntry, ResolvedObservabilityConfig } from '../models/config-types.js'; import type { ProviderPermissionProfiles } from '../models/provider-profiles.js'; +import type { ProviderUsageSnapshot } from '../models/response.js'; import type { StepProviderOptions } from '../models/workflow-types.js'; import type { StructuredCaller } from '../../agents/structured-caller.js'; import type { SystemStepServicesFactory } from './system/system-step-services.js'; @@ -77,6 +78,7 @@ export interface JudgeStageEntry { status: 'done' | 'error' | 'skipped'; instruction: string; response: string; + providerUsage?: ProviderUsageSnapshot; } export interface StepProviderInfo { diff --git a/src/features/tasks/execute/workflowExecutionBootstrap.ts b/src/features/tasks/execute/workflowExecutionBootstrap.ts index a2e040492..37748f622 100644 --- a/src/features/tasks/execute/workflowExecutionBootstrap.ts +++ b/src/features/tasks/execute/workflowExecutionBootstrap.ts @@ -26,6 +26,7 @@ import { createProviderEventLogger, isProviderEventsEnabled } from '../../../sha import { sanitizeTerminalText } from '../../../shared/utils/text.js'; import { createUsageEventLogger, isUsageEventsEnabled } from '../../../shared/utils/usageEventLogger.js'; import { initializeOtelFoundation, type OtelFoundationHandle } from '../../../infra/observability/otelFoundation.js'; +import { PHASE_USAGE_EVENTS_LOG_FILE_SUFFIX } from '../../../core/logging/contracts.js'; import { initAnalyticsWriter } from '../../analytics/index.js'; import { AnalyticsEmitter } from './analyticsEmitter.js'; import { createOutputFns, createPrefixedStreamHandler } from './outputFns.js'; @@ -228,7 +229,11 @@ export async function createWorkflowExecutionBootstrap( logger: log, }); const observabilityOptions = globalConfig.observability.enabled - && (globalConfig.observability.sessionLogExporter || globalConfig.observability.monitor) + && ( + globalConfig.observability.sessionLogExporter + || globalConfig.observability.monitor + || globalConfig.observability.usageEventsPhase + ) ? { ...(globalConfig.observability.sessionLogExporter ? { @@ -240,6 +245,15 @@ export async function createWorkflowExecutionBootstrap( }, } : {}), + ...(globalConfig.observability.usageEventsPhase + ? { + usageEventsExporter: { + runId: runSlug, + sessionId: workflowSessionId, + phaseUsageLogPath: join(runPaths.logsAbs, `${workflowSessionId}${PHASE_USAGE_EVENTS_LOG_FILE_SUFFIX}`), + }, + } + : {}), ...(globalConfig.observability.monitor ? { monitorJsonExporter: { runId: runSlug, monitorPath: join(runPaths.runRootAbs, 'monitor.json') } } : {}), diff --git a/src/infra/observability/otelFoundation.ts b/src/infra/observability/otelFoundation.ts index 8747c1630..16d25ab3e 100644 --- a/src/infra/observability/otelFoundation.ts +++ b/src/infra/observability/otelFoundation.ts @@ -4,6 +4,7 @@ import type { IMetricReader } from '@opentelemetry/sdk-metrics'; import type { ResolvedObservabilityConfig } from '../../core/models/config-types.js'; import { SessionLogSpanProcessor, type SessionLogSpanProcessorOptions } from './sessionLogSpanProcessor.js'; import { MonitorJsonMetricExporter, type MonitorJsonMetricExporterOptions } from './monitorJsonMetricExporter.js'; +import { UsageEventsSpanProcessor, type UsageEventsSpanProcessorOptions } from './usageEventsSpanProcessor.js'; const require = createRequire(import.meta.url); const { version: TAKT_VERSION } = require('../../../package.json') as { version: string }; @@ -18,6 +19,7 @@ type SharedOtelSdk = { refCount: number; metricReaders: IMetricReader[]; sessionLogSpanProcessor?: SessionLogSpanProcessor; + usageEventsSpanProcessor?: UsageEventsSpanProcessor; monitorJsonMetricExporter?: MonitorJsonMetricExporter; }; @@ -29,6 +31,7 @@ export type OtelFoundationHandle = { export interface OtelFoundationOptions { sessionLogExporter?: SessionLogSpanProcessorOptions; + usageEventsExporter?: UsageEventsSpanProcessorOptions; monitorJsonExporter?: MonitorJsonMetricExporterOptions; } @@ -120,15 +123,27 @@ function registerRunExporters( if (options?.sessionLogExporter && shared.sessionLogSpanProcessor) { registrations.push(shared.sessionLogSpanProcessor.register(options.sessionLogExporter)); } + if (options?.usageEventsExporter && shared.usageEventsSpanProcessor) { + registrations.push(shared.usageEventsSpanProcessor.register(options.usageEventsExporter)); + } if (options?.monitorJsonExporter && shared.monitorJsonMetricExporter) { registrations.push(shared.monitorJsonMetricExporter.register(options.monitorJsonExporter)); } return registrations; } -function createSpanProcessorState(): { spanProcessors: SpanProcessor[]; sessionLogSpanProcessor: SessionLogSpanProcessor } { +function createSpanProcessorState(): { + spanProcessors: SpanProcessor[]; + sessionLogSpanProcessor: SessionLogSpanProcessor; + usageEventsSpanProcessor: UsageEventsSpanProcessor; +} { const sessionLogSpanProcessor = new SessionLogSpanProcessor(); - return { spanProcessors: [sessionLogSpanProcessor], sessionLogSpanProcessor }; + const usageEventsSpanProcessor = new UsageEventsSpanProcessor(); + return { + spanProcessors: [sessionLogSpanProcessor, usageEventsSpanProcessor], + sessionLogSpanProcessor, + usageEventsSpanProcessor, + }; } async function createMetricReaders(): Promise<{ metricReaders: IMetricReader[]; monitorJsonMetricExporter: MonitorJsonMetricExporter }> { @@ -146,7 +161,7 @@ async function createMetricReaders(): Promise<{ metricReaders: IMetricReader[]; } async function startSdk(): Promise { - const { spanProcessors, sessionLogSpanProcessor } = createSpanProcessorState(); + const { spanProcessors, sessionLogSpanProcessor, usageEventsSpanProcessor } = createSpanProcessorState(); const { metricReaders, monitorJsonMetricExporter } = await createMetricReaders(); const { NodeSDK, resources } = await import('@opentelemetry/sdk-node'); const sdk = new NodeSDK({ @@ -165,6 +180,7 @@ async function startSdk(): Promise { sdk, metricReaders, sessionLogSpanProcessor, + usageEventsSpanProcessor, monitorJsonMetricExporter, }; } diff --git a/src/infra/observability/usageEventsSpanProcessor.ts b/src/infra/observability/usageEventsSpanProcessor.ts new file mode 100644 index 000000000..2be5445d6 --- /dev/null +++ b/src/infra/observability/usageEventsSpanProcessor.ts @@ -0,0 +1,94 @@ +import type { ReadableSpan, SpanProcessor } from '@opentelemetry/sdk-trace-base'; +import { appendNdjsonLine, type NdjsonRecord } from '../fs/index.js'; +import { createLogger } from '../../shared/utils/debug.js'; +import { + mapSpanEndToPhaseUsageEvent, + type PhaseUsageEventLogRecord, +} from '../../core/logging/phaseUsageEvent.js'; +import type { SpanSnapshot } from '../../core/logging/span-to-ndjson-mapper.js'; + +const log = createLogger('usage-events-span-processor'); + +export interface UsageEventsSpanProcessorOptions { + runId: string; + sessionId: string; + phaseUsageLogPath: string; +} + +export class UsageEventsSpanProcessor implements SpanProcessor { + private readonly registrations = new Map(); + private hasReportedWriteFailure = false; + + constructor(options?: UsageEventsSpanProcessorOptions) { + if (options) { + this.register(options); + } + } + + register(options: UsageEventsSpanProcessorOptions): () => void { + if (this.registrations.has(options.runId)) { + log.warn('Ignoring duplicate phase usage event registration', { + runId: options.runId, + phaseUsageLogPath: options.phaseUsageLogPath, + }); + return () => {}; + } + this.registrations.set(options.runId, options); + return () => { + this.registrations.delete(options.runId); + }; + } + + onStart(): void {} + + onEnd(span: ReadableSpan): void { + const options = this.optionsForSpan(span); + if (!options) { + return; + } + const record = mapSpanEndToPhaseUsageEvent(toSpanSnapshot(span), { + runId: options.runId, + sessionId: options.sessionId, + }); + this.safeAppend(options, record); + } + + private optionsForSpan(span: ReadableSpan): UsageEventsSpanProcessorOptions | undefined { + const runId = span.attributes['takt.run.id']; + return typeof runId === 'string' ? this.registrations.get(runId) : undefined; + } + + private safeAppend(options: UsageEventsSpanProcessorOptions, record: PhaseUsageEventLogRecord | undefined): void { + if (!record) { + return; + } + try { + appendNdjsonLine(options.phaseUsageLogPath, record as unknown as NdjsonRecord); + } catch (error) { + if (this.hasReportedWriteFailure) { + return; + } + this.hasReportedWriteFailure = true; + const message = error instanceof Error ? error.message : String(error); + log.error('Failed to append phase usage event log record', { + phaseUsageLogPath: options.phaseUsageLogPath, + error: message, + }); + } + } + + async forceFlush(): Promise {} + + async shutdown(): Promise { + this.registrations.clear(); + } +} + +function toSpanSnapshot(span: ReadableSpan): SpanSnapshot { + return { + name: span.name, + attributes: span.attributes, + startTime: span.startTime, + endTime: span.endTime, + }; +} From ed242f4f4ada8114df40227db827c6603a03fdee Mon Sep 17 00:00:00 2001 From: Junichi Kato Date: Sat, 6 Jun 2026 08:06:48 +0900 Subject: [PATCH 2/9] feat(observability): add usage analysis command --- README.md | 1 + docs/README.ja.md | 1 + docs/configuration.ja.md | 1 + docs/configuration.md | 1 + docs/observability.ja.md | 74 ++++ docs/observability.md | 74 ++++ package.json | 1 + src/__tests__/analyze-usage-command.test.ts | 147 ++++++++ src/commands/analyze-usage.ts | 373 ++++++++++++++++++++ 9 files changed, 673 insertions(+) create mode 100644 docs/observability.ja.md create mode 100644 docs/observability.md create mode 100644 src/__tests__/analyze-usage-command.test.ts create mode 100644 src/commands/analyze-usage.ts diff --git a/README.md b/README.md index cafeb7687..ec58767f7 100644 --- a/README.md +++ b/README.md @@ -284,6 +284,7 @@ See [External Integrations](./docs/external-integrations.md) for other community | [Tutorial](./docs/tutorial.md) | Improve one example over three phases while queuing, running, and inspecting tasks | | [CLI Reference](./docs/cli-reference.md) | All commands and options | | [Configuration](./docs/configuration.md) | Global and project settings | +| [Observability](./docs/observability.md) | Phase-level usage events and analysis workflow | | [Design Philosophy](./docs/design-philosophy.md) | Why TAKT is built around workflows, facets, feedback loops, and traceability | | [Workflow Guide](./docs/workflows.md) | Creating and customizing workflows | | [Builtin Catalog](./docs/builtin-catalog.md) | All builtin workflows and personas | diff --git a/docs/README.ja.md b/docs/README.ja.md index 45ceff5e2..b8d867b79 100644 --- a/docs/README.ja.md +++ b/docs/README.ja.md @@ -287,6 +287,7 @@ npx create-takt-sdd | [チュートリアル](./tutorial.ja.md) | 3 フェーズで題材を改良しながら、タスクを積み、実行し、結果を確認する流れ | | [CLI Reference](./cli-reference.ja.md) | 全コマンド・オプション | | [Configuration](./configuration.ja.md) | グローバル設定・プロジェクト設定 | +| [Observability](./observability.ja.md) | phase 粒度の usage events と集計 workflow | | [設計思想](./design-philosophy.ja.md) | TAKT が workflow、facet、フィードバックループ、追跡性を重視する理由 | | [Workflow Guide](./workflows.ja.md) | workflow の作成・カスタマイズ | | [Builtin Catalog](./builtin-catalog.ja.md) | ビルトイン workflow・persona の一覧 | diff --git a/docs/configuration.ja.md b/docs/configuration.ja.md index 7394913a5..463381dee 100644 --- a/docs/configuration.ja.md +++ b/docs/configuration.ja.md @@ -3,6 +3,7 @@ [English](./configuration.md) このドキュメントは TAKT の全設定オプションのリファレンスです。クイックスタートについては [README](../README.md) を参照してください。 +phase 粒度の usage events と集計方法は [Observability Guide](./observability.ja.md) を参照してください。 ## グローバル設定 diff --git a/docs/configuration.md b/docs/configuration.md index a543d1335..c7cd5cd7f 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -3,6 +3,7 @@ [日本語](./configuration.ja.md) This document is a reference for all TAKT configuration options. For a quick start, see the main [README](../README.md). +For phase-level usage events and analysis, see the [Observability Guide](./observability.md). ## Global Configuration diff --git a/docs/observability.ja.md b/docs/observability.ja.md new file mode 100644 index 000000000..51e9b2062 --- /dev/null +++ b/docs/observability.ja.md @@ -0,0 +1,74 @@ +# Observability + +[English](./observability.md) + +TAKT の observability は opt-in です。無効時は workflow 実行、session log、provider events、既存の `logging.usage_events` 出力の挙動を変えません。 + +## Phase Usage Events を有効化する + +`~/.takt/config.yaml` または `.takt/config.yaml` に次を追加します。 + +```yaml +observability: + enabled: true + usage_events_phase: true +``` + +phase 粒度の usage events は次に出力されます。 + +```text +.takt/runs//logs/-usage-events.phase.jsonl +``` + +この出力は既存の `logging.usage_events` とは別ファイルです。`logs/-usage-events.jsonl` は置き換えません。 + +## イベント粒度 + +record は workflow phase ごとに分かれます。 + +| Phase | 意味 | +|-------|------| +| `phase1_execute` | step 本体の実行 | +| `phase2_report` | output contract / report 生成 | +| `phase3_structured` | structured output による status judgment | +| `phase3_tag` | tag fallback による status judgment | +| `phase3_fallback` | AI judge fallback による status judgment | + +usage を取得できない場合は `usage_missing: true` と reason を記録します。分析コマンドでは missing usage を 0 token として扱わず、token 統計から除外します。 + +## Usage を集計する + +先に build します。 + +```bash +npm run build +``` + +その後、ファイルまたは run directory を渡して集計します。 + +```bash +npm run analyze:usage -- .takt/runs//logs/*-usage-events.phase.jsonl +npm run analyze:usage -- .takt/runs/ +``` + +デフォルト出力は `step x phase x provider x model` で集計した Markdown table です。 + +CSV が必要な場合は `--format csv` を使います。 + +```bash +npm run analyze:usage -- --format csv .takt/runs/ > usage.csv +``` + +出力列は次の通りです。 + +| Column | 意味 | +|--------|------| +| `step` / `phase` / `provider` / `model` | 集計キー | +| `runs` | unique な `run_id` 数 | +| `calls` | phase usage record 数 | +| `missing` | usage を取得できなかった record 数 | +| `input_tokens` / `output_tokens` / `total_tokens` | usage を取得できた record の token 合計 | +| `cached_input_tokens` / `cache_creation_input_tokens` / `cache_read_input_tokens` | cache 関連 token 合計 | +| `avg_total_tokens` / `median_total_tokens` / `stddev_total_tokens` | missing usage を除外した call 単位の total token 統計 | + +before/after 比較では、それぞれの run directory 群に対して別々にコマンドを実行し、出力された table または CSV を比較します。 diff --git a/docs/observability.md b/docs/observability.md new file mode 100644 index 000000000..9cd32b25c --- /dev/null +++ b/docs/observability.md @@ -0,0 +1,74 @@ +# Observability + +[日本語](./observability.ja.md) + +TAKT observability is opt-in. When disabled, workflow execution, session logs, provider events, and the existing `logging.usage_events` output keep their current behavior. + +## Enable Phase Usage Events + +Add this to `~/.takt/config.yaml` or `.takt/config.yaml`: + +```yaml +observability: + enabled: true + usage_events_phase: true +``` + +This writes phase-level usage events to: + +```text +.takt/runs//logs/-usage-events.phase.jsonl +``` + +The phase usage stream is separate from the existing `logging.usage_events` file. It does not replace `logs/-usage-events.jsonl`. + +## Event Granularity + +Records are grouped by workflow phase: + +| Phase | Meaning | +|-------|---------| +| `phase1_execute` | Main step execution | +| `phase2_report` | Output contract/report generation | +| `phase3_structured` | Structured status judgment | +| `phase3_tag` | Tag fallback status judgment | +| `phase3_fallback` | AI judge fallback status judgment | + +Missing usage is recorded with `usage_missing: true` and a reason. Missing usage is not treated as zero tokens by the analysis command. + +## Analyze Usage + +Build the project first: + +```bash +npm run build +``` + +Then aggregate one or more files or run directories: + +```bash +npm run analyze:usage -- .takt/runs//logs/*-usage-events.phase.jsonl +npm run analyze:usage -- .takt/runs/ +``` + +The default output is a Markdown table grouped by `step x phase x provider x model`. + +Use CSV output for spreadsheets or downstream scripts: + +```bash +npm run analyze:usage -- --format csv .takt/runs/ > usage.csv +``` + +The output columns are: + +| Column | Meaning | +|--------|---------| +| `step` / `phase` / `provider` / `model` | Aggregation key | +| `runs` | Unique `run_id` count | +| `calls` | Number of phase usage records | +| `missing` | Records with unavailable usage | +| `input_tokens` / `output_tokens` / `total_tokens` | Token totals for records with usage | +| `cached_input_tokens` / `cache_creation_input_tokens` / `cache_read_input_tokens` | Cache-related token totals | +| `avg_total_tokens` / `median_total_tokens` / `stddev_total_tokens` | Per-call total token statistics, excluding missing usage | + +For before/after comparisons, run the command separately for each set of run directories and compare the resulting tables or CSV files. diff --git a/package.json b/package.json index 6e60a9c2e..d195cf226 100644 --- a/package.json +++ b/package.json @@ -12,6 +12,7 @@ "scripts": { "build": "tsc && mkdir -p dist/shared/prompts/en dist/shared/prompts/ja dist/shared/i18n dist/core/runtime/presets && cp src/shared/prompts/en/*.md dist/shared/prompts/en/ && cp src/shared/prompts/ja/*.md dist/shared/prompts/ja/ && cp src/shared/i18n/labels_en.yaml src/shared/i18n/labels_ja.yaml dist/shared/i18n/ && cp src/core/runtime/presets/*.sh dist/core/runtime/presets/", "watch": "tsc --watch", + "analyze:usage": "node dist/commands/analyze-usage.js", "test": "vitest run", "test:watch": "vitest", "test:e2e": "tmp=\"$(mktemp -t takt-e2e.XXXXXX)\"; npm run test:e2e:mock >\"$tmp\" 2>&1; code=$?; cat \"$tmp\"; if grep -q \"error connecting to api.github.com\" \"$tmp\"; then echo \"[takt] GitHub connectivity error detected in E2E output\"; code=1; fi; rm -f \"$tmp\"; if [ \"$code\" -eq 0 ]; then msg='test:e2e passed'; else msg=\"test:e2e failed (exit=$code)\"; fi; if command -v osascript >/dev/null 2>&1; then osascript -e \"display notification \\\"$msg\\\" with title \\\"takt\\\" subtitle \\\"E2E\\\"\" >/dev/null 2>&1 || true; fi; echo \"[takt] $msg\"; exit $code", diff --git a/src/__tests__/analyze-usage-command.test.ts b/src/__tests__/analyze-usage-command.test.ts new file mode 100644 index 000000000..d6538020b --- /dev/null +++ b/src/__tests__/analyze-usage-command.test.ts @@ -0,0 +1,147 @@ +import { mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, describe, expect, it } from 'vitest'; +import { + analyzeUsage, + formatUsageAnalysis, + resolvePhaseUsageFiles, +} from '../commands/analyze-usage.js'; + +const tempDirs = new Set(); + +function createTempDir(): string { + const dir = mkdtempSync(join(tmpdir(), 'takt-analyze-usage-')); + tempDirs.add(dir); + return dir; +} + +afterEach(() => { + for (const dir of tempDirs) { + rmSync(dir, { recursive: true, force: true }); + } + tempDirs.clear(); +}); + +describe('analyze usage command', () => { + it('resolves phase usage files from files, logs directories, and run directories', () => { + const root = createTempDir(); + const runDir = join(root, 'run-1'); + const logsDir = join(runDir, 'logs'); + mkdirSync(logsDir, { recursive: true }); + const directFile = join(root, 'direct-usage-events.phase.jsonl'); + const nestedFile = join(logsDir, 'session-usage-events.phase.jsonl'); + writeFileSync(directFile, '', 'utf-8'); + writeFileSync(nestedFile, '', 'utf-8'); + writeFileSync(join(logsDir, 'session-usage-events.jsonl'), '', 'utf-8'); + + expect(resolvePhaseUsageFiles([directFile, runDir])).toEqual([ + directFile, + nestedFile, + ].sort()); + expect(resolvePhaseUsageFiles([logsDir])).toEqual([nestedFile]); + }); + + it('aggregates by step, phase, provider, and model while excluding missing usage from token stats', () => { + const root = createTempDir(); + const file = join(root, 'session-usage-events.phase.jsonl'); + writeFileSync(file, [ + record({ run_id: 'run-a', total_tokens: 10, input_tokens: 6, output_tokens: 4 }), + record({ run_id: 'run-b', total_tokens: 20, input_tokens: 12, output_tokens: 8 }), + record({ run_id: 'run-b', usage_missing: true, reason: 'usage_not_available', usage: {} }), + record({ + run_id: 'run-b', + step: 'review', + phase: 'phase2_report', + total_tokens: 5, + input_tokens: 3, + output_tokens: 2, + }), + ].join('\n') + '\n', 'utf-8'); + + const rows = analyzeUsage([file]); + + expect(rows).toEqual([ + expect.objectContaining({ + step: 'implement', + phase: 'phase1_execute', + provider: 'mock', + model: 'mock-model', + runs: 2, + calls: 3, + missing: 1, + inputTokens: 18, + outputTokens: 12, + totalTokens: 30, + avgTotalTokens: 15, + medianTotalTokens: 15, + stddevTotalTokens: 5, + }), + expect.objectContaining({ + step: 'review', + phase: 'phase2_report', + runs: 1, + calls: 1, + totalTokens: 5, + }), + ]); + }); + + it('formats markdown and csv output', () => { + const rows = [{ + step: 'implement', + phase: 'phase1_execute', + provider: 'mock', + model: 'mock,model', + runs: 1, + calls: 2, + missing: 0, + inputTokens: 3, + outputTokens: 2, + totalTokens: 5, + cachedInputTokens: 0, + cacheCreationInputTokens: 0, + cacheReadInputTokens: 0, + avgTotalTokens: 2.5, + medianTotalTokens: 2.5, + stddevTotalTokens: 0.5, + }]; + + expect(formatUsageAnalysis(rows)).toContain('| implement | phase1_execute | mock | mock,model |'); + expect(formatUsageAnalysis(rows, 'csv')).toContain('implement,phase1_execute,mock,"mock,model"'); + }); + + it('throws with line information for invalid JSON', () => { + const root = createTempDir(); + const file = join(root, 'session-usage-events.phase.jsonl'); + writeFileSync(file, '{bad json}\n', 'utf-8'); + + expect(() => analyzeUsage([file])).toThrow(`Invalid JSON in ${file}:1`); + expect(readFileSync(file, 'utf-8')).toBe('{bad json}\n'); + }); +}); + +function record(overrides: Record): string { + const usageMissing = overrides.usage_missing === true; + return JSON.stringify({ + run_id: 'run-a', + session_id: 'session-a', + provider: 'mock', + provider_model: 'mock-model', + step: 'implement', + step_type: 'normal', + phase: 'phase1_execute', + phase_name: 'execute', + timestamp: '2026-06-06T00:00:00.000Z', + success: !usageMissing, + usage_missing: usageMissing, + usage: usageMissing + ? {} + : { + input_tokens: overrides.input_tokens, + output_tokens: overrides.output_tokens, + total_tokens: overrides.total_tokens, + }, + ...overrides, + }); +} diff --git a/src/commands/analyze-usage.ts b/src/commands/analyze-usage.ts new file mode 100644 index 000000000..d95fe047d --- /dev/null +++ b/src/commands/analyze-usage.ts @@ -0,0 +1,373 @@ +import { existsSync, lstatSync, readFileSync, readdirSync } from 'node:fs'; +import { basename, join, resolve } from 'node:path'; +import { pathToFileURL } from 'node:url'; +import type { PhaseUsageEventLogRecord } from '../core/logging/phaseUsageEvent.js'; +import { PHASE_USAGE_EVENTS_LOG_FILE_SUFFIX } from '../core/logging/contracts.js'; + +export type UsageAnalysisFormat = 'markdown' | 'csv'; + +export interface UsageAnalysisOptions { + format?: UsageAnalysisFormat; +} + +export interface UsageAnalysisRow { + step: string; + phase: string; + provider: string; + model: string; + runs: number; + calls: number; + missing: number; + inputTokens: number; + outputTokens: number; + totalTokens: number; + cachedInputTokens: number; + cacheCreationInputTokens: number; + cacheReadInputTokens: number; + avgTotalTokens: number; + medianTotalTokens: number; + stddevTotalTokens: number; +} + +interface UsageGroup { + key: string; + step: string; + phase: string; + provider: string; + model: string; + runs: Set; + calls: number; + missing: number; + inputTokens: number; + outputTokens: number; + totalTokens: number; + cachedInputTokens: number; + cacheCreationInputTokens: number; + cacheReadInputTokens: number; + totalSamples: number[]; +} + +const MARKDOWN_COLUMNS: Array<[string, keyof UsageAnalysisRow]> = [ + ['step', 'step'], + ['phase', 'phase'], + ['provider', 'provider'], + ['model', 'model'], + ['runs', 'runs'], + ['calls', 'calls'], + ['missing', 'missing'], + ['input_tokens', 'inputTokens'], + ['output_tokens', 'outputTokens'], + ['total_tokens', 'totalTokens'], + ['cached_input_tokens', 'cachedInputTokens'], + ['cache_creation_input_tokens', 'cacheCreationInputTokens'], + ['cache_read_input_tokens', 'cacheReadInputTokens'], + ['avg_total_tokens', 'avgTotalTokens'], + ['median_total_tokens', 'medianTotalTokens'], + ['stddev_total_tokens', 'stddevTotalTokens'], +]; + +export function analyzeUsage(inputs: string[]): UsageAnalysisRow[] { + const files = resolvePhaseUsageFiles(inputs); + const groups = new Map(); + + for (const file of files) { + for (const record of readPhaseUsageRecords(file)) { + const key = groupKey(record); + let group = groups.get(key); + if (!group) { + group = { + key, + step: record.step, + phase: record.phase, + provider: record.provider, + model: record.provider_model, + runs: new Set(), + calls: 0, + missing: 0, + inputTokens: 0, + outputTokens: 0, + totalTokens: 0, + cachedInputTokens: 0, + cacheCreationInputTokens: 0, + cacheReadInputTokens: 0, + totalSamples: [], + }; + groups.set(key, group); + } + addRecord(group, record); + } + } + + return [...groups.values()] + .sort((a, b) => a.key.localeCompare(b.key)) + .map(groupToRow); +} + +export function formatUsageAnalysis(rows: UsageAnalysisRow[], format: UsageAnalysisFormat = 'markdown'): string { + return format === 'csv' ? formatCsv(rows) : formatMarkdown(rows); +} + +export function resolvePhaseUsageFiles(inputs: string[]): string[] { + const files = new Set(); + for (const input of inputs) { + const target = resolve(input); + if (!existsSync(target)) { + throw new Error(`Input path does not exist: ${input}`); + } + const stat = lstatSync(target); + if (stat.isDirectory()) { + for (const file of listPhaseUsageFilesInDirectory(target)) { + files.add(file); + } + } else if (stat.isFile() && isPhaseUsageFile(target)) { + files.add(target); + } + } + return [...files].sort(); +} + +function listPhaseUsageFilesInDirectory(dir: string): string[] { + const candidates = [ + ...listDirectPhaseUsageFiles(dir), + ...listDirectPhaseUsageFiles(join(dir, 'logs')), + ]; + return candidates; +} + +function listDirectPhaseUsageFiles(dir: string): string[] { + if (!existsSync(dir) || !lstatSync(dir).isDirectory()) { + return []; + } + return readdirSync(dir) + .filter((entry) => isPhaseUsageFile(entry)) + .map((entry) => join(dir, entry)); +} + +function isPhaseUsageFile(path: string): boolean { + return basename(path).endsWith(PHASE_USAGE_EVENTS_LOG_FILE_SUFFIX); +} + +function readPhaseUsageRecords(file: string): PhaseUsageEventLogRecord[] { + const records: PhaseUsageEventLogRecord[] = []; + const content = readFileSync(file, 'utf-8'); + for (const [index, line] of content.split('\n').entries()) { + const trimmed = line.trim(); + if (!trimmed) { + continue; + } + try { + const record = JSON.parse(trimmed) as unknown; + if (isPhaseUsageRecord(record)) { + records.push(record); + } + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + throw new Error(`Invalid JSON in ${file}:${index + 1}: ${message}`); + } + } + return records; +} + +function isPhaseUsageRecord(value: unknown): value is PhaseUsageEventLogRecord { + if (value === null || typeof value !== 'object') { + return false; + } + const candidate = value as Partial; + return typeof candidate.run_id === 'string' + && typeof candidate.provider === 'string' + && typeof candidate.provider_model === 'string' + && typeof candidate.step === 'string' + && typeof candidate.phase === 'string' + && typeof candidate.usage_missing === 'boolean' + && candidate.usage !== null + && typeof candidate.usage === 'object'; +} + +function groupKey(record: PhaseUsageEventLogRecord): string { + return [ + record.step, + record.phase, + record.provider, + record.provider_model, + ].join('\u0000'); +} + +function addRecord(group: UsageGroup, record: PhaseUsageEventLogRecord): void { + group.runs.add(record.run_id); + group.calls += 1; + if (record.usage_missing) { + group.missing += 1; + return; + } + + const inputTokens = finiteNumber(record.usage.input_tokens); + const outputTokens = finiteNumber(record.usage.output_tokens); + const totalTokens = finiteNumber(record.usage.total_tokens); + if (inputTokens === undefined || outputTokens === undefined || totalTokens === undefined) { + group.missing += 1; + return; + } + + group.inputTokens += inputTokens; + group.outputTokens += outputTokens; + group.totalTokens += totalTokens; + group.cachedInputTokens += finiteNumber(record.usage.cached_input_tokens) ?? 0; + group.cacheCreationInputTokens += finiteNumber(record.usage.cache_creation_input_tokens) ?? 0; + group.cacheReadInputTokens += finiteNumber(record.usage.cache_read_input_tokens) ?? 0; + group.totalSamples.push(totalTokens); +} + +function groupToRow(group: UsageGroup): UsageAnalysisRow { + return { + step: group.step, + phase: group.phase, + provider: group.provider, + model: group.model, + runs: group.runs.size, + calls: group.calls, + missing: group.missing, + inputTokens: group.inputTokens, + outputTokens: group.outputTokens, + totalTokens: group.totalTokens, + cachedInputTokens: group.cachedInputTokens, + cacheCreationInputTokens: group.cacheCreationInputTokens, + cacheReadInputTokens: group.cacheReadInputTokens, + avgTotalTokens: average(group.totalSamples), + medianTotalTokens: median(group.totalSamples), + stddevTotalTokens: standardDeviation(group.totalSamples), + }; +} + +function finiteNumber(value: number | undefined): number | undefined { + return typeof value === 'number' && Number.isFinite(value) ? value : undefined; +} + +function average(values: number[]): number { + if (values.length === 0) { + return 0; + } + return values.reduce((sum, value) => sum + value, 0) / values.length; +} + +function median(values: number[]): number { + if (values.length === 0) { + return 0; + } + const sorted = [...values].sort((a, b) => a - b); + const middle = Math.floor(sorted.length / 2); + if (sorted.length % 2 === 1) { + return sorted[middle] ?? 0; + } + return ((sorted[middle - 1] ?? 0) + (sorted[middle] ?? 0)) / 2; +} + +function standardDeviation(values: number[]): number { + if (values.length === 0) { + return 0; + } + const avg = average(values); + const variance = values.reduce((sum, value) => sum + ((value - avg) ** 2), 0) / values.length; + return Math.sqrt(variance); +} + +function formatMarkdown(rows: UsageAnalysisRow[]): string { + if (rows.length === 0) { + return 'No phase usage events found.'; + } + const header = `| ${MARKDOWN_COLUMNS.map(([name]) => name).join(' | ')} |`; + const separator = `| ${MARKDOWN_COLUMNS.map(() => '---').join(' | ')} |`; + const body = rows.map((row) => + `| ${MARKDOWN_COLUMNS.map(([, key]) => formatValue(row[key])).join(' | ')} |` + ); + return [header, separator, ...body].join('\n'); +} + +function formatCsv(rows: UsageAnalysisRow[]): string { + const header = MARKDOWN_COLUMNS.map(([name]) => name).join(','); + const body = rows.map((row) => + MARKDOWN_COLUMNS.map(([, key]) => csvCell(formatValue(row[key]))).join(',') + ); + return [header, ...body].join('\n'); +} + +function formatValue(value: UsageAnalysisRow[keyof UsageAnalysisRow]): string { + if (typeof value === 'number') { + return Number.isInteger(value) ? String(value) : value.toFixed(2); + } + return value; +} + +function csvCell(value: string): string { + if (!/[",\n]/.test(value)) { + return value; + } + return `"${value.replaceAll('"', '""')}"`; +} + +function parseArgs(argv: string[]): { format: UsageAnalysisFormat; inputs: string[] } { + let format: UsageAnalysisFormat = 'markdown'; + const inputs: string[] = []; + for (let index = 0; index < argv.length; index += 1) { + const arg = argv[index]; + if (arg === '--format') { + const value = argv[index + 1]; + if (value !== 'markdown' && value !== 'csv') { + throw new Error('--format must be "markdown" or "csv"'); + } + format = value; + index += 1; + continue; + } + if (arg?.startsWith('--format=')) { + const value = arg.slice('--format='.length); + if (value !== 'markdown' && value !== 'csv') { + throw new Error('--format must be "markdown" or "csv"'); + } + format = value; + continue; + } + if (arg === '--help' || arg === '-h') { + throw new UsageHelp(); + } + if (arg?.startsWith('-')) { + throw new Error(`Unknown option: ${arg}`); + } + if (arg !== undefined) { + inputs.push(arg); + } + } + if (inputs.length === 0) { + throw new Error('At least one phase usage event file or run directory is required'); + } + return { format, inputs }; +} + +class UsageHelp extends Error {} + +function usageText(): string { + return [ + 'Usage: npm run analyze:usage -- [--format markdown|csv] ...', + '', + `Reads ${PHASE_USAGE_EVENTS_LOG_FILE_SUFFIX} files and aggregates by step, phase, provider, and model.`, + ].join('\n'); +} + +async function main(): Promise { + try { + const { format, inputs } = parseArgs(process.argv.slice(2)); + const rows = analyzeUsage(inputs); + process.stdout.write(`${formatUsageAnalysis(rows, format)}\n`); + } catch (error) { + if (error instanceof UsageHelp) { + process.stdout.write(`${usageText()}\n`); + return; + } + const message = error instanceof Error ? error.message : String(error); + process.stderr.write(`${message}\n\n${usageText()}\n`); + process.exitCode = 1; + } +} + +if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) { + await main(); +} From 79d59b790656077070673788c4c45f3e1a0d4c9f Mon Sep 17 00:00:00 2001 From: Junichi Kato Date: Sat, 6 Jun 2026 08:24:21 +0900 Subject: [PATCH 3/9] fix(observability): tighten phase usage boundaries --- src/__tests__/analyze-usage-command.test.ts | 8 +++++ .../usageEventsSpanProcessor.test.ts | 12 ++------ src/commands/analyze-usage.ts | 7 ++++- src/core/logging/phaseUsageEvent.ts | 14 ++------- src/infra/fs/index.ts | 4 +++ src/infra/fs/jsonl.ts | 5 ++++ .../observability/usageEventsSpanProcessor.ts | 10 ++----- src/shared/types/provider.ts | 29 ++++++++++++------- 8 files changed, 50 insertions(+), 39 deletions(-) create mode 100644 src/infra/fs/jsonl.ts diff --git a/src/__tests__/analyze-usage-command.test.ts b/src/__tests__/analyze-usage-command.test.ts index d6538020b..d6df99cef 100644 --- a/src/__tests__/analyze-usage-command.test.ts +++ b/src/__tests__/analyze-usage-command.test.ts @@ -119,6 +119,14 @@ describe('analyze usage command', () => { expect(() => analyzeUsage([file])).toThrow(`Invalid JSON in ${file}:1`); expect(readFileSync(file, 'utf-8')).toBe('{bad json}\n'); }); + + it('rejects explicit files that are not phase usage event files', () => { + const root = createTempDir(); + const file = join(root, 'session-usage-events.jsonl'); + writeFileSync(file, '', 'utf-8'); + + expect(() => resolvePhaseUsageFiles([file])).toThrow(`Input file is not a phase usage event file: ${file}`); + }); }); function record(overrides: Record): string { diff --git a/src/__tests__/usageEventsSpanProcessor.test.ts b/src/__tests__/usageEventsSpanProcessor.test.ts index 444d8f354..d217295ce 100644 --- a/src/__tests__/usageEventsSpanProcessor.test.ts +++ b/src/__tests__/usageEventsSpanProcessor.test.ts @@ -63,7 +63,7 @@ describe('UsageEventsSpanProcessor', () => { ]); }); - it('ignores duplicate runId registrations', () => { + it('rejects duplicate runId registrations', () => { const firstLogPath = createTempLogPath('first-usage-events.phase.jsonl'); const secondLogPath = createTempLogPath('second-usage-events.phase.jsonl'); const processor = new UsageEventsSpanProcessor(); @@ -73,17 +73,11 @@ describe('UsageEventsSpanProcessor', () => { sessionId: 'session-1', phaseUsageLogPath: firstLogPath, }); - processor.register({ + expect(() => processor.register({ runId: 'run-1', sessionId: 'session-duplicate', phaseUsageLogPath: secondLogPath, - }); - - processor.onEnd(makePhaseSpan('run-1') as unknown as ReadableSpan); - - expect(readRecords(firstLogPath)[0]).toEqual(expect.objectContaining({ - session_id: 'session-1', - })); + })).toThrow('Phase usage event exporter is already registered for runId: run-1'); expect(existsSync(secondLogPath)).toBe(false); }); diff --git a/src/commands/analyze-usage.ts b/src/commands/analyze-usage.ts index d95fe047d..50ef5b99e 100644 --- a/src/commands/analyze-usage.ts +++ b/src/commands/analyze-usage.ts @@ -119,7 +119,12 @@ export function resolvePhaseUsageFiles(inputs: string[]): string[] { for (const file of listPhaseUsageFilesInDirectory(target)) { files.add(file); } - } else if (stat.isFile() && isPhaseUsageFile(target)) { + continue; + } + if (stat.isFile()) { + if (!isPhaseUsageFile(target)) { + throw new Error(`Input file is not a phase usage event file: ${input}`); + } files.add(target); } } diff --git a/src/core/logging/phaseUsageEvent.ts b/src/core/logging/phaseUsageEvent.ts index 6cec10ff8..b586a430c 100644 --- a/src/core/logging/phaseUsageEvent.ts +++ b/src/core/logging/phaseUsageEvent.ts @@ -4,7 +4,7 @@ import { type UsageMissingReason, } from './contracts.js'; import type { StepType } from './providerEvent.js'; -import type { ProviderType } from '../../shared/types/provider.js'; +import { isProviderType, type ProviderType } from '../../shared/types/provider.js'; export type PhaseUsageType = | 'phase1_execute' @@ -252,17 +252,7 @@ function getNumber(attributes: Record, key: string): number | u function getProvider(attributes: Record, key: string): ProviderType | undefined { const value = getString(attributes, key); - return value === 'claude' - || value === 'claude-sdk' - || value === 'claude-terminal' - || value === 'codex' - || value === 'opencode' - || value === 'cursor' - || value === 'copilot' - || value === 'kiro' - || value === 'mock' - ? value - : undefined; + return isProviderType(value) ? value : undefined; } function getStepType(attributes: Record, key: string): StepType | undefined { diff --git a/src/infra/fs/index.ts b/src/infra/fs/index.ts index e5bf88388..3f637165f 100644 --- a/src/infra/fs/index.ts +++ b/src/infra/fs/index.ts @@ -17,6 +17,10 @@ export type { NdjsonRecord, } from './session.js'; +export { + appendJsonLine, +} from './jsonl.js'; + export { SessionManager, appendNdjsonLine, diff --git a/src/infra/fs/jsonl.ts b/src/infra/fs/jsonl.ts new file mode 100644 index 000000000..bad4d76ab --- /dev/null +++ b/src/infra/fs/jsonl.ts @@ -0,0 +1,5 @@ +import { appendFileSync } from 'node:fs'; + +export function appendJsonLine(filepath: string, record: object): void { + appendFileSync(filepath, `${JSON.stringify(record)}\n`, 'utf-8'); +} diff --git a/src/infra/observability/usageEventsSpanProcessor.ts b/src/infra/observability/usageEventsSpanProcessor.ts index 2be5445d6..31523de5d 100644 --- a/src/infra/observability/usageEventsSpanProcessor.ts +++ b/src/infra/observability/usageEventsSpanProcessor.ts @@ -1,5 +1,5 @@ import type { ReadableSpan, SpanProcessor } from '@opentelemetry/sdk-trace-base'; -import { appendNdjsonLine, type NdjsonRecord } from '../fs/index.js'; +import { appendJsonLine } from '../fs/index.js'; import { createLogger } from '../../shared/utils/debug.js'; import { mapSpanEndToPhaseUsageEvent, @@ -27,11 +27,7 @@ export class UsageEventsSpanProcessor implements SpanProcessor { register(options: UsageEventsSpanProcessorOptions): () => void { if (this.registrations.has(options.runId)) { - log.warn('Ignoring duplicate phase usage event registration', { - runId: options.runId, - phaseUsageLogPath: options.phaseUsageLogPath, - }); - return () => {}; + throw new Error(`Phase usage event exporter is already registered for runId: ${options.runId}`); } this.registrations.set(options.runId, options); return () => { @@ -63,7 +59,7 @@ export class UsageEventsSpanProcessor implements SpanProcessor { return; } try { - appendNdjsonLine(options.phaseUsageLogPath, record as unknown as NdjsonRecord); + appendJsonLine(options.phaseUsageLogPath, record); } catch (error) { if (this.hasReportedWriteFailure) { return; diff --git a/src/shared/types/provider.ts b/src/shared/types/provider.ts index 341dc2781..c0bbb4b20 100644 --- a/src/shared/types/provider.ts +++ b/src/shared/types/provider.ts @@ -1,15 +1,24 @@ import type { AgentFailureCategory } from './agent-failure.js'; -export type ProviderType = - | 'claude' - | 'claude-sdk' - | 'claude-terminal' - | 'codex' - | 'opencode' - | 'cursor' - | 'copilot' - | 'kiro' - | 'mock'; +export const PROVIDER_TYPES = [ + 'claude', + 'claude-sdk', + 'claude-terminal', + 'codex', + 'opencode', + 'cursor', + 'copilot', + 'kiro', + 'mock', +] as const; + +export type ProviderType = (typeof PROVIDER_TYPES)[number]; + +const PROVIDER_TYPE_SET: ReadonlySet = new Set(PROVIDER_TYPES); + +export function isProviderType(value: unknown): value is ProviderType { + return typeof value === 'string' && PROVIDER_TYPE_SET.has(value); +} export interface StreamInitEventData { model: string; From 628537dcdbfbccb2273758acdd1688f01cf84e38 Mon Sep 17 00:00:00 2001 From: Junichi Kato Date: Sat, 6 Jun 2026 09:46:38 +0900 Subject: [PATCH 4/9] fix(observability): roll back failed exporter registration --- src/__tests__/otelFoundation.test.ts | 64 +++++++++++++++++++++++ src/infra/observability/otelFoundation.ts | 35 +++++++++---- 2 files changed, 90 insertions(+), 9 deletions(-) diff --git a/src/__tests__/otelFoundation.test.ts b/src/__tests__/otelFoundation.test.ts index 01855ca56..2cc454e35 100644 --- a/src/__tests__/otelFoundation.test.ts +++ b/src/__tests__/otelFoundation.test.ts @@ -339,6 +339,70 @@ describe('otel foundation', () => { } }); + it('should release the shared SDK when run exporter registration fails', async () => { + const foundation = await loadFoundationWithMockedSdk(); + const tempDir = mkdtempSync(join(tmpdir(), 'takt-otel-registration-failure-')); + const firstPhaseUsageLogPath = join(tempDir, 'first-usage-events.phase.jsonl'); + const secondShadowLogPath = join(tempDir, 'second-otel-session-shadow.jsonl'); + const secondPhaseUsageLogPath = join(tempDir, 'second-usage-events.phase.jsonl'); + let first: { shutdown(): Promise } | undefined; + + try { + first = await foundation.initializeOtelFoundation( + enabledUsageEventsPhaseObservability, + { + usageEventsExporter: { + runId: 'run-1', + sessionId: 'session-1', + phaseUsageLogPath: firstPhaseUsageLogPath, + }, + }, + ); + + await expect(foundation.initializeOtelFoundation( + enabledAllObservability, + { + sessionLogExporter: { + runId: 'run-2', + shadowLogPath: secondShadowLogPath, + sanitizedTask: 'second task', + workflowName: 'default', + }, + usageEventsExporter: { + runId: 'run-1', + sessionId: 'session-duplicate', + phaseUsageLogPath: secondPhaseUsageLogPath, + }, + }, + )).rejects.toThrow('Phase usage event exporter is already registered for runId: run-1'); + + const sessionLogProcessor = foundation.constructedOptions[0]?.spanProcessors?.[0] as { + onStart(span: unknown, parentContext: unknown): void; + }; + sessionLogProcessor.onStart({ + name: 'step.implement', + startTime: [1_778_777_205, 0], + attributes: { + 'takt.run.id': 'run-2', + 'takt.step.name': 'implement', + 'takt.step.persona': 'coder', + 'takt.step.iteration': 1, + }, + }, {}); + const secondShadowRecords = readFileSync(secondShadowLogPath, 'utf-8').trim().split('\n'); + expect(secondShadowRecords).toHaveLength(1); + + await first.shutdown(); + + expect(foundation.shutdownMock).toHaveBeenCalledOnce(); + } finally { + if (first) { + await first.shutdown(); + } + rmSync(tempDir, { recursive: true, force: true }); + } + }); + it('should route run-local session logs and monitor files through the shared SDK', async () => { const foundation = await loadFoundationWithMockedSdk(); const tempDir = mkdtempSync(join(tmpdir(), 'takt-otel-run-routing-')); diff --git a/src/infra/observability/otelFoundation.ts b/src/infra/observability/otelFoundation.ts index 16d25ab3e..e6995b1b0 100644 --- a/src/infra/observability/otelFoundation.ts +++ b/src/infra/observability/otelFoundation.ts @@ -50,7 +50,13 @@ export async function initializeOtelFoundation( } const shared = await acquireSdk(); - const registrations = registerRunExporters(shared, options); + let registrations: OtelRegistration[]; + try { + registrations = registerRunExporters(shared, options); + } catch (error) { + await releaseSdk(shared); + throw error; + } let released = false; return { @@ -120,18 +126,29 @@ function registerRunExporters( options: OtelFoundationOptions | undefined, ): OtelRegistration[] { const registrations: OtelRegistration[] = []; - if (options?.sessionLogExporter && shared.sessionLogSpanProcessor) { - registrations.push(shared.sessionLogSpanProcessor.register(options.sessionLogExporter)); - } - if (options?.usageEventsExporter && shared.usageEventsSpanProcessor) { - registrations.push(shared.usageEventsSpanProcessor.register(options.usageEventsExporter)); - } - if (options?.monitorJsonExporter && shared.monitorJsonMetricExporter) { - registrations.push(shared.monitorJsonMetricExporter.register(options.monitorJsonExporter)); + try { + if (options?.sessionLogExporter && shared.sessionLogSpanProcessor) { + registrations.push(shared.sessionLogSpanProcessor.register(options.sessionLogExporter)); + } + if (options?.usageEventsExporter && shared.usageEventsSpanProcessor) { + registrations.push(shared.usageEventsSpanProcessor.register(options.usageEventsExporter)); + } + if (options?.monitorJsonExporter && shared.monitorJsonMetricExporter) { + registrations.push(shared.monitorJsonMetricExporter.register(options.monitorJsonExporter)); + } + } catch (error) { + unregisterAll(registrations); + throw error; } return registrations; } +function unregisterAll(registrations: OtelRegistration[]): void { + for (const unregister of registrations.splice(0).reverse()) { + unregister(); + } +} + function createSpanProcessorState(): { spanProcessors: SpanProcessor[]; sessionLogSpanProcessor: SessionLogSpanProcessor; From 1cafa58e12f6c9489584e4520aeeb0a6ffe18864 Mon Sep 17 00:00:00 2001 From: Junichi Kato Date: Sat, 6 Jun 2026 14:14:21 +0900 Subject: [PATCH 5/9] test(observability): cover config-driven local exporters --- docs/testing/e2e.md | 7 + e2e/specs/observability.e2e.ts | 129 ++++++++++++++++++ src/__tests__/otelFoundation.test.ts | 2 +- src/__tests__/phaseUsageEvent.test.ts | 12 +- .../usageEventsSpanProcessor.test.ts | 2 +- src/core/logging/phaseUsageEvent.ts | 14 +- vitest.config.e2e.mock.ts | 1 + 7 files changed, 151 insertions(+), 16 deletions(-) create mode 100644 e2e/specs/observability.e2e.ts diff --git a/docs/testing/e2e.md b/docs/testing/e2e.md index 3434edcb9..3fb2f0f1a 100644 --- a/docs/testing/e2e.md +++ b/docs/testing/e2e.md @@ -99,6 +99,13 @@ E2Eテストを追加・変更した場合は、このドキュメントも更 - `takt --task 'Create a short report and finish' --workflow e2e/fixtures/workflows/report-judge.yaml --provider mock` を実行する。 - `TAKT_MOCK_SCENARIO=e2e/fixtures/scenarios/report-judge.json` を設定する。 - 出力に `Workflow completed` が含まれることを確認する。 +- Observability file outputs(`e2e/specs/observability.e2e.ts`) + - 目的: observability の全ローカルファイル出力を config だけで有効化できることを確認。 + - LLM: 呼び出さない(`--provider mock` 固定) + - 手順(ユーザー行動/コマンド): + - E2E用 `config.yaml` に `observability.enabled: true`, `usage_events_phase: true`, `monitor: true`, `session_log_exporter: true` を設定する。 + - `takt --task 'Create a short report and finish' --workflow e2e/fixtures/workflows/report-judge.yaml --provider mock` を実行する。 + - run配下に `*-usage-events.phase.jsonl`, `*-otel-session-shadow.jsonl`, `monitor.json` が出力されることを確認する。 - Add task(`e2e/specs/add.e2e.ts`) - 目的: `takt add` がIssue参照からタスクファイルを生成できることを確認。 - LLM: 呼び出さない(`provider: mock` + `TAKT_MOCK_SCENARIO` 固定) diff --git a/e2e/specs/observability.e2e.ts b/e2e/specs/observability.e2e.ts new file mode 100644 index 000000000..e3de9f1cb --- /dev/null +++ b/e2e/specs/observability.e2e.ts @@ -0,0 +1,129 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { readdirSync, readFileSync } from 'node:fs'; +import { dirname, join, resolve } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { + createIsolatedEnv, + updateIsolatedConfig, + type IsolatedEnv, +} from '../helpers/isolated-env'; +import { createLocalRepo, type LocalRepo } from '../helpers/test-repo'; +import { runTakt } from '../helpers/takt-runner'; +import { copyWorkflowFixtureToRepo } from '../helpers/local-workflow-fixture'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +type JsonRecord = Record; + +function readJsonl(path: string): JsonRecord[] { + return readFileSync(path, 'utf-8') + .trim() + .split('\n') + .filter((line) => line.length > 0) + .map((line) => JSON.parse(line) as JsonRecord); +} + +function firstRunRoot(repoPath: string): string { + const runsDir = join(repoPath, '.takt', 'runs'); + const runDirs = readdirSync(runsDir).sort(); + const runDir = runDirs[0]; + if (!runDir) { + throw new Error('Run directory not found'); + } + return join(runsDir, runDir); +} + +function findLogFile(runRoot: string, suffix: string): string { + const logsDir = join(runRoot, 'logs'); + const entries = readdirSync(logsDir); + const file = entries.find((entry) => entry.endsWith(suffix)); + if (!file) { + throw new Error(`Log file not found: *${suffix}; logs: ${entries.join(', ')}`); + } + return join(logsDir, file); +} + +// E2E更新時は docs/testing/e2e.md も更新すること +describe('E2E: Observability file outputs (mock)', () => { + let isolatedEnv: IsolatedEnv; + let testRepo: LocalRepo; + + beforeEach(() => { + isolatedEnv = createIsolatedEnv(); + updateIsolatedConfig(isolatedEnv.taktDir, { + observability: { + enabled: true, + usage_events_phase: true, + monitor: true, + session_log_exporter: true, + }, + }); + testRepo = createLocalRepo(); + }); + + afterEach(() => { + try { + testRepo.cleanup(); + } catch { + // best-effort + } + try { + isolatedEnv.cleanup(); + } catch { + // best-effort + } + }); + + it('should write phase usage events, shadow session log, and monitor JSON from config only', () => { + const workflowPath = copyWorkflowFixtureToRepo( + testRepo.path, + resolve(__dirname, '../fixtures/workflows/report-judge.yaml'), + ); + const scenarioPath = resolve(__dirname, '../fixtures/scenarios/report-judge.json'); + + const result = runTakt({ + args: [ + '--task', 'Create a short report and finish', + '--workflow', workflowPath, + '--provider', 'mock', + ], + cwd: testRepo.path, + env: { + ...isolatedEnv.env, + TAKT_MOCK_SCENARIO: scenarioPath, + }, + timeout: 240_000, + }); + + expect(result.exitCode).toBe(0); + + const runRoot = firstRunRoot(testRepo.path); + const phaseUsagePath = findLogFile(runRoot, '-usage-events.phase.jsonl'); + const shadowLogPath = findLogFile(runRoot, '-otel-session-shadow.jsonl'); + const monitorPath = join(runRoot, 'monitor.json'); + + const phaseUsageRecords = readJsonl(phaseUsagePath); + expect(phaseUsageRecords.length).toBeGreaterThan(0); + const phases = new Set(phaseUsageRecords.map((record) => record.phase)); + expect(phases.has('phase1_execute')).toBe(true); + expect(phases.has('phase2_report')).toBe(true); + expect([...phases].some((phase) => typeof phase === 'string' && phase.startsWith('phase3_'))).toBe(true); + expect(phaseUsageRecords[0]).toEqual(expect.objectContaining({ + step: expect.any(String), + provider: 'mock', + provider_model: expect.any(String), + step_type: 'agent', + usage_missing: expect.any(Boolean), + usage: expect.any(Object), + })); + + const shadowRecords = readJsonl(shadowLogPath); + expect(shadowRecords.some((record) => record.type === 'workflow_start')).toBe(true); + expect(shadowRecords.some((record) => record.type === 'workflow_complete')).toBe(true); + + const monitor = JSON.parse(readFileSync(monitorPath, 'utf-8')) as unknown; + expect(monitor).toBeTruthy(); + expect(JSON.stringify(monitor)).toContain('"takt.run.id"'); + }, 240_000); +}); diff --git a/src/__tests__/otelFoundation.test.ts b/src/__tests__/otelFoundation.test.ts index 2cc454e35..6bcf2f3c5 100644 --- a/src/__tests__/otelFoundation.test.ts +++ b/src/__tests__/otelFoundation.test.ts @@ -312,7 +312,7 @@ describe('otel foundation', () => { 'takt.provider.name': 'mock', 'takt.model.name': 'mock-model', 'takt.step.name': 'implement', - 'takt.step.type': 'normal', + 'takt.step.type': 'agent', 'takt.phase.number': 1, 'takt.phase.name': 'execute', 'takt.phase.status': 'done', diff --git a/src/__tests__/phaseUsageEvent.test.ts b/src/__tests__/phaseUsageEvent.test.ts index ddc2ea0b0..648f10200 100644 --- a/src/__tests__/phaseUsageEvent.test.ts +++ b/src/__tests__/phaseUsageEvent.test.ts @@ -19,7 +19,7 @@ describe('phase usage event mapper', () => { 'takt.provider.name': 'codex', 'takt.model.name': 'gpt-5', 'takt.step.name': 'implement', - 'takt.step.type': 'normal', + 'takt.step.type': 'agent', 'takt.phase.number': 1, 'takt.phase.name': 'execute', 'takt.phase.execution_id': 'implement:1:1:1', @@ -40,7 +40,7 @@ describe('phase usage event mapper', () => { provider: 'codex', provider_model: 'gpt-5', step: 'implement', - step_type: 'normal', + step_type: 'agent', phase: 'phase1_execute', phase_name: 'execute', phase_execution_id: 'implement:1:1:1', @@ -65,7 +65,7 @@ describe('phase usage event mapper', () => { attributes: { 'takt.provider.name': 'claude', 'takt.step.name': 'review', - 'takt.step.type': 'normal', + 'takt.step.type': 'agent', 'takt.phase.number': 2, 'takt.phase.name': 'report', 'takt.phase.status': 'error', @@ -94,7 +94,7 @@ describe('phase usage event mapper', () => { 'takt.provider.name': 'claude-sdk', 'takt.model.name': 'claude-sonnet-4', 'takt.step.name': 'implement', - 'takt.step.type': 'normal', + 'takt.step.type': 'agent', 'takt.phase.execution_id': 'implement:3:1:1', 'takt.judge.stage': 3, 'takt.judge.method': 'ai_judge', @@ -125,7 +125,7 @@ describe('phase usage event mapper', () => { attributes: { 'takt.provider.name': 'codex', 'takt.step.name': 'implement', - 'takt.step.type': 'normal', + 'takt.step.type': 'agent', 'takt.phase.number': 3, 'takt.phase.name': 'judge', }, @@ -148,7 +148,7 @@ describe('phase usage event mapper', () => { attributes: { 'takt.provider.name': 'mock', 'takt.step.name': 'implement', - 'takt.step.type': 'normal', + 'takt.step.type': 'agent', 'takt.phase.number': 1, 'takt.phase.name': 'execute', 'takt.phase.status': 'done', diff --git a/src/__tests__/usageEventsSpanProcessor.test.ts b/src/__tests__/usageEventsSpanProcessor.test.ts index d217295ce..1b1f83632 100644 --- a/src/__tests__/usageEventsSpanProcessor.test.ts +++ b/src/__tests__/usageEventsSpanProcessor.test.ts @@ -104,7 +104,7 @@ function makePhaseSpan(runId: string): Record { 'takt.provider.name': 'mock', 'takt.model.name': 'mock-model', 'takt.step.name': 'implement', - 'takt.step.type': 'normal', + 'takt.step.type': 'agent', 'takt.phase.number': 1, 'takt.phase.name': 'execute', 'takt.phase.status': 'done', diff --git a/src/core/logging/phaseUsageEvent.ts b/src/core/logging/phaseUsageEvent.ts index b586a430c..21622c457 100644 --- a/src/core/logging/phaseUsageEvent.ts +++ b/src/core/logging/phaseUsageEvent.ts @@ -3,7 +3,6 @@ import { USAGE_MISSING_REASONS, type UsageMissingReason, } from './contracts.js'; -import type { StepType } from './providerEvent.js'; import { isProviderType, type ProviderType } from '../../shared/types/provider.js'; export type PhaseUsageType = @@ -13,6 +12,7 @@ export type PhaseUsageType = | 'phase3_tag' | 'phase3_fallback'; +export type PhaseUsageStepType = 'agent' | 'system' | 'workflow_call'; type PhaseName = 'execute' | 'report' | 'judge'; type JudgeMethod = 'structured_output' | 'phase3_tag' | 'ai_judge'; type JudgeStage = 1 | 2 | 3; @@ -23,7 +23,7 @@ export interface PhaseUsageEventLogRecord { provider: ProviderType; provider_model: string; step: string; - step_type: StepType; + step_type: PhaseUsageStepType; phase: PhaseUsageType; phase_name: PhaseName; phase_execution_id?: string; @@ -52,7 +52,7 @@ interface PhaseUsageMeta { provider: ProviderType; providerModel: string; step: string; - stepType: StepType; + stepType: PhaseUsageStepType; phase: PhaseUsageType; phaseName: PhaseName; phaseExecutionId?: string; @@ -255,12 +255,10 @@ function getProvider(attributes: Record, key: string): Provider return isProviderType(value) ? value : undefined; } -function getStepType(attributes: Record, key: string): StepType | undefined { +function getStepType(attributes: Record, key: string): PhaseUsageStepType | undefined { const value = getString(attributes, key); - return value === 'normal' - || value === 'parallel' - || value === 'arpeggio' - || value === 'team_leader' + return value === 'agent' + || value === 'system' || value === 'workflow_call' ? value : undefined; diff --git a/vitest.config.e2e.mock.ts b/vitest.config.e2e.mock.ts index d06b54701..dc353cacf 100644 --- a/vitest.config.e2e.mock.ts +++ b/vitest.config.e2e.mock.ts @@ -9,6 +9,7 @@ export default defineConfig({ 'e2e/specs/pipeline-skip-git.e2e.ts', 'e2e/specs/pipeline-local-repo.e2e.ts', 'e2e/specs/report-judge.e2e.ts', + 'e2e/specs/observability.e2e.ts', 'e2e/specs/report-file-output.e2e.ts', 'e2e/specs/team-leader-worker-pool.e2e.ts', 'e2e/specs/add.e2e.ts', From b6a55324378f0d3cf02e01c8b715e46e07bb986b Mon Sep 17 00:00:00 2001 From: Junichi Kato Date: Sat, 6 Jun 2026 17:30:02 +0900 Subject: [PATCH 6/9] fix(observability): harden phase usage exporter plumbing --- src/__tests__/logging-contracts.test.ts | 2 + src/__tests__/otelFoundation.test.ts | 58 +++++------------ .../usageEventsSpanProcessor.test.ts | 14 ++++- src/agents/judge-status-usecase.ts | 62 +++++++++++------- .../prompt-based-structured-caller.ts | 22 ++----- src/core/logging/phaseUsageEvent.ts | 40 ++++++------ src/core/logging/providerEvent.ts | 63 +++++++++++-------- src/infra/observability/otelFoundation.ts | 13 ++++ .../observability/usageEventsSpanProcessor.ts | 6 +- 9 files changed, 146 insertions(+), 134 deletions(-) diff --git a/src/__tests__/logging-contracts.test.ts b/src/__tests__/logging-contracts.test.ts index bb866a75d..34ff39103 100644 --- a/src/__tests__/logging-contracts.test.ts +++ b/src/__tests__/logging-contracts.test.ts @@ -72,6 +72,8 @@ describe('logging contracts', () => { ); expect(record.usage.cached_input_tokens).toBe(4); + expect(record.usage.cache_creation_input_tokens).toBe(2); + expect(record.usage.cache_read_input_tokens).toBe(2); }); it('should reject usage_missing records with unknown reason values', () => { diff --git a/src/__tests__/otelFoundation.test.ts b/src/__tests__/otelFoundation.test.ts index 6bcf2f3c5..d671baa5b 100644 --- a/src/__tests__/otelFoundation.test.ts +++ b/src/__tests__/otelFoundation.test.ts @@ -339,66 +339,38 @@ describe('otel foundation', () => { } }); - it('should release the shared SDK when run exporter registration fails', async () => { + it('should reject mismatched run-scoped exporter ids before starting the SDK', async () => { const foundation = await loadFoundationWithMockedSdk(); const tempDir = mkdtempSync(join(tmpdir(), 'takt-otel-registration-failure-')); - const firstPhaseUsageLogPath = join(tempDir, 'first-usage-events.phase.jsonl'); - const secondShadowLogPath = join(tempDir, 'second-otel-session-shadow.jsonl'); - const secondPhaseUsageLogPath = join(tempDir, 'second-usage-events.phase.jsonl'); - let first: { shutdown(): Promise } | undefined; + const shadowLogPath = join(tempDir, 'session-otel-session-shadow.jsonl'); + const phaseUsageLogPath = join(tempDir, 'session-usage-events.phase.jsonl'); + const monitorPath = join(tempDir, 'monitor.json'); try { - first = await foundation.initializeOtelFoundation( - enabledUsageEventsPhaseObservability, - { - usageEventsExporter: { - runId: 'run-1', - sessionId: 'session-1', - phaseUsageLogPath: firstPhaseUsageLogPath, - }, - }, - ); - await expect(foundation.initializeOtelFoundation( enabledAllObservability, { sessionLogExporter: { runId: 'run-2', - shadowLogPath: secondShadowLogPath, - sanitizedTask: 'second task', + shadowLogPath, + sanitizedTask: 'task', workflowName: 'default', }, usageEventsExporter: { runId: 'run-1', - sessionId: 'session-duplicate', - phaseUsageLogPath: secondPhaseUsageLogPath, + sessionId: 'session-1', + phaseUsageLogPath, + }, + monitorJsonExporter: { + runId: 'run-2', + monitorPath, }, }, - )).rejects.toThrow('Phase usage event exporter is already registered for runId: run-1'); - - const sessionLogProcessor = foundation.constructedOptions[0]?.spanProcessors?.[0] as { - onStart(span: unknown, parentContext: unknown): void; - }; - sessionLogProcessor.onStart({ - name: 'step.implement', - startTime: [1_778_777_205, 0], - attributes: { - 'takt.run.id': 'run-2', - 'takt.step.name': 'implement', - 'takt.step.persona': 'coder', - 'takt.step.iteration': 1, - }, - }, {}); - const secondShadowRecords = readFileSync(secondShadowLogPath, 'utf-8').trim().split('\n'); - expect(secondShadowRecords).toHaveLength(1); + )).rejects.toThrow('Run-scoped OpenTelemetry exporters must share the same runId'); - await first.shutdown(); - - expect(foundation.shutdownMock).toHaveBeenCalledOnce(); + expect(foundation.startMock).not.toHaveBeenCalled(); + expect(foundation.constructedOptions).toEqual([]); } finally { - if (first) { - await first.shutdown(); - } rmSync(tempDir, { recursive: true, force: true }); } }); diff --git a/src/__tests__/usageEventsSpanProcessor.test.ts b/src/__tests__/usageEventsSpanProcessor.test.ts index 1b1f83632..9880f9cc4 100644 --- a/src/__tests__/usageEventsSpanProcessor.test.ts +++ b/src/__tests__/usageEventsSpanProcessor.test.ts @@ -63,7 +63,7 @@ describe('UsageEventsSpanProcessor', () => { ]); }); - it('rejects duplicate runId registrations', () => { + it('ignores duplicate runId registrations without redirecting existing output', () => { const firstLogPath = createTempLogPath('first-usage-events.phase.jsonl'); const secondLogPath = createTempLogPath('second-usage-events.phase.jsonl'); const processor = new UsageEventsSpanProcessor(); @@ -77,7 +77,17 @@ describe('UsageEventsSpanProcessor', () => { runId: 'run-1', sessionId: 'session-duplicate', phaseUsageLogPath: secondLogPath, - })).toThrow('Phase usage event exporter is already registered for runId: run-1'); + })).not.toThrow(); + + processor.onEnd(makePhaseSpan('run-1') as unknown as ReadableSpan); + + expect(readRecords(firstLogPath)).toEqual([ + expect.objectContaining({ + run_id: 'run-1', + session_id: 'session-1', + phase: 'phase1_execute', + }), + ]); expect(existsSync(secondLogPath)).toBe(false); }); diff --git a/src/agents/judge-status-usecase.ts b/src/agents/judge-status-usecase.ts index 867f9519d..e33ced688 100644 --- a/src/agents/judge-status-usecase.ts +++ b/src/agents/judge-status-usecase.ts @@ -16,20 +16,24 @@ export interface JudgeStatusOptions { language?: Language; interactive?: boolean; onStream?: StreamCallback; - onJudgeStage?: (entry: { - stage: 1 | 2 | 3; - method: 'structured_output' | 'phase3_tag' | 'ai_judge'; - status: 'done' | 'error' | 'skipped'; - instruction: string; - response: string; - providerUsage?: ProviderUsageSnapshot; - }) => void; + onJudgeStage?: (entry: JudgeStageLogEntry) => void; onStructuredPromptResolved?: (promptParts: { systemPrompt: string; userInstruction: string; }) => void; } +export interface JudgeStageLogEntry { + stage: 1 | 2 | 3; + method: 'structured_output' | 'phase3_tag' | 'ai_judge'; + status: 'done' | 'error' | 'skipped'; + instruction: string; + response: string; + providerUsage?: ProviderUsageSnapshot; +} + +type JudgeResponseEntry = Pick; + export interface TagJudgeRunOptions { cwd: string; provider?: ProviderType; @@ -133,6 +137,28 @@ export async function evaluateCondition( return detectJudgeIndex(response.content); } +export function createJudgeStageRecorder(): { + capture(entry: JudgeResponseEntry): void; + stage(entry: Pick): JudgeStageLogEntry; +} { + let latest: JudgeResponseEntry = { + status: 'skipped', + instruction: '', + response: '', + }; + return { + capture(entry): void { + latest = entry; + }, + stage(entry): JudgeStageLogEntry { + return { + ...entry, + ...latest, + }; + }, + }; +} + export async function judgeStatus( structuredInstruction: string, tagInstruction: string, @@ -207,32 +233,20 @@ export async function judgeStatus( const conditions = buildJudgeConditions(rules, interactiveEnabled); if (conditions.length > 0) { - let stage3Status: 'done' | 'error' | 'skipped' = 'skipped'; - let stage3Instruction = ''; - let stage3Response = ''; - let stage3ProviderUsage: ProviderUsageSnapshot | undefined; + const stage3 = createJudgeStageRecorder(); const normalizedConditions = conditions.map((c, pos) => ({ index: pos, text: c.text })); const fallbackPosition = await evaluateCondition(structuredInstruction, normalizedConditions, { cwd: options.cwd, provider: options.provider, resolvedProvider: options.resolvedProvider, resolvedModel: options.resolvedModel, - onJudgeResponse: (entry) => { - stage3Status = entry.status; - stage3Instruction = entry.instruction; - stage3Response = entry.response; - stage3ProviderUsage = entry.providerUsage; - }, + onJudgeResponse: stage3.capture, }); - options.onJudgeStage?.({ + options.onJudgeStage?.(stage3.stage({ stage: 3, method: 'ai_judge', - status: stage3Status, - instruction: stage3Instruction, - response: stage3Response, - providerUsage: stage3ProviderUsage, - }); + })); if (fallbackPosition >= 0 && fallbackPosition < conditions.length) { const originalIndex = conditions[fallbackPosition]?.index; diff --git a/src/agents/structured-caller/prompt-based-structured-caller.ts b/src/agents/structured-caller/prompt-based-structured-caller.ts index e3ecaf369..9939d0bfd 100644 --- a/src/agents/structured-caller/prompt-based-structured-caller.ts +++ b/src/agents/structured-caller/prompt-based-structured-caller.ts @@ -1,5 +1,4 @@ import type { WorkflowRule, PartDefinition } from '../../core/models/types.js'; -import type { ProviderUsageSnapshot } from '../../core/models/response.js'; import { buildPromptBasedDecomposePrompt, buildPromptBasedMorePartsPrompt, @@ -8,6 +7,7 @@ import { import { buildJudgePrompt, detectJudgeIndex, isValidRuleIndex, buildJudgeConditions } from '../judge-utils.js'; import { runAgent } from '../runner.js'; import { + createJudgeStageRecorder, runTagJudgeStage, type EvaluateConditionOptions, type JudgeStatusOptions, @@ -102,31 +102,19 @@ export class PromptBasedStructuredCaller implements StructuredCaller { const conditions = buildJudgeConditions(rules, interactiveEnabled); if (conditions.length > 0) { - let stage3Status: 'done' | 'error' | 'skipped' = 'skipped'; - let stage3Instruction = ''; - let stage3Response = ''; - let stage3ProviderUsage: ProviderUsageSnapshot | undefined; + const stage3 = createJudgeStageRecorder(); const fallbackIndex = await this.evaluateCondition(structuredInstruction, conditions, { cwd: options.cwd, provider: options.provider, resolvedProvider: options.resolvedProvider, resolvedModel: options.resolvedModel, - onJudgeResponse: (entry) => { - stage3Status = entry.status; - stage3Instruction = entry.instruction; - stage3Response = entry.response; - stage3ProviderUsage = entry.providerUsage; - }, + onJudgeResponse: stage3.capture, }); - options.onJudgeStage?.({ + options.onJudgeStage?.(stage3.stage({ stage: 3, method: 'ai_judge', - status: stage3Status, - instruction: stage3Instruction, - response: stage3Response, - providerUsage: stage3ProviderUsage, - }); + })); if (isValidRuleIndex(fallbackIndex, rules, interactiveEnabled)) { return { ruleIndex: fallbackIndex, method: 'ai_judge' }; diff --git a/src/core/logging/phaseUsageEvent.ts b/src/core/logging/phaseUsageEvent.ts index 21622c457..e544ed838 100644 --- a/src/core/logging/phaseUsageEvent.ts +++ b/src/core/logging/phaseUsageEvent.ts @@ -3,6 +3,8 @@ import { USAGE_MISSING_REASONS, type UsageMissingReason, } from './contracts.js'; +import { buildUsageEventPayload } from './providerEvent.js'; +import type { ProviderUsageSnapshot } from '../models/response.js'; import { isProviderType, type ProviderType } from '../../shared/types/provider.js'; export type PhaseUsageType = @@ -171,12 +173,19 @@ function buildRecord( function extractUsage(attributes: Record): Pick & { missing: boolean; } { + const snapshot = usageSnapshotFromAttributes(attributes); + const payload = buildUsageEventPayload(snapshot); + return { + missing: payload.usage_missing, + ...payload, + }; +} + +function usageSnapshotFromAttributes(attributes: Record): ProviderUsageSnapshot { if (attributes['takt.usage.missing'] === true) { return { - missing: true, - usage_missing: true, + usageMissing: true, reason: getUsageMissingReason(attributes['takt.usage.missing_reason']), - usage: {}, }; } @@ -187,26 +196,21 @@ function extractUsage(attributes: Record): Pick(key: K, value: number | undefined): Partial> { - return value !== undefined ? { [key]: value } as Partial> : {}; -} - function hrTimeToIso(time: readonly [number, number] | undefined): string { if (!time) { return new Date().toISOString(); diff --git a/src/core/logging/providerEvent.ts b/src/core/logging/providerEvent.ts index 49f3bdd74..5952ed3ad 100644 --- a/src/core/logging/providerEvent.ts +++ b/src/core/logging/providerEvent.ts @@ -33,9 +33,13 @@ export interface UsageEventLogRecord { output_tokens?: number; total_tokens?: number; cached_input_tokens?: number; + cache_creation_input_tokens?: number; + cache_read_input_tokens?: number; }; } +export type UsageEventPayload = Pick; + interface UsageEventMeta { runId: string; sessionId: string; @@ -129,47 +133,52 @@ export function buildUsageEventRecord( meta: UsageEventMeta, params: BuildUsageRecordParams ): UsageEventLogRecord { - if (params.usage.usageMissing) { - if (typeof params.usage.reason !== 'string' || params.usage.reason.length === 0) { + const payload = buildUsageEventPayload(params.usage); + return { + run_id: meta.runId, + session_id: meta.sessionId, + provider: meta.provider, + provider_model: meta.providerModel, + step: meta.step, + step_type: meta.stepType, + timestamp: (params.timestamp ?? new Date()).toISOString(), + success: params.success, + ...payload, + }; +} + +export function buildUsageEventPayload(usageSnapshot: ProviderUsageSnapshot): UsageEventPayload { + if (usageSnapshot.usageMissing) { + if (typeof usageSnapshot.reason !== 'string' || usageSnapshot.reason.length === 0) { throw new Error('[usage-events] reason is required when usageMissing=true'); } return { - run_id: meta.runId, - session_id: meta.sessionId, - provider: meta.provider, - provider_model: meta.providerModel, - step: meta.step, - step_type: meta.stepType, - timestamp: (params.timestamp ?? new Date()).toISOString(), - success: params.success, usage_missing: true, - reason: assertUsageMissingReason(params.usage.reason), + reason: assertUsageMissingReason(usageSnapshot.reason), usage: {}, }; } - assertFiniteNumber(params.usage.inputTokens, 'usage.inputTokens'); - assertFiniteNumber(params.usage.outputTokens, 'usage.outputTokens'); - assertFiniteNumber(params.usage.totalTokens, 'usage.totalTokens'); + assertFiniteNumber(usageSnapshot.inputTokens, 'usage.inputTokens'); + assertFiniteNumber(usageSnapshot.outputTokens, 'usage.outputTokens'); + assertFiniteNumber(usageSnapshot.totalTokens, 'usage.totalTokens'); const usage = { - input_tokens: params.usage.inputTokens, - output_tokens: params.usage.outputTokens, - total_tokens: params.usage.totalTokens, - ...(Number.isFinite(params.usage.cachedInputTokens) - ? { cached_input_tokens: params.usage.cachedInputTokens } + input_tokens: usageSnapshot.inputTokens, + output_tokens: usageSnapshot.outputTokens, + total_tokens: usageSnapshot.totalTokens, + ...(Number.isFinite(usageSnapshot.cachedInputTokens) + ? { cached_input_tokens: usageSnapshot.cachedInputTokens } + : {}), + ...(Number.isFinite(usageSnapshot.cacheCreationInputTokens) + ? { cache_creation_input_tokens: usageSnapshot.cacheCreationInputTokens } + : {}), + ...(Number.isFinite(usageSnapshot.cacheReadInputTokens) + ? { cache_read_input_tokens: usageSnapshot.cacheReadInputTokens } : {}), }; return { - run_id: meta.runId, - session_id: meta.sessionId, - provider: meta.provider, - provider_model: meta.providerModel, - step: meta.step, - step_type: meta.stepType, - timestamp: (params.timestamp ?? new Date()).toISOString(), - success: params.success, usage_missing: false, usage, }; diff --git a/src/infra/observability/otelFoundation.ts b/src/infra/observability/otelFoundation.ts index e6995b1b0..46a360535 100644 --- a/src/infra/observability/otelFoundation.ts +++ b/src/infra/observability/otelFoundation.ts @@ -48,6 +48,7 @@ export async function initializeOtelFoundation( if (!config.enabled) { return createNoopHandle(); } + validateRunScopedExporterIds(options); const shared = await acquireSdk(); let registrations: OtelRegistration[]; @@ -143,6 +144,18 @@ function registerRunExporters( return registrations; } +function validateRunScopedExporterIds(options: OtelFoundationOptions | undefined): void { + const runIds = [ + options?.sessionLogExporter?.runId, + options?.usageEventsExporter?.runId, + options?.monitorJsonExporter?.runId, + ].filter((runId): runId is string => typeof runId === 'string'); + const uniqueRunIds = new Set(runIds); + if (uniqueRunIds.size > 1) { + throw new Error('Run-scoped OpenTelemetry exporters must share the same runId'); + } +} + function unregisterAll(registrations: OtelRegistration[]): void { for (const unregister of registrations.splice(0).reverse()) { unregister(); diff --git a/src/infra/observability/usageEventsSpanProcessor.ts b/src/infra/observability/usageEventsSpanProcessor.ts index 31523de5d..8d6973bf2 100644 --- a/src/infra/observability/usageEventsSpanProcessor.ts +++ b/src/infra/observability/usageEventsSpanProcessor.ts @@ -27,7 +27,11 @@ export class UsageEventsSpanProcessor implements SpanProcessor { register(options: UsageEventsSpanProcessorOptions): () => void { if (this.registrations.has(options.runId)) { - throw new Error(`Phase usage event exporter is already registered for runId: ${options.runId}`); + log.warn('Ignoring duplicate phase usage event registration', { + runId: options.runId, + phaseUsageLogPath: options.phaseUsageLogPath, + }); + return () => {}; } this.registrations.set(options.runId, options); return () => { From 3a2b82565052856a567e8643b225521468dd9d99 Mon Sep 17 00:00:00 2001 From: Junichi Kato Date: Mon, 8 Jun 2026 15:06:16 +0900 Subject: [PATCH 7/9] fix(observability): capture claude headless usage --- src/__tests__/claude-headless-client.test.ts | 32 +++++++ .../claude-headless-stream-json.test.ts | 88 +++++++++++++++++++ src/infra/claude-headless/result-response.ts | 1 + .../claude-headless/stream-json-lines.ts | 49 +++++++++++ 4 files changed, 170 insertions(+) diff --git a/src/__tests__/claude-headless-client.test.ts b/src/__tests__/claude-headless-client.test.ts index a672c78a8..bfd3cf2e6 100644 --- a/src/__tests__/claude-headless-client.test.ts +++ b/src/__tests__/claude-headless-client.test.ts @@ -133,6 +133,38 @@ describe('callClaudeHeadless', () => { expect(res.content).toBe('ok'); }); + it('returns provider usage from the final stream-json result', async () => { + stubSpawn({ + stdoutChunks: [ + `${JSON.stringify({ + type: 'result', + subtype: 'success', + result: 'ok', + usage: { + input_tokens: 12, + output_tokens: 3, + cache_creation_input_tokens: 5, + cache_read_input_tokens: 7, + }, + })}\n`, + ], + closeCode: 0, + }); + + const res = await callClaudeHeadless('agent', 'hi', { cwd: '/tmp' }); + + expect(res.status).toBe('done'); + expect(res.providerUsage).toEqual({ + inputTokens: 12, + outputTokens: 3, + totalTokens: 15, + cachedInputTokens: 12, + cacheCreationInputTokens: 5, + cacheReadInputTokens: 7, + usageMissing: false, + }); + }); + it('uses only the final result text when assistant content and result contain the same text', async () => { stubSpawn({ stdoutChunks: [ diff --git a/src/__tests__/claude-headless-stream-json.test.ts b/src/__tests__/claude-headless-stream-json.test.ts index 48ddf806d..75cf3c125 100644 --- a/src/__tests__/claude-headless-stream-json.test.ts +++ b/src/__tests__/claude-headless-stream-json.test.ts @@ -31,6 +31,7 @@ describe('claude-headless stream-json line parsing', () => { success: true, error: undefined, structuredOutput: undefined, + providerUsage: undefined, }); expect(aggregateContentFromStdout(stdout)).toBe('ab'); }); @@ -98,6 +99,7 @@ describe('claude-headless stream-json line parsing', () => { success: false, error: 'partial answer', structuredOutput: undefined, + providerUsage: undefined, }); }); @@ -123,6 +125,7 @@ describe('claude-headless stream-json line parsing', () => { success: false, error: 'final failure', structuredOutput: undefined, + providerUsage: undefined, }); }); @@ -148,6 +151,7 @@ describe('claude-headless stream-json line parsing', () => { success: true, error: undefined, structuredOutput: undefined, + providerUsage: undefined, }); }); @@ -169,6 +173,7 @@ describe('claude-headless stream-json line parsing', () => { success: false, error: 'explicit failure', structuredOutput: undefined, + providerUsage: undefined, }); }); @@ -188,6 +193,7 @@ describe('claude-headless stream-json line parsing', () => { success: true, error: undefined, structuredOutput: undefined, + providerUsage: undefined, }); }); @@ -213,6 +219,7 @@ describe('claude-headless stream-json line parsing', () => { success: true, error: undefined, structuredOutput: undefined, + providerUsage: undefined, }); }); @@ -237,6 +244,7 @@ describe('claude-headless stream-json line parsing', () => { success: true, error: undefined, structuredOutput: undefined, + providerUsage: undefined, }); }); @@ -257,6 +265,7 @@ describe('claude-headless stream-json line parsing', () => { success: false, error: 'explicit failure', structuredOutput: undefined, + providerUsage: undefined, }); }); @@ -277,6 +286,7 @@ describe('claude-headless stream-json line parsing', () => { success: true, error: undefined, structuredOutput: { decision: 'approved' }, + providerUsage: undefined, }); }); @@ -297,6 +307,83 @@ describe('claude-headless stream-json line parsing', () => { success: true, error: undefined, structuredOutput: { decision: 'approved' }, + providerUsage: undefined, + }); + }); + + it('captures provider usage from the final result event', () => { + const stdout = [ + JSON.stringify({ + type: 'result', + subtype: 'success', + result: 'final answer', + usage: { + input_tokens: 2353, + output_tokens: 4, + cache_creation_input_tokens: 2021, + cache_read_input_tokens: 15821, + }, + }), + ].join('\n'); + + expect(aggregateResultFromStdout(stdout)).toEqual({ + content: 'final answer', + displayText: '', + hasResult: true, + success: true, + error: undefined, + structuredOutput: undefined, + providerUsage: { + inputTokens: 2353, + outputTokens: 4, + totalTokens: 2357, + cachedInputTokens: 17842, + cacheCreationInputTokens: 2021, + cacheReadInputTokens: 15821, + usageMissing: false, + }, + }); + }); + + it('captures provider usage when only one cache token field is present', () => { + const stdout = [ + JSON.stringify({ + type: 'result', + subtype: 'success', + result: 'final answer', + usage: { + input_tokens: 20, + output_tokens: 5, + cache_read_input_tokens: 7, + }, + }), + ].join('\n'); + + expect(aggregateResultFromStdout(stdout).providerUsage).toEqual({ + inputTokens: 20, + outputTokens: 5, + totalTokens: 25, + cachedInputTokens: 7, + cacheReadInputTokens: 7, + usageMissing: false, + }); + }); + + it('marks provider usage missing when required token fields are absent', () => { + const stdout = [ + JSON.stringify({ + type: 'result', + subtype: 'success', + result: 'final answer', + usage: { + input_tokens: 20, + }, + }), + ].join('\n'); + + expect(aggregateResultFromStdout(stdout).providerUsage).toEqual({ + usageMissing: true, + reason: 'usage_tokens_missing', }); }); @@ -317,6 +404,7 @@ describe('claude-headless stream-json line parsing', () => { success: true, error: undefined, structuredOutput: undefined, + providerUsage: undefined, }); }); diff --git a/src/infra/claude-headless/result-response.ts b/src/infra/claude-headless/result-response.ts index e10202c3e..b158f0055 100644 --- a/src/infra/claude-headless/result-response.ts +++ b/src/infra/claude-headless/result-response.ts @@ -128,5 +128,6 @@ export function buildClaudeHeadlessResponse(input: ClaudeHeadlessResponseInput): timestamp: new Date(), sessionId, structuredOutput, + providerUsage: parsed.providerUsage, }; } diff --git a/src/infra/claude-headless/stream-json-lines.ts b/src/infra/claude-headless/stream-json-lines.ts index 0cb43a0a2..17f8e4570 100644 --- a/src/infra/claude-headless/stream-json-lines.ts +++ b/src/infra/claude-headless/stream-json-lines.ts @@ -1,9 +1,16 @@ +import { USAGE_MISSING_REASONS } from '../../core/logging/contracts.js'; +import type { ProviderUsageSnapshot } from '../../core/models/response.js'; + function toRecord(value: unknown): Record | undefined { return value !== null && typeof value === 'object' && !Array.isArray(value) ? (value as Record) : undefined; } +function toNumber(value: unknown): number | undefined { + return typeof value === 'number' && Number.isFinite(value) ? value : undefined; +} + function pickString(source: Record | undefined, keys: string[]): string | undefined { if (!source) { return undefined; @@ -164,6 +171,44 @@ function extractStructuredOutput(event: Record): Record): ProviderUsageSnapshot | undefined { + const usage = toRecord(event.usage); + if (!usage) { + return undefined; + } + + const inputTokens = toNumber(usage.input_tokens); + const outputTokens = toNumber(usage.output_tokens); + const cacheCreationInputTokens = toNumber(usage.cache_creation_input_tokens); + const cacheReadInputTokens = toNumber(usage.cache_read_input_tokens); + if (inputTokens === undefined || outputTokens === undefined) { + return { + usageMissing: true, + reason: USAGE_MISSING_REASONS.TOKENS_MISSING, + }; + } + + const providerUsage: ProviderUsageSnapshot = { + inputTokens, + outputTokens, + totalTokens: inputTokens + outputTokens, + usageMissing: false, + }; + const cachedInputTokens = cacheCreationInputTokens !== undefined && cacheReadInputTokens !== undefined + ? cacheCreationInputTokens + cacheReadInputTokens + : cacheReadInputTokens ?? cacheCreationInputTokens; + if (cachedInputTokens !== undefined) { + providerUsage.cachedInputTokens = cachedInputTokens; + } + if (cacheCreationInputTokens !== undefined) { + providerUsage.cacheCreationInputTokens = cacheCreationInputTokens; + } + if (cacheReadInputTokens !== undefined) { + providerUsage.cacheReadInputTokens = cacheReadInputTokens; + } + return providerUsage; +} + function isSuccessfulResultEvent(event: Record): boolean { if (event.is_error === true || event.isError === true) { return false; @@ -180,6 +225,7 @@ export interface StreamJsonStdoutResult { success: boolean; error?: string; structuredOutput?: Record; + providerUsage?: ProviderUsageSnapshot; } export function aggregateResultFromStdout(stdout: string): StreamJsonStdoutResult { @@ -189,6 +235,7 @@ export function aggregateResultFromStdout(stdout: string): StreamJsonStdoutResul let success = false; let error: string | undefined; let structuredOutput: Record | undefined; + let providerUsage: ProviderUsageSnapshot | undefined; for (const line of stdout.split('\n')) { const parsed = parseStreamJsonLine(line); @@ -211,6 +258,7 @@ export function aggregateResultFromStdout(stdout: string): StreamJsonStdoutResul success = isSuccessfulResultEvent(root); error = success ? undefined : extractResultError(root, resultContent); structuredOutput = extractStructuredOutput(root); + providerUsage = extractProviderUsage(root); } const normalizedDisplayText = displayText.trim(); @@ -223,6 +271,7 @@ export function aggregateResultFromStdout(stdout: string): StreamJsonStdoutResul success: fallbackSuccess, error, structuredOutput, + providerUsage, }; } From 28bfdeb71ae089e0fd5b082171b8ba70f8e19c44 Mon Sep 17 00:00:00 2001 From: Junichi Kato Date: Mon, 8 Jun 2026 15:08:36 +0900 Subject: [PATCH 8/9] refactor(observability): share claude usage normalization --- .../claude-executor-structured-output.test.ts | 21 ++++++++ .../claude-headless/stream-json-lines.ts | 46 +---------------- src/infra/claude/executor.ts | 41 ++------------- src/infra/claude/usage.ts | 50 +++++++++++++++++++ 4 files changed, 76 insertions(+), 82 deletions(-) create mode 100644 src/infra/claude/usage.ts diff --git a/src/__tests__/claude-executor-structured-output.test.ts b/src/__tests__/claude-executor-structured-output.test.ts index 64b91b303..dd246c14e 100644 --- a/src/__tests__/claude-executor-structured-output.test.ts +++ b/src/__tests__/claude-executor-structured-output.test.ts @@ -241,4 +241,25 @@ describe('QueryExecutor — structuredOutput 抽出', () => { reason: 'usage_not_available', }); }); + + it('usage に必須 token が欠ける場合は usage_tokens_missing を返す', async () => { + mockQuery.mockReturnValue(createMockQuery([ + { + type: 'result', + subtype: 'success', + result: 'done', + usage: { + input_tokens: 12, + }, + }, + ])); + + const executor = new QueryExecutor(); + const result = await executor.execute('test', { cwd: '/tmp' }); + + expect(result.providerUsage).toEqual({ + usageMissing: true, + reason: 'usage_tokens_missing', + }); + }); }); diff --git a/src/infra/claude-headless/stream-json-lines.ts b/src/infra/claude-headless/stream-json-lines.ts index 17f8e4570..b4ec5b456 100644 --- a/src/infra/claude-headless/stream-json-lines.ts +++ b/src/infra/claude-headless/stream-json-lines.ts @@ -1,5 +1,5 @@ -import { USAGE_MISSING_REASONS } from '../../core/logging/contracts.js'; import type { ProviderUsageSnapshot } from '../../core/models/response.js'; +import { extractClaudeProviderUsage } from '../claude/usage.js'; function toRecord(value: unknown): Record | undefined { return value !== null && typeof value === 'object' && !Array.isArray(value) @@ -7,10 +7,6 @@ function toRecord(value: unknown): Record | undefined { : undefined; } -function toNumber(value: unknown): number | undefined { - return typeof value === 'number' && Number.isFinite(value) ? value : undefined; -} - function pickString(source: Record | undefined, keys: string[]): string | undefined { if (!source) { return undefined; @@ -171,44 +167,6 @@ function extractStructuredOutput(event: Record): Record): ProviderUsageSnapshot | undefined { - const usage = toRecord(event.usage); - if (!usage) { - return undefined; - } - - const inputTokens = toNumber(usage.input_tokens); - const outputTokens = toNumber(usage.output_tokens); - const cacheCreationInputTokens = toNumber(usage.cache_creation_input_tokens); - const cacheReadInputTokens = toNumber(usage.cache_read_input_tokens); - if (inputTokens === undefined || outputTokens === undefined) { - return { - usageMissing: true, - reason: USAGE_MISSING_REASONS.TOKENS_MISSING, - }; - } - - const providerUsage: ProviderUsageSnapshot = { - inputTokens, - outputTokens, - totalTokens: inputTokens + outputTokens, - usageMissing: false, - }; - const cachedInputTokens = cacheCreationInputTokens !== undefined && cacheReadInputTokens !== undefined - ? cacheCreationInputTokens + cacheReadInputTokens - : cacheReadInputTokens ?? cacheCreationInputTokens; - if (cachedInputTokens !== undefined) { - providerUsage.cachedInputTokens = cachedInputTokens; - } - if (cacheCreationInputTokens !== undefined) { - providerUsage.cacheCreationInputTokens = cacheCreationInputTokens; - } - if (cacheReadInputTokens !== undefined) { - providerUsage.cacheReadInputTokens = cacheReadInputTokens; - } - return providerUsage; -} - function isSuccessfulResultEvent(event: Record): boolean { if (event.is_error === true || event.isError === true) { return false; @@ -258,7 +216,7 @@ export function aggregateResultFromStdout(stdout: string): StreamJsonStdoutResul success = isSuccessfulResultEvent(root); error = success ? undefined : extractResultError(root, resultContent); structuredOutput = extractStructuredOutput(root); - providerUsage = extractProviderUsage(root); + providerUsage = extractClaudeProviderUsage(root.usage); } const normalizedDisplayText = displayText.trim(); diff --git a/src/infra/claude/executor.ts b/src/infra/claude/executor.ts index 62a37f7ce..36e94827f 100644 --- a/src/infra/claude/executor.ts +++ b/src/infra/claude/executor.ts @@ -36,11 +36,9 @@ import { containsRateLimitMarker, resolveRateLimitErrorMessage, } from '../rate-limit/detection.js'; +import { extractClaudeProviderUsage } from './usage.js'; const log = createLogger('claude-sdk'); -function toNumber(value: unknown): number | undefined { - return typeof value === 'number' && Number.isFinite(value) ? value : undefined; -} function isRejectedRateLimitEvent(message: SDKRateLimitEvent): boolean { // SDK は rate_limit_event を情報イベントとして毎回流す。 @@ -83,46 +81,13 @@ function describeRateLimitSignal(message: SDKMessage): string | undefined { function extractProviderUsage(resultMsg: SDKResultMessage): ProviderUsageSnapshot { const rawUsage = (resultMsg as unknown as { usage?: unknown }).usage; - if (!rawUsage || typeof rawUsage !== 'object') { + const providerUsage = extractClaudeProviderUsage(rawUsage); + if (!providerUsage) { return { usageMissing: true, reason: USAGE_MISSING_REASONS.NOT_AVAILABLE, }; } - - const usage = rawUsage as Record; - const inputTokens = toNumber(usage.input_tokens); - const outputTokens = toNumber(usage.output_tokens); - const cacheCreationInputTokens = toNumber(usage.cache_creation_input_tokens); - const cacheReadInputTokens = toNumber(usage.cache_read_input_tokens); - if (inputTokens === undefined || outputTokens === undefined) { - return { - usageMissing: true, - reason: USAGE_MISSING_REASONS.TOKENS_MISSING, - }; - } - const totalTokens = inputTokens + outputTokens; - const cachedInputTokens = ( - cacheCreationInputTokens !== undefined && cacheReadInputTokens !== undefined - ? cacheCreationInputTokens + cacheReadInputTokens - : cacheReadInputTokens ?? cacheCreationInputTokens - ); - - const providerUsage: ProviderUsageSnapshot = { - inputTokens, - outputTokens, - totalTokens, - usageMissing: false, - }; - if (cachedInputTokens !== undefined) { - providerUsage.cachedInputTokens = cachedInputTokens; - } - if (cacheCreationInputTokens !== undefined) { - providerUsage.cacheCreationInputTokens = cacheCreationInputTokens; - } - if (cacheReadInputTokens !== undefined) { - providerUsage.cacheReadInputTokens = cacheReadInputTokens; - } return providerUsage; } diff --git a/src/infra/claude/usage.ts b/src/infra/claude/usage.ts new file mode 100644 index 000000000..b732fafb0 --- /dev/null +++ b/src/infra/claude/usage.ts @@ -0,0 +1,50 @@ +import { USAGE_MISSING_REASONS } from '../../core/logging/contracts.js'; +import type { ProviderUsageSnapshot } from '../../core/models/response.js'; + +function toRecord(value: unknown): Record | undefined { + return value !== null && typeof value === 'object' && !Array.isArray(value) + ? (value as Record) + : undefined; +} + +function toNumber(value: unknown): number | undefined { + return typeof value === 'number' && Number.isFinite(value) ? value : undefined; +} + +export function extractClaudeProviderUsage(rawUsage: unknown): ProviderUsageSnapshot | undefined { + const usage = toRecord(rawUsage); + if (!usage) { + return undefined; + } + + const inputTokens = toNumber(usage.input_tokens); + const outputTokens = toNumber(usage.output_tokens); + const cacheCreationInputTokens = toNumber(usage.cache_creation_input_tokens); + const cacheReadInputTokens = toNumber(usage.cache_read_input_tokens); + if (inputTokens === undefined || outputTokens === undefined) { + return { + usageMissing: true, + reason: USAGE_MISSING_REASONS.TOKENS_MISSING, + }; + } + + const providerUsage: ProviderUsageSnapshot = { + inputTokens, + outputTokens, + totalTokens: inputTokens + outputTokens, + usageMissing: false, + }; + const cachedInputTokens = cacheCreationInputTokens !== undefined && cacheReadInputTokens !== undefined + ? cacheCreationInputTokens + cacheReadInputTokens + : cacheReadInputTokens ?? cacheCreationInputTokens; + if (cachedInputTokens !== undefined) { + providerUsage.cachedInputTokens = cachedInputTokens; + } + if (cacheCreationInputTokens !== undefined) { + providerUsage.cacheCreationInputTokens = cacheCreationInputTokens; + } + if (cacheReadInputTokens !== undefined) { + providerUsage.cacheReadInputTokens = cacheReadInputTokens; + } + return providerUsage; +} From 4cf87e7b4ba88f16418995411fc43e4f5bd93999 Mon Sep 17 00:00:00 2001 From: Junichi Kato Date: Mon, 8 Jun 2026 15:18:42 +0900 Subject: [PATCH 9/9] fix(observability): address phase usage review comments --- e2e/specs/observability.e2e.ts | 34 ++++++++++- src/__tests__/otelFoundation.test.ts | 51 +++++++++------- .../usageEventsSpanProcessor.test.ts | 60 ++++++++++++++++++- src/core/workflow/engine/ArpeggioRunner.ts | 5 ++ .../observability/usageEventsSpanProcessor.ts | 9 ++- 5 files changed, 132 insertions(+), 27 deletions(-) diff --git a/e2e/specs/observability.e2e.ts b/e2e/specs/observability.e2e.ts index e3de9f1cb..75abea33b 100644 --- a/e2e/specs/observability.e2e.ts +++ b/e2e/specs/observability.e2e.ts @@ -24,6 +24,36 @@ function readJsonl(path: string): JsonRecord[] { .map((line) => JSON.parse(line) as JsonRecord); } +function isJsonRecord(value: unknown): value is JsonRecord { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +function monitorHasRunIdAttribute(monitor: JsonRecord): boolean { + const scopeMetrics = monitor.scopeMetrics; + if (!Array.isArray(scopeMetrics)) { + return false; + } + return scopeMetrics.some((scopeMetric) => { + if (!isJsonRecord(scopeMetric) || !Array.isArray(scopeMetric.metrics)) { + return false; + } + return scopeMetric.metrics.some((metric) => { + if (!isJsonRecord(metric) || !Array.isArray(metric.points)) { + return false; + } + return metric.points.some((point) => { + if (!isJsonRecord(point) || !isJsonRecord(point.attributes)) { + return false; + } + return ( + Object.prototype.hasOwnProperty.call(point.attributes, 'takt.run.id') && + typeof point.attributes['takt.run.id'] === 'string' + ); + }); + }); + }); +} + function firstRunRoot(repoPath: string): string { const runsDir = join(repoPath, '.takt', 'runs'); const runDirs = readdirSync(runsDir).sort(); @@ -122,8 +152,8 @@ describe('E2E: Observability file outputs (mock)', () => { expect(shadowRecords.some((record) => record.type === 'workflow_start')).toBe(true); expect(shadowRecords.some((record) => record.type === 'workflow_complete')).toBe(true); - const monitor = JSON.parse(readFileSync(monitorPath, 'utf-8')) as unknown; + const monitor = JSON.parse(readFileSync(monitorPath, 'utf-8')) as JsonRecord; expect(monitor).toBeTruthy(); - expect(JSON.stringify(monitor)).toContain('"takt.run.id"'); + expect(monitorHasRunIdAttribute(monitor)).toBe(true); }, 240_000); }); diff --git a/src/__tests__/otelFoundation.test.ts b/src/__tests__/otelFoundation.test.ts index d671baa5b..f783970a7 100644 --- a/src/__tests__/otelFoundation.test.ts +++ b/src/__tests__/otelFoundation.test.ts @@ -19,6 +19,11 @@ type NodeSdkOptionsForTest = { traceExporter?: unknown; }; +type UsageEventsProcessorForTest = { + onEnd(span: unknown): void; + registrations?: Map; +}; + const disabledObservability: ObservabilityConfigForTest = { enabled: false, monitor: false, @@ -206,6 +211,7 @@ describe('otel foundation', () => { }; const handle = await foundation.initializeOtelFoundation(enabledObservability); + const usageEventsProcessor = foundation.constructedOptions[0]?.spanProcessors?.[1] as UsageEventsProcessorForTest; await handle.shutdown(); expect(foundation.sdkImportCount()).toBe(1); @@ -215,7 +221,7 @@ describe('otel foundation', () => { 'service.name': 'takt', 'service.version': packageJson.version, }); - expect(foundation.constructedOptions[0]?.spanProcessors).toHaveLength(2); + expect(usageEventsProcessor.registrations?.size).toBe(0); expect(foundation.constructedOptions[0]?.metricReaders).toHaveLength(1); expect(foundation.constructedOptions[0]).not.toHaveProperty('traceExporter'); expect(foundation.shutdownMock).toHaveBeenCalledOnce(); @@ -301,26 +307,9 @@ describe('otel foundation', () => { }, ); - const processor = foundation.constructedOptions[0]?.spanProcessors?.[1] as { - onEnd(span: unknown): void; - }; - processor.onEnd({ - name: 'phase.implement.execute', - endTime: [1_778_777_205, 0], - attributes: { - 'takt.run.id': 'run-1', - 'takt.provider.name': 'mock', - 'takt.model.name': 'mock-model', - 'takt.step.name': 'implement', - 'takt.step.type': 'agent', - 'takt.phase.number': 1, - 'takt.phase.name': 'execute', - 'takt.phase.status': 'done', - 'gen_ai.usage.input_tokens': 3, - 'gen_ai.usage.output_tokens': 2, - 'gen_ai.usage.total_tokens': 5, - }, - }); + const processor = foundation.constructedOptions[0]?.spanProcessors?.[1] as UsageEventsProcessorForTest; + expect(processor.registrations?.size).toBe(1); + processor.onEnd(makePhaseSpan('run-1')); await handle.shutdown(); const records = readFileSync(phaseUsageLogPath, 'utf-8') @@ -689,3 +678,23 @@ describe('otel foundation', () => { expect(foundation.shutdownMock).toHaveBeenCalledOnce(); }); }); + +function makePhaseSpan(runId: string): Record { + return { + name: 'phase.implement.execute', + endTime: [1_778_777_205, 0], + attributes: { + 'takt.run.id': runId, + 'takt.provider.name': 'mock', + 'takt.model.name': 'mock-model', + 'takt.step.name': 'implement', + 'takt.step.type': 'agent', + 'takt.phase.number': 1, + 'takt.phase.name': 'execute', + 'takt.phase.status': 'done', + 'gen_ai.usage.input_tokens': 3, + 'gen_ai.usage.output_tokens': 2, + 'gen_ai.usage.total_tokens': 5, + }, + }; +} diff --git a/src/__tests__/usageEventsSpanProcessor.test.ts b/src/__tests__/usageEventsSpanProcessor.test.ts index 9880f9cc4..5a7ed8821 100644 --- a/src/__tests__/usageEventsSpanProcessor.test.ts +++ b/src/__tests__/usageEventsSpanProcessor.test.ts @@ -1,7 +1,7 @@ import { existsSync, mkdtempSync, readFileSync, rmSync } from 'node:fs'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { afterEach, describe, expect, it } from 'vitest'; +import { afterEach, describe, expect, it, vi } from 'vitest'; import type { ReadableSpan } from '@opentelemetry/sdk-trace-base'; import { UsageEventsSpanProcessor } from '../infra/observability/usageEventsSpanProcessor.js'; @@ -25,6 +25,10 @@ afterEach(() => { rmSync(dir, { recursive: true, force: true }); } tempDirs.clear(); + vi.doUnmock('../infra/fs/index.js'); + vi.doUnmock('../shared/utils/debug.js'); + vi.resetModules(); + vi.restoreAllMocks(); }); describe('UsageEventsSpanProcessor', () => { @@ -103,6 +107,60 @@ describe('UsageEventsSpanProcessor', () => { processor.onEnd(makePhaseSpan('run-1') as unknown as ReadableSpan); }).not.toThrow(); }); + + it('reports append failures once per run and clears the flag when unregistered', async () => { + const appendJsonLine = vi.fn(() => { + throw new Error('write failed'); + }); + const errorLog = vi.fn(); + + vi.doMock('../infra/fs/index.js', () => ({ appendJsonLine })); + vi.doMock('../shared/utils/debug.js', () => ({ + createLogger: () => ({ + error: errorLog, + warn: vi.fn(), + }), + })); + + const { UsageEventsSpanProcessor: MockedUsageEventsSpanProcessor } = await import( + '../infra/observability/usageEventsSpanProcessor.js' + ); + const processor = new MockedUsageEventsSpanProcessor(); + const unregisterRun1 = processor.register({ + runId: 'run-1', + sessionId: 'session-1', + phaseUsageLogPath: '/missing/run-1-usage-events.phase.jsonl', + }); + processor.register({ + runId: 'run-2', + sessionId: 'session-2', + phaseUsageLogPath: '/missing/run-2-usage-events.phase.jsonl', + }); + + processor.onEnd(makePhaseSpan('run-1') as unknown as ReadableSpan); + processor.onEnd(makePhaseSpan('run-1') as unknown as ReadableSpan); + processor.onEnd(makePhaseSpan('run-2') as unknown as ReadableSpan); + unregisterRun1(); + processor.register({ + runId: 'run-1', + sessionId: 'session-1', + phaseUsageLogPath: '/missing/run-1-usage-events.phase.jsonl', + }); + processor.onEnd(makePhaseSpan('run-1') as unknown as ReadableSpan); + await processor.shutdown(); + + expect(appendJsonLine).toHaveBeenCalledTimes(4); + expect(errorLog).toHaveBeenCalledTimes(3); + expect(errorLog).toHaveBeenNthCalledWith(1, 'Failed to append phase usage event log record', expect.objectContaining({ + runId: 'run-1', + })); + expect(errorLog).toHaveBeenNthCalledWith(2, 'Failed to append phase usage event log record', expect.objectContaining({ + runId: 'run-2', + })); + expect(errorLog).toHaveBeenNthCalledWith(3, 'Failed to append phase usage event log record', expect.objectContaining({ + runId: 'run-1', + })); + }); }); function makePhaseSpan(runId: string): Record { diff --git a/src/core/workflow/engine/ArpeggioRunner.ts b/src/core/workflow/engine/ArpeggioRunner.ts index 7083d6502..3ff78ac0e 100644 --- a/src/core/workflow/engine/ArpeggioRunner.ts +++ b/src/core/workflow/engine/ArpeggioRunner.ts @@ -28,6 +28,7 @@ import type { StructuredCaller } from '../../../agents/structured-caller.js'; import { buildGitRules } from '../instruction/instruction-context.js'; import { renderFallbackNotice } from '../instruction/fallback-notice.js'; import { runWithPhaseSpan } from '../observability/workflowSpans.js'; +import { USAGE_MISSING_REASONS } from '../../logging/contracts.js'; const log = createLogger('arpeggio-runner'); @@ -202,6 +203,10 @@ async function executeBatchWithRetry( content: '', success: false, error: lastError, + providerUsage: { + usageMissing: true, + reason: USAGE_MISSING_REASONS.NOT_AVAILABLE, + }, }; }, (result) => ({ status: getBatchResultStatus(result), diff --git a/src/infra/observability/usageEventsSpanProcessor.ts b/src/infra/observability/usageEventsSpanProcessor.ts index 8d6973bf2..ecbd2cc8b 100644 --- a/src/infra/observability/usageEventsSpanProcessor.ts +++ b/src/infra/observability/usageEventsSpanProcessor.ts @@ -17,7 +17,7 @@ export interface UsageEventsSpanProcessorOptions { export class UsageEventsSpanProcessor implements SpanProcessor { private readonly registrations = new Map(); - private hasReportedWriteFailure = false; + private readonly reportedWriteFailureRunIds = new Set(); constructor(options?: UsageEventsSpanProcessorOptions) { if (options) { @@ -36,6 +36,7 @@ export class UsageEventsSpanProcessor implements SpanProcessor { this.registrations.set(options.runId, options); return () => { this.registrations.delete(options.runId); + this.reportedWriteFailureRunIds.delete(options.runId); }; } @@ -65,12 +66,13 @@ export class UsageEventsSpanProcessor implements SpanProcessor { try { appendJsonLine(options.phaseUsageLogPath, record); } catch (error) { - if (this.hasReportedWriteFailure) { + if (this.reportedWriteFailureRunIds.has(options.runId)) { return; } - this.hasReportedWriteFailure = true; + this.reportedWriteFailureRunIds.add(options.runId); const message = error instanceof Error ? error.message : String(error); log.error('Failed to append phase usage event log record', { + runId: options.runId, phaseUsageLogPath: options.phaseUsageLogPath, error: message, }); @@ -81,6 +83,7 @@ export class UsageEventsSpanProcessor implements SpanProcessor { async shutdown(): Promise { this.registrations.clear(); + this.reportedWriteFailureRunIds.clear(); } }