Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions src/crates/core/src/service/session_usage/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,36 @@ fn build_time_breakdown(turns: &[DialogTurnData]) -> UsageTimeBreakdown {
}
}

/// Cache hit rate for a set of usage records: total cached tokens divided by
/// total input tokens, taken over only those records whose
/// `cached_tokens_available` flag is set.
///
/// Records without the flag contribute to neither sum, so input that arrived
/// without any cache data cannot drag the ratio down.
///
/// Returns `None` when the filtered input total is zero. That single check
/// also covers the "no record reported cached tokens" case, because nothing
/// is summed then. The ratio is returned unclamped: it normally lies in
/// `0.0..=1.0`, but a provider that reports more cached than input tokens
/// yields a value above 1.0, which is kept for diagnostic visibility.
fn compute_cache_hit_rate<'a, I>(records: I) -> Option<f64>
where
    I: IntoIterator<Item = &'a TokenUsageRecord>,
{
    let (cached_total, input_total) = records
        .into_iter()
        .filter(|usage| usage.cached_tokens_available)
        .fold((0u64, 0u64), |(cached, input), usage| {
            (
                cached + usage.cached_tokens as u64,
                input + usage.input_tokens as u64,
            )
        });

    // Guard the division; an empty filtered set also lands here with 0 input.
    (input_total != 0).then(|| cached_total as f64 / input_total as f64)
}

fn build_token_breakdown(token_records: &[TokenUsageRecord]) -> UsageTokenBreakdown {
if token_records.is_empty() {
return UsageTokenBreakdown {
Expand All @@ -364,6 +394,7 @@ fn build_token_breakdown(token_records: &[TokenUsageRecord]) -> UsageTokenBreakd
total_tokens: None,
cached_tokens: None,
cache_coverage: UsageCacheCoverage::Unavailable,
cache_hit_rate: None,
};
}

Expand Down Expand Up @@ -410,6 +441,7 @@ fn build_token_breakdown(token_records: &[TokenUsageRecord]) -> UsageTokenBreakd
} else {
UsageCacheCoverage::Unavailable
},
cache_hit_rate: compute_cache_hit_rate(token_records.iter()),
}
}

Expand All @@ -433,6 +465,8 @@ fn build_model_breakdown(
output_tokens: Some(0),
total_tokens: Some(0),
cached_tokens: None,
// Filled in by P2-2.
cache_hit_rate: None,
duration_ms: None,
sample_turn_id: None,
sample_turn_index: None,
Expand Down Expand Up @@ -468,6 +502,8 @@ fn build_model_breakdown(
output_tokens: None,
total_tokens: None,
cached_tokens: None,
// Filled in by P2-2.
cache_hit_rate: None,
duration_ms: Some(0),
sample_turn_id: None,
sample_turn_index: None,
Expand All @@ -490,6 +526,21 @@ fn build_model_breakdown(
}
}

// Per-model hit rate: group records by model_id, then apply the same
// numerator/denominator policy as the session-level rate.
let mut records_by_model: HashMap<&str, Vec<&TokenUsageRecord>> = HashMap::new();
for record in token_records {
records_by_model
.entry(record.model_id.as_str())
.or_default()
.push(record);
}
for (model_id, model_records) in &records_by_model {
if let Some(row) = by_model.get_mut(*model_id) {
row.cache_hit_rate = compute_cache_hit_rate(model_records.iter().copied());
}
}

let mut rows: Vec<_> = by_model.into_values().collect();
rows.sort_by(|a, b| a.model_id.cmp(&b.model_id));
rows
Expand Down Expand Up @@ -1900,4 +1951,74 @@ mod tests {
is_subagent: false,
}
}

/// Test fixture: builds a record via `test_token_record` and then switches
/// its `cached_tokens_available` flag on, so the record is one that reports
/// cached tokens.
fn reported_token_record(
    model_id: &str,
    input_tokens: u32,
    output_tokens: u32,
    cached_tokens: u32,
) -> TokenUsageRecord {
    let mut reporting = test_token_record(model_id, input_tokens, output_tokens, cached_tokens);
    reporting.cached_tokens_available = true;
    reporting
}

#[test]
fn cache_hit_rate_computes_when_all_records_report_cache() {
    // Both records report cached tokens, so both feed the ratio:
    // (30 + 80) cached over (100 + 200) input.
    let usage = [
        reported_token_record("model-a", 100, 20, 30),
        reported_token_record("model-a", 200, 40, 80),
    ];
    let expected = 110.0 / 300.0;

    let actual = build_token_breakdown(&usage)
        .cache_hit_rate
        .expect("hit rate present");

    assert!((actual - expected).abs() < 1e-9);
}

#[test]
fn cache_hit_rate_is_none_when_no_record_reports_cache() {
    // Plain `test_token_record` fixtures serve as the non-reporting case here;
    // with no reporting record at all there is nothing to build a rate from.
    let usage = [
        test_token_record("model-a", 100, 20, 0),
        test_token_record("model-a", 200, 40, 0),
    ];

    assert_eq!(build_token_breakdown(&usage).cache_hit_rate, None);
}

#[test]
fn cache_hit_rate_excludes_unreported_records_from_denominator() {
    // Partial coverage. Only the first record reports cached tokens; the
    // second must be left out of the numerator AND the denominator, or its
    // 9999 input tokens would artificially deflate the rate.
    let reporting = reported_token_record("model-a", 100, 20, 80);
    let silent = test_token_record("model-a", 9999, 1, 0);
    let usage = [reporting, silent];

    let actual = build_token_breakdown(&usage)
        .cache_hit_rate
        .expect("hit rate present");

    // Expected ratio is 80 / 100, taken from the reporting record alone.
    assert!((actual - 0.8).abs() < 1e-9);
}

#[test]
fn cache_hit_rate_none_when_input_sum_is_zero() {
    // A reporting record whose input_tokens is 0 leaves the denominator at
    // zero; the rate must come back as None instead of dividing by zero.
    let usage = [reported_token_record("model-a", 0, 5, 0)];

    assert_eq!(build_token_breakdown(&usage).cache_hit_rate, None);
}

#[test]
fn per_model_cache_hit_rate_isolated_per_model() {
    // One reporting record per model: model-a is 40/100, model-b is 50/200.
    // Each row's rate must come only from that model's own records.
    let usage = [
        reported_token_record("model-a", 100, 10, 40),
        reported_token_record("model-b", 200, 20, 50),
    ];

    let rows = build_model_breakdown(&[], &usage);
    let row_a = rows.iter().find(|row| row.model_id == "model-a").unwrap();
    let row_b = rows.iter().find(|row| row.model_id == "model-b").unwrap();

    assert!((row_a.cache_hit_rate.unwrap() - 0.4).abs() < 1e-9);
    assert!((row_b.cache_hit_rate.unwrap() - 0.25).abs() < 1e-9);
}
}
75 changes: 63 additions & 12 deletions src/crates/services-core/src/session_usage/render.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,11 @@ pub fn render_usage_report_terminal(report: &SessionUsageReport) -> String {
));
out.push(format!(
"Cached tokens: {}",
match report.tokens.cache_coverage {
UsageCacheCoverage::Available | UsageCacheCoverage::Partial => {
format_optional_number(report.tokens.cached_tokens)
}
UsageCacheCoverage::Unavailable => "not reported".to_string(),
}
format_cached_with_hit_rate(
report.tokens.cached_tokens,
&report.tokens.cache_coverage,
report.tokens.cache_hit_rate,
)
));
out.push(format!(
"Files changed: {}",
Expand Down Expand Up @@ -182,12 +181,11 @@ pub fn render_usage_report_markdown(report: &SessionUsageReport) -> String {
));
out.push_str(&format!(
"| Cached | {} |\n\n",
match report.tokens.cache_coverage {
UsageCacheCoverage::Available | UsageCacheCoverage::Partial => {
format_optional_number(report.tokens.cached_tokens)
}
UsageCacheCoverage::Unavailable => "not reported".to_string(),
}
format_cached_with_hit_rate(
report.tokens.cached_tokens,
&report.tokens.cache_coverage,
report.tokens.cache_hit_rate,
)
));

if !report.models.is_empty() {
Expand Down Expand Up @@ -322,6 +320,26 @@ fn format_optional_number(value: Option<u64>) -> String {
.unwrap_or_else(|| "unavailable".to_string())
}

/// Render the "cached tokens" cell, optionally followed by a ` (NN%)`
/// hit-rate suffix.
///
/// When `coverage` is `Unavailable` the cell is always "not reported"; both
/// `cached_tokens` and `hit_rate` are ignored in that case, even if set.
/// Otherwise the cell is the formatted token count, with the hit rate
/// appended as a whole-number percentage when one is present.
fn format_cached_with_hit_rate(
    cached_tokens: Option<u64>,
    coverage: &UsageCacheCoverage,
    hit_rate: Option<f64>,
) -> String {
    // Kept as an exhaustive match so a new coverage variant forces a decision here.
    let mut cell = match coverage {
        UsageCacheCoverage::Unavailable => return "not reported".to_string(),
        UsageCacheCoverage::Available | UsageCacheCoverage::Partial => {
            format_optional_number(cached_tokens)
        }
    };

    if let Some(rate) = hit_rate {
        // `rate` is a fraction; scale to percent and print with no decimals.
        cell.push_str(&format!(" ({:.0}%)", rate * 100.0));
    }
    cell
}

fn format_optional_duration(value: Option<u64>) -> String {
value
.map(format_duration)
Expand Down Expand Up @@ -465,6 +483,39 @@ mod tests {
assert!(!rendered.contains("secret.txt"));
}

#[test]
fn render_appends_hit_rate_suffix_to_cached_cell() {
    // Simulate a session with cache coverage where 80% of input was served
    // from cache; both renderers should show the count plus "(80%)".
    let mut fixture = test_report();
    fixture.tokens.cache_coverage = UsageCacheCoverage::Available;
    fixture.tokens.cached_tokens = Some(800);
    fixture.tokens.cache_hit_rate = Some(0.8);

    let terminal_out = render_usage_report_terminal(&fixture);
    let markdown_out = render_usage_report_markdown(&fixture);

    assert!(terminal_out.contains("Cached tokens: 800 (80%)"));
    assert!(markdown_out.contains("| Cached | 800 (80%) |"));
}

#[test]
fn render_omits_hit_rate_suffix_when_unavailable() {
    // With Unavailable coverage the cached cell must read "not reported",
    // even when a hit rate was (wrongly) populated upstream. The rate is set
    // on purpose here to prove it stays hidden.
    let mut fixture = test_report();
    fixture.tokens.cache_coverage = UsageCacheCoverage::Unavailable;
    fixture.tokens.cache_hit_rate = Some(0.5);

    let terminal_out = render_usage_report_terminal(&fixture);
    let markdown_out = render_usage_report_markdown(&fixture);

    assert!(terminal_out.contains("Cached tokens: not reported"));
    assert!(markdown_out.contains("| Cached | not reported |"));
    assert!(!terminal_out.contains("(50%)"));
    assert!(!markdown_out.contains("(50%)"));
}

#[test]
fn render_usage_report_stays_token_only_without_billing_language() {
let report = test_report();
Expand Down
22 changes: 19 additions & 3 deletions src/crates/services-core/src/session_usage/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ use serde::{Deserialize, Serialize};

pub const SESSION_USAGE_REPORT_SCHEMA_VERSION: u16 = 1;

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
// PartialEq only (not Eq) because nested UsageTokenBreakdown/UsageModelBreakdown
// hold `cache_hit_rate: Option<f64>`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct SessionUsageReport {
pub schema_version: u16,
Expand Down Expand Up @@ -133,7 +135,10 @@ pub enum UsageTimeDenominator {
Unavailable,
}

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
// PartialEq only (not Eq) because `cache_hit_rate: Option<f64>` precludes
// total equality. Existing call sites compare with `==`, which still works
// through PartialEq; note that a NaN rate would compare unequal to itself.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct UsageTokenBreakdown {
pub source: UsageTokenSource,
Expand All @@ -142,6 +147,10 @@ pub struct UsageTokenBreakdown {
pub total_tokens: Option<u64>,
pub cached_tokens: Option<u64>,
pub cache_coverage: UsageCacheCoverage,
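/// `cached_tokens / input_tokens` over records that explicitly report
/// cached tokens. `None` when no record reports cached tokens, or when the
/// reporting records sum to zero input tokens. Normally within 0.0–1.0; the
/// value is not clamped, so a misreporting provider can push it above 1.0.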
#[serde(default, skip_serializing_if = "Option::is_none")]
pub cache_hit_rate: Option<f64>,
}

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
Expand All @@ -159,7 +168,8 @@ pub enum UsageCacheCoverage {
Unavailable,
}

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
// PartialEq only (not Eq) — see comment on UsageTokenBreakdown.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct UsageModelBreakdown {
pub model_id: String,
Expand All @@ -168,6 +178,9 @@ pub struct UsageModelBreakdown {
pub output_tokens: Option<u64>,
pub total_tokens: Option<u64>,
pub cached_tokens: Option<u64>,
/// Per-model hit rate. Same semantic as [`UsageTokenBreakdown::cache_hit_rate`].
#[serde(default, skip_serializing_if = "Option::is_none")]
pub cache_hit_rate: Option<f64>,
pub duration_ms: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub sample_turn_id: Option<String>,
Expand Down Expand Up @@ -372,6 +385,7 @@ impl SessionUsageReport {
total_tokens: None,
cached_tokens: None,
cache_coverage: UsageCacheCoverage::Unavailable,
cache_hit_rate: None,
},
models: vec![],
tools: vec![],
Expand Down Expand Up @@ -445,6 +459,7 @@ pub(crate) fn test_report() -> SessionUsageReport {
total_tokens: Some(1540),
cached_tokens: None,
cache_coverage: UsageCacheCoverage::Unavailable,
cache_hit_rate: None,
};
report.models = vec![UsageModelBreakdown {
model_id: "test-model".to_string(),
Expand All @@ -453,6 +468,7 @@ pub(crate) fn test_report() -> SessionUsageReport {
output_tokens: Some(340),
total_tokens: Some(1540),
cached_tokens: None,
cache_hit_rate: None,
duration_ms: None,
sample_turn_id: Some("turn-1".to_string()),
sample_turn_index: Some(0),
Expand Down
Loading
Loading