Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions crates/rite-model/src/ir/ceremony.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,30 @@ pub struct Step {
///
/// When `true`, the executor auto-advances without waiting for user acknowledgment.
pub silent: bool,

/// How the runtime treats a *transient* failure of this step.
///
/// The DSL can only restrict the default retry behavior, not enable it; see
/// [`RetryPolicy`]. When the DSL omits the field, this is [`RetryPolicy::Prompt`].
pub retry: RetryPolicy,
}

/// How the runtime treats a transient (retriable) failure of a step.
///
/// Rite ceremonies always have a human at the console, so the default is to
/// pause and let the operator decide, rather than burn a fixed retry budget.
/// The DSL `retry:` field therefore narrows that default (to
/// [`RetryPolicy::Never`] or [`RetryPolicy::MaxAttempts`]); it is not what
/// turns retrying on.
///
/// [`Default::default`] yields [`RetryPolicy::Prompt`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum RetryPolicy {
/// Pause on a transient error and prompt the operator (Retry / Abort),
/// with no limit on how many times. This is the policy used when the DSL
/// omits `retry:`.
#[default]
Prompt,
/// Never retry: a transient error fails the step immediately. Intended for
/// steps where repeated attempts are themselves security-relevant.
Never,
/// Allow at most this many attempts in total, then fail the step. The
/// value counts attempts, not retries after the first one. A hard cap.
///
/// NOTE(review): `MaxAttempts(0)` is representable by this type; whether
/// it is rejected earlier (e.g. during resolution) is not visible here —
/// confirm.
MaxAttempts(u32),
}

/// A resolved parameter with its value.
Expand Down
8 changes: 5 additions & 3 deletions crates/rite-model/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,10 @@ pub use types::{

pub use ir::{
Act, ActId, ArtifactId, ArtifactRef, Ceremony, Material, MaterialId, MaterialKind, Output,
OutputId, ParamId, Parameter, PostCeremonyDuty, Role, RoleId, Section, SectionId, Step, StepId,
StepInputs, SymbolTable,
OutputId, ParamId, Parameter, PostCeremonyDuty, RetryPolicy, Role, RoleId, Section, SectionId,
Step, StepId, StepInputs, SymbolTable,
};

pub use transcript::{ErrorRecord, Prompt, ResponseRecord, StepFact, StepOutcome, ValidatorSpec};
pub use transcript::{
ErrorClass, ErrorRecord, Prompt, ResponseRecord, StepFact, StepOutcome, ValidatorSpec,
};
80 changes: 77 additions & 3 deletions crates/rite-model/src/transcript.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,38 @@ pub enum ResponseRecord {
Acknowledged,
}

/// Audit classification of a bad outcome, recorded so an auditor can tell the
/// nature of a failure apart without parsing the free-form `message`.
///
/// This is the *audit* taxonomy (what an auditor sees), distinct from the
/// runtime's `Retriability` (whether a step may re-run). For a backend error
/// the two align: a retriable error is `Environmental`. They are kept separate
/// because some classes never map cleanly onto retriability (an `Abort` is a
/// decision, not an error).
///
/// Serialized as a snake_case string (`environmental`, `procedural`,
/// `integrity`, `abort`). The enum is `#[non_exhaustive]`, so code in other
/// crates that matches on it must include a wildcard arm.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ErrorClass {
/// The world wasn't ready; the step's work did not happen (token absent,
/// loose cable, PIN required).
Environmental,
/// The ceremony's own logic concluded badly (a verification mismatch, a
/// refused attestation). A result, not a recoverable condition.
Procedural,
/// The run itself is compromised or the definition is broken (transcript
/// write failed, channel lost, unknown action, invalid params).
Integrity,
/// The operator chose to stop. Not an error at all, but recorded on the
/// terminal fact so that an abort is distinguishable from a failure.
Abort,
}

/// Structured error record for transcript serialization.
#[non_exhaustive]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ErrorRecord {
/// Audit classification of this error.
pub class: ErrorClass,
/// Stable kind label (e.g. `aborted`, `step_failed`, `material_load_failed`).
pub kind: String,
/// Human-readable message.
Expand All @@ -113,8 +141,9 @@ pub struct ErrorRecord {

impl ErrorRecord {
/// Construct an error record.
pub fn new(kind: impl Into<String>, message: impl Into<String>) -> Self {
pub fn new(class: ErrorClass, kind: impl Into<String>, message: impl Into<String>) -> Self {
Self {
class,
kind: kind.into(),
message: message.into(),
}
Expand Down Expand Up @@ -226,6 +255,18 @@ pub enum StepFact {
/// Verbatim deviation text.
text: String,
},
/// A step attempt failed. Recorded per attempt, so a retried step shows
/// `StepAttemptFailed{attempt: 1}` followed by the operator's retry
/// decision and, on success, `StepCompleted`. The final attempt of a step
/// that the run gives up on is followed by the terminal `CeremonyFailed`.
StepAttemptFailed {
/// Step whose attempt failed.
step: StepId,
/// 1-based attempt number within this step.
attempt: u32,
/// Structured error record for the failed attempt.
error: ErrorRecord,
},
/// Step finished executing.
StepCompleted {
/// Step identifier.
Expand Down Expand Up @@ -629,11 +670,44 @@ mod schema_snapshot_tests {
fn ceremony_failed() {
assert_json(
&StepFact::CeremonyFailed {
error: ErrorRecord::new("aborted", "ceremony aborted by operator"),
error: ErrorRecord::new(
ErrorClass::Abort,
"aborted",
"ceremony aborted by operator",
),
},
&json!({
"type": "ceremony_failed",
"error": { "kind": "aborted", "message": "ceremony aborted by operator" },
"error": {
"class": "abort",
"kind": "aborted",
"message": "ceremony aborted by operator",
},
}),
);
}

#[test]
fn step_attempt_failed() {
    // Snapshot of the wire format for a first failed attempt of `import_key`
    // with an environmental backend error.
    let fact = StepFact::StepAttemptFailed {
        step: StepId::new("import_key"),
        attempt: 1,
        error: ErrorRecord::new(
            ErrorClass::Environmental,
            "backend_error",
            "Token not present",
        ),
    };

    // The error record is nested and carries its class in snake_case.
    let expected = json!({
        "type": "step_attempt_failed",
        "step": "import_key",
        "attempt": 1,
        "error": {
            "class": "environmental",
            "kind": "backend_error",
            "message": "Token not present",
        },
    });

    assert_json(&fact, &expected);
}
Expand Down
43 changes: 42 additions & 1 deletion crates/rite-render/src/report/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
//! HTML renderer and, in the future, by template engines.

use chrono::{DateTime, Duration, Utc};
use rite_model::{StepFact, StepOutcome};
use rite_model::{ErrorClass, StepFact, StepOutcome};
use serde::Serialize;
use std::collections::HashMap;

Expand Down Expand Up @@ -52,12 +52,30 @@ pub enum ReportStatus {
/// Failure summary for a ceremony that ended in failure, extracted from the
/// error record of `CeremonyFailed`.
#[derive(Debug, Clone, Serialize)]
pub struct ReportFailure {
/// Audit classification of the failure.
pub class: ErrorClass,
/// Stable error kind label.
pub kind: String,
/// Human-readable message.
pub message: String,
}

/// One failed attempt of a step, recorded from a `StepAttemptFailed` fact.
///
/// A step has entries of this type when it was retried, or when it failed
/// terminally after exhausting its retries.
#[derive(Debug, Clone, Serialize)]
pub struct ReportAttempt {
/// 1-based attempt number within the step.
pub attempt: u32,
/// Audit classification of the attempt's error.
pub class: ErrorClass,
/// Stable error kind label.
pub kind: String,
/// Human-readable message.
pub message: String,
/// UTC timestamp when the attempt failed.
pub failed_at: DateTime<Utc>,
}

/// Execution record for a single ceremony step.
#[derive(Debug, Clone, Serialize)]
pub struct ReportStep {
Expand All @@ -76,6 +94,8 @@ pub struct ReportStep {
pub outcome_status: String,
/// Message attached to the outcome, if any.
pub outcome_message: Option<String>,
/// Failed attempts before this step succeeded or was given up on.
pub attempts: Vec<ReportAttempt>,
}

/// An artifact produced during the ceremony.
Expand Down Expand Up @@ -170,8 +190,28 @@ impl Builder {
completed_at: None,
outcome_status: "in_progress".to_string(),
outcome_message: None,
attempts: Vec::new(),
});
}
StepFact::StepAttemptFailed {
step,
attempt,
error,
} => {
if let Some(report_step) = self
.step_index
.get(step.as_str())
.and_then(|i| self.steps.get_mut(*i))
{
report_step.attempts.push(ReportAttempt {
attempt: *attempt,
class: error.class,
kind: error.kind.clone(),
message: error.message.clone(),
failed_at: at,
});
}
}
StepFact::StepCompleted { id, outcome } => {
if let Some(step) = self
.step_index
Expand Down Expand Up @@ -215,6 +255,7 @@ impl Builder {
self.status = ReportStatus::Failed;
self.completed_at = Some(at);
self.failure = Some(ReportFailure {
class: error.class,
kind: error.kind.clone(),
message: error.message.clone(),
});
Expand Down
50 changes: 50 additions & 0 deletions crates/rite-render/src/view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,8 @@ pub struct ReportView {
pub transcript_fingerprint: String,
/// Failure summary, when the ceremony failed.
pub failure: Option<FailureView>,
/// Failed step attempts (retries), across all steps.
pub attempts: Vec<AttemptView>,
/// Recorded deviations.
pub deviations: Vec<DeviationView>,
/// Produced artifacts.
Expand All @@ -566,12 +568,31 @@ pub struct ReportView {
/// A failure summary in a report, with every field already rendered as text.
#[derive(Debug, Clone, Serialize)]
pub struct FailureView {
/// Audit classification label (environmental / procedural / integrity /
/// abort, or `unknown` for a class that has no label yet).
pub class: String,
/// Stable error kind label.
pub kind: String,
/// Human-readable message.
pub message: String,
}

/// A failed step attempt (a retry) in a report, with every field already
/// rendered for display.
#[derive(Debug, Clone, Serialize)]
pub struct AttemptView {
/// Identifier of the step whose attempt failed.
pub step_id: String,
/// 1-based attempt number within the step.
pub attempt: u32,
/// Audit classification of the attempt's error, as a lowercase label
/// (e.g. `environmental`).
pub class: String,
/// Stable error kind label.
pub kind: String,
/// Human-readable message.
pub message: String,
/// Time the attempt failed, formatted as a display string.
pub recorded: String,
}

/// A recorded deviation in a report.
#[derive(Debug, Clone, Serialize)]
pub struct DeviationView {
Expand Down Expand Up @@ -638,6 +659,20 @@ impl ReportView {
sha256: a.sha256.clone(),
})
.collect();
let attempts = data
.steps
.iter()
.flat_map(|s| {
s.attempts.iter().map(move |a| AttemptView {
step_id: s.step_id.clone(),
attempt: a.attempt,
class: error_class_label(a.class).to_string(),
kind: a.kind.clone(),
message: a.message.clone(),
recorded: format_datetime(&a.failed_at),
})
})
.collect();
let steps = data
.steps
.iter()
Expand Down Expand Up @@ -668,9 +703,11 @@ impl ReportView {
.map(|secs| crate::report::data::format_duration(Duration::seconds(secs))),
transcript_fingerprint: data.transcript_fingerprint.clone(),
failure: data.failure.as_ref().map(|f| FailureView {
class: error_class_label(f.class).to_string(),
kind: f.kind.clone(),
message: f.message.clone(),
}),
attempts,
deviations,
artifacts,
steps,
Expand All @@ -688,6 +725,19 @@ fn status_slug(status: crate::report::ReportStatus) -> &'static str {
}
}

/// Returns the lowercase label shown in reports for an audit error class.
///
/// Classes without a dedicated label are rendered as `"unknown"`.
fn error_class_label(class: rite_model::ErrorClass) -> &'static str {
    use rite_model::ErrorClass as Class;

    match class {
        Class::Environmental => "environmental",
        Class::Procedural => "procedural",
        Class::Integrity => "integrity",
        Class::Abort => "abort",
        // The enum is `#[non_exhaustive]`, so this arm is mandatory; a variant
        // added later falls through to the generic label until it is listed.
        _ => "unknown",
    }
}

/// Renders a UTC timestamp for display in a report, e.g.
/// `2024-01-31 09:05:00 UTC`.
fn format_datetime(dt: &DateTime<Utc>) -> String {
    // The zone suffix is literal text in the pattern; the argument type is
    // already fixed to UTC.
    const PATTERN: &str = "%Y-%m-%d %H:%M:%S UTC";
    dt.format(PATTERN).to_string()
}
Expand Down
14 changes: 13 additions & 1 deletion crates/rite-render/templates/report.html.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,22 @@
{%- endif %}
<p><strong>Transcript fingerprint:</strong> <code>{{ report.transcript_fingerprint }}</code></p>
{%- if report.failure %}
<p><strong>Failure:</strong> {{ report.failure.message }} ({{ report.failure.kind }})</p>
<p><strong>Failure:</strong> {{ report.failure.message }} ({{ report.failure.kind }}, {{ report.failure.class }})</p>
{%- endif %}
</div>

{%- if report.attempts %}
<h2>Failed Attempts</h2>
<table class="attempts">
<thead><tr><th>Recorded</th><th>Step</th><th>Attempt</th><th>Class</th><th>Error</th></tr></thead>
<tbody>
{%- for a in report.attempts %}
<tr><td>{{ a.recorded }}</td><td><code>{{ a.step_id }}</code></td><td>{{ a.attempt }}</td><td>{{ a.class }}</td><td>{{ a.message }} ({{ a.kind }})</td></tr>
{%- endfor %}
</tbody>
</table>
{%- endif %}

{%- if report.deviations %}
<h2>Deviations</h2>
<table class="deviations">
Expand Down
6 changes: 4 additions & 2 deletions crates/rite-render/templates/themes/formal.css
Original file line number Diff line number Diff line change
Expand Up @@ -423,10 +423,12 @@ ul.duty-items li {
.summary-box p {
margin: 4px 0;
}
.deviations {
.deviations,
.attempts {
border-left: 3px solid #9a1f1f;
}
.deviations thead th {
.deviations thead th,
.attempts thead th {
border-bottom-color: #9a1f1f;
}
code, .hash {
Expand Down
1 change: 1 addition & 0 deletions crates/rite-resolver/src/diagnostic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ impl SpanMap {
| ResolveError::UnknownArtifact { step, .. }
| ResolveError::MissingRequiredBackend { step, .. }
| ResolveError::MissingWithField { step, .. }
| ResolveError::InvalidRetryAttempts { step }
| ResolveError::ArtifactNeverProduced { step, .. } => self.steps.get(step).copied(),
ResolveError::UndeclaredBackend { step, backend } => self
.span_for_reference(
Expand Down
Loading