diff --git a/Cargo.lock b/Cargo.lock index 5c928b19a3..cd8ebcdd0b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2301,6 +2301,7 @@ dependencies = [ "carbide-utils", "carbide-uuid", "chrono", + "component-manager", "config-version", "duration-str", "eyre", @@ -2619,15 +2620,18 @@ name = "carbide-rack-controller" version = "0.0.0" dependencies = [ "async-trait", + "bmc-vendor", "carbide-api-db", "carbide-api-model", "carbide-health-metrics", + "carbide-health-report", "carbide-rack", "carbide-secrets", "carbide-test-support", "carbide-utils", "carbide-uuid", "chrono", + "component-manager", "config-version", "duration-str", "eyre", @@ -2638,8 +2642,10 @@ dependencies = [ "serde_json", "sqlx", "state-controller", + "tokio", "tonic", "tracing", + "uuid", ] [[package]] diff --git a/crates/api-core/src/handlers/component_manager.rs b/crates/api-core/src/handlers/component_manager.rs index bb91ef849c..0a63e4593b 100644 --- a/crates/api-core/src/handlers/component_manager.rs +++ b/crates/api-core/src/handlers/component_manager.rs @@ -29,7 +29,11 @@ use carbide_uuid::power_shelf::PowerShelfId; use carbide_uuid::rack::RackId; use carbide_uuid::switch::SwitchId; use component_manager::component_manager::ComponentManager; -use component_manager::compute_tray_manager::{ComputeTrayEndpoint, ComputeTrayVendor}; +use component_manager::compute_tray_manager::{ + Backend as ComputeBackend, ComputeTrayAuthentication, ComputeTrayEndpoint, ComputeTrayManager, + ComputeTrayResult, ComputeTrayVendor, +}; +use component_manager::core_compute_manager::CoreComputeTrayManager; use component_manager::error::ComponentManagerError; use component_manager::nv_switch_manager::SwitchEndpoint; use component_manager::power_shelf_manager::{PowerShelfEndpoint, PowerShelfVendor}; @@ -43,7 +47,7 @@ use model::component_manager::{ }; use model::machine::Machine; use model::machine::machine_search_config::MachineSearchConfig; -use model::rack::{FirmwareUpgradeJob, MaintenanceActivity}; +use 
model::rack::{FirmwareUpgradeJob, MaintenanceActivity, MaintenanceScope}; use model::switch::SwitchMaintenanceOperation; use tonic::{Code, Request, Response, Status}; @@ -352,6 +356,213 @@ async fn queue_switch_power_control_via_state_controller( Ok(results) } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ComputePowerRoute { + CoreDirect, + ConfiguredDirect, + RackStateController, +} + +fn compute_power_route( + rack_associated: bool, + configured_backend: ComputeBackend, + bypass_state_controller: bool, +) -> ComputePowerRoute { + if !rack_associated { + return ComputePowerRoute::CoreDirect; + } + + if configured_backend == ComputeBackend::Rms && !bypass_state_controller { + ComputePowerRoute::RackStateController + } else { + ComputePowerRoute::ConfiguredDirect + } +} + +struct RackPowerMaintenanceTarget { + rack_id: RackId, + machine_ids: Vec, +} + +#[derive(Default)] +struct ComputePowerTargets { + core_direct: Vec, + configured_direct: Vec, + rack_state_controller: Vec, + errors: Vec, +} + +fn push_rack_power_target( + targets: &mut Vec, + rack_id: RackId, + machine_id: MachineId, +) { + if let Some(target) = targets.iter_mut().find(|target| target.rack_id == rack_id) { + target.machine_ids.push(machine_id); + return; + } + + targets.push(RackPowerMaintenanceTarget { + rack_id, + machine_ids: vec![machine_id], + }); +} + +fn deduplicate_compute_machine_ids( + machine_ids: &[MachineId], +) -> (Vec, Vec) { + let mut unique_machine_ids = Vec::with_capacity(machine_ids.len()); + let mut seen_machine_ids = HashSet::with_capacity(machine_ids.len()); + let mut duplicate_errors = Vec::new(); + for &machine_id in machine_ids { + if seen_machine_ids.insert(machine_id) { + unique_machine_ids.push(machine_id); + } else { + duplicate_errors.push(status_result( + &machine_id.to_string(), + Status::invalid_argument(format!( + "duplicate machine id {machine_id} in power-control request" + )), + )); + } + } + (unique_machine_ids, duplicate_errors) +} + +async fn 
classify_compute_power_targets( + api: &Api, + machine_ids: &[MachineId], + configured_backend: ComputeBackend, + bypass_state_controller: bool, +) -> Result { + let (unique_machine_ids, duplicate_errors) = deduplicate_compute_machine_ids(machine_ids); + + let machines = db::machine::find( + api.db_reader().as_mut(), + db::ObjectFilter::List(&unique_machine_ids), + MachineSearchConfig::default(), + ) + .await + .map_err(|error| Status::internal(format!("failed to look up machines: {error}")))?; + let machines_by_id: HashMap<_, _> = machines + .into_iter() + .map(|machine| (machine.id, machine)) + .collect(); + + let mut targets = ComputePowerTargets { + errors: duplicate_errors, + ..Default::default() + }; + for machine_id in &unique_machine_ids { + let Some(machine) = machines_by_id.get(machine_id) else { + targets.errors.push(not_found_component_result( + &machine_id.to_string(), + format!("machine {machine_id} not found"), + )); + continue; + }; + + match compute_power_route( + machine.rack_id.is_some(), + configured_backend, + bypass_state_controller, + ) { + ComputePowerRoute::CoreDirect => targets.core_direct.push(*machine_id), + ComputePowerRoute::ConfiguredDirect => targets.configured_direct.push(*machine_id), + ComputePowerRoute::RackStateController => { + if let Some(rack_id) = machine.rack_id.clone() { + push_rack_power_target( + &mut targets.rack_state_controller, + rack_id, + *machine_id, + ); + } else { + targets.core_direct.push(*machine_id); + } + } + } + } + + Ok(targets) +} + +fn rack_power_maintenance_scope( + machine_ids: Vec, + action: PowerAction, +) -> MaintenanceScope { + MaintenanceScope { + maintenance_request_id: None, + machine_ids, + switch_ids: Vec::new(), + power_shelf_ids: Vec::new(), + activities: vec![MaintenanceActivity::PowerControl { action }], + } +} + +async fn queue_one_rack_power_control( + api: &Api, + target: &RackPowerMaintenanceTarget, + action: PowerAction, +) -> Result<(), Status> { + let mut txn = 
api.txn_begin().await?; + let rack = + sqlx::query_as::<_, model::rack::Rack>("SELECT * FROM racks WHERE id = $1 FOR UPDATE") + .bind(&target.rack_id) + .fetch_optional(&mut txn) + .await + .map_err(|error| { + Status::internal(format!("failed to lock rack {}: {error}", target.rack_id)) + })? + .ok_or_else(|| Status::not_found(format!("rack {} not found", target.rack_id)))?; + + rack.check_accepts_maintenance().map_err(|reason| { + Status::failed_precondition(format!( + "rack {} cannot accept compute power control: {reason}", + target.rack_id + )) + })?; + + let mut config = rack.config.clone(); + config.power_control_dispatch_started_at = None; + config.maintenance_requested = Some(rack_power_maintenance_scope( + target.machine_ids.clone(), + action, + )); + db::rack::update(&mut txn, &target.rack_id, &config) + .await + .map_err(CarbideError::from)?; + txn.commit().await?; + Ok(()) +} + +async fn queue_compute_power_control_via_rack_state_controller( + api: &Api, + targets: &[RackPowerMaintenanceTarget], + action: PowerAction, +) -> Vec { + let result_count = targets.iter().map(|target| target.machine_ids.len()).sum(); + let mut results = Vec::with_capacity(result_count); + + for target in targets { + match queue_one_rack_power_control(api, target, action).await { + Ok(()) => results.extend( + target + .machine_ids + .iter() + .map(|machine_id| success_result(&machine_id.to_string())), + ), + Err(status) => results.extend( + target + .machine_ids + .iter() + .map(|machine_id| status_result(&machine_id.to_string(), status.clone())), + ), + } + } + + results +} + /// Maps raw proto `ComputeTrayComponent` values to display-name strings. 
/// /// Keep in sync with `format_compute_tray_component` in @@ -1083,6 +1294,7 @@ fn map_bmc_vendor_to_compute_tray(vendor: bmc_vendor::BMCVendor) -> ComputeTrayV bmc_vendor::BMCVendor::Dell => ComputeTrayVendor::Dell, bmc_vendor::BMCVendor::Hpe => ComputeTrayVendor::Hpe, bmc_vendor::BMCVendor::Lenovo => ComputeTrayVendor::Lenovo, + bmc_vendor::BMCVendor::LenovoAMI => ComputeTrayVendor::LenovoAmi, bmc_vendor::BMCVendor::Supermicro => ComputeTrayVendor::Supermicro, bmc_vendor::BMCVendor::Nvidia => ComputeTrayVendor::Nvidia, _ => ComputeTrayVendor::Unknown, @@ -1116,6 +1328,33 @@ async fn resolve_compute_tray_endpoints( let mut endpoints = Vec::with_capacity(machine_ids.len()); let mut ip_to_machine_id = HashMap::with_capacity(machine_ids.len()); let mut unresolved = Vec::new(); + let mut machine_ids_by_bmc_ip: HashMap> = HashMap::new(); + for &machine_id in machine_ids { + if let Some(bmc_ip) = machine_by_id + .get(&machine_id) + .and_then(|machine| machine.bmc_info.ip) + { + machine_ids_by_bmc_ip + .entry(bmc_ip) + .or_default() + .push(machine_id); + } + } + let duplicate_bmc_ip_reasons: HashMap<_, _> = machine_ids_by_bmc_ip + .into_iter() + .filter(|(_, machine_ids)| machine_ids.len() > 1) + .map(|(bmc_ip, machine_ids)| { + let machine_ids = machine_ids + .iter() + .map(ToString::to_string) + .collect::>() + .join(", "); + ( + bmc_ip, + format!("BMC IP {bmc_ip} is shared by machines [{machine_ids}]"), + ) + }) + .collect(); for &machine_id in machine_ids { let Some(machine) = machine_by_id.get(&machine_id) else { @@ -1126,18 +1365,25 @@ async fn resolve_compute_tray_endpoints( continue; }; - let Some(bmc_mac) = machine.bmc_info.mac else { + let Some(bmc_ip) = machine.bmc_info.ip else { unresolved.push(UnresolvedDevice { id: machine_id, - reason: "BMC MAC not available".into(), + reason: "BMC IP not configured".into(), }); continue; }; + if let Some(reason) = duplicate_bmc_ip_reasons.get(&bmc_ip) { + unresolved.push(UnresolvedDevice { + id: machine_id, + reason: 
reason.clone(), + }); + continue; + } - let Some(bmc_ip) = machine.bmc_info.ip else { + let Some(bmc_mac) = machine.bmc_info.mac else { unresolved.push(UnresolvedDevice { id: machine_id, - reason: "BMC IP not configured".into(), + reason: "BMC MAC not available".into(), }); continue; }; @@ -1164,7 +1410,8 @@ async fn resolve_compute_tray_endpoints( endpoints.push(ComputeTrayEndpoint { vendor, bmc_ip, - bmc_credentials, + bmc_port: machine.bmc_info.port, + authentication: ComputeTrayAuthentication::Credentials(bmc_credentials), }); } @@ -1184,6 +1431,59 @@ async fn resolve_compute_tray_endpoints( }) } +fn reconcile_direct_compute_power_results( + dispatch_endpoints: &[ComputeTrayEndpoint], + ip_to_machine_id: &HashMap, + backend_results: Vec, +) -> Vec { + let expected_ips: HashSet<_> = dispatch_endpoints + .iter() + .map(|endpoint| endpoint.bmc_ip) + .collect(); + let mut results = Vec::with_capacity(dispatch_endpoints.len()); + let mut backend_results_by_ip: HashMap<_, Vec<_>> = + HashMap::with_capacity(backend_results.len()); + + for result in backend_results { + let id = ip_to_machine_id + .get(&result.bmc_ip) + .map(ToString::to_string) + .unwrap_or_else(|| result.bmc_ip.to_string()); + + if !expected_ips.contains(&result.bmc_ip) { + results.push(error_result( + &id, + "backend returned an unexpected compute-tray result".into(), + )); + continue; + } + backend_results_by_ip + .entry(result.bmc_ip) + .or_default() + .push(result); + } + + for endpoint in dispatch_endpoints { + let bmc_ip = endpoint.bmc_ip; + let id = ip_to_machine_id + .get(&bmc_ip) + .map(ToString::to_string) + .unwrap_or_else(|| bmc_ip.to_string()); + let result = match backend_results_by_ip.get(&bmc_ip).map(Vec::as_slice) { + None | Some([]) => error_result(&id, "backend returned no compute-tray result".into()), + Some([result]) if result.success => success_result(&id), + Some([result]) => error_result(&id, result.error.clone().unwrap_or_default()), + Some(_) => error_result( + &id, + 
"backend returned duplicate compute-tray results".into(), + ), + }; + results.push(result); + } + + results +} + fn switch_mac_to_id_str(mac: &MacAddress, mac_to_id: &HashMap) -> String { mac_to_id .get(mac) @@ -1211,6 +1511,115 @@ fn map_fw_state(state: model::component_manager::FirmwareState) -> i32 { } } +async fn dispatch_compute_power_direct( + api: &Api, + manager: &dyn ComputeTrayManager, + machine_ids: &[MachineId], + action: PowerAction, +) -> Result<(Vec, Vec), Status> { + if machine_ids.is_empty() { + return Ok((Vec::new(), Vec::new())); + } + + let resolved = resolve_compute_tray_endpoints(api, machine_ids).await?; + let mut results: Vec<_> = resolved + .unresolved + .iter() + .map(|unresolved| error_result(&unresolved.id.to_string(), unresolved.reason.clone())) + .collect(); + // Direct dispatch retains the legacy request-time bookkeeping. Rack state-controller + // targets intentionally skip this helper because the queued scope carries its desired + // action and may execute later; execution-time alert suppression belongs with that + // controller path. 
+ let desired_state = desired_power_state(action) as i32; + let mut overrides_inserted = Vec::new(); + let mut dispatch_endpoints = Vec::with_capacity(resolved.resolved.endpoints.len()); + for endpoint in &resolved.resolved.endpoints { + let Some(&machine_id) = resolved.resolved.ip_to_machine_id.get(&endpoint.bmc_ip) else { + results.push(error_result( + &endpoint.bmc_ip.to_string(), + "resolved compute tray has no machine mapping".into(), + )); + continue; + }; + + if power_control_health_override(api, machine_id, true).await { + overrides_inserted.push(machine_id); + } + + let power_req = rpc::PowerOptionUpdateRequest { + machine_id: Some(machine_id), + power_state: desired_state, + }; + match crate::handlers::power_options::update_power_option(api, Request::new(power_req)) + .await + { + Ok(_) => dispatch_endpoints.push(endpoint.clone()), + Err(error) + if error.code() == Code::InvalidArgument + && error.message().contains("already set as") => + { + tracing::debug!( + %machine_id, + desired_state, + "power option already in desired state, skipping" + ); + dispatch_endpoints.push(endpoint.clone()); + } + Err(error) => results.push(error_result( + &machine_id.to_string(), + format!("failed to update power option: {error}"), + )), + } + } + + tracing::info!( + backend = manager.name(), + count = dispatch_endpoints.len(), + ?action, + "power control for compute trays" + ); + let backend_result = if dispatch_endpoints.is_empty() { + Ok(Vec::new()) + } else { + manager.power_control(&dispatch_endpoints, action).await + }; + + // Do not leak an override if the backend returns an operation-level error. 
+ for machine_id in overrides_inserted { + power_control_health_override(api, machine_id, false).await; + } + + let ips = dispatch_endpoints + .iter() + .map(|endpoint| endpoint.bmc_ip) + .collect(); + let backend_results = match backend_result { + Ok(results) => results, + Err(error) => { + let status = component_manager_error_to_status(error); + results.extend(dispatch_endpoints.iter().map(|endpoint| { + let id = resolved + .resolved + .ip_to_machine_id + .get(&endpoint.bmc_ip) + .map(ToString::to_string) + .unwrap_or_else(|| endpoint.bmc_ip.to_string()); + status_result(&id, status.clone()) + })); + return Ok((results, ips)); + } + }; + + results.extend(reconcile_direct_compute_power_results( + &dispatch_endpoints, + &resolved.resolved.ip_to_machine_id, + backend_results, + )); + + Ok((results, ips)) +} + // ---- Power Control ---- pub(crate) async fn component_power_control( @@ -1318,107 +1727,50 @@ pub(crate) async fn component_power_control( (results, ips) } rpc::component_power_control_request::Target::MachineIds(list) => { - if cm.compute_tray_use_state_controller && !bypass_state_controller { - // TODO: implement state controller path for compute tray power control - return Err(Status::unimplemented( - "compute tray power control through the state controller is not yet supported", - )); - } else { - let resolved = resolve_compute_tray_endpoints(api, &list.machine_ids).await?; - - let mut results: Vec<_> = resolved - .unresolved - .iter() - .map(|u| error_result(&u.id.to_string(), u.reason.clone())) - .collect(); - - let resolved_machine_ids: Vec<_> = resolved - .resolved - .endpoints - .iter() - .filter_map(|ep| resolved.resolved.ip_to_machine_id.get(&ep.bmc_ip).copied()) - .collect(); - - // Insert health overrides and update power-manager desired state - // before issuing Redfish commands. 
- let desired_state = desired_power_state(action) as i32; - let mut overrides_inserted = Vec::new(); - for &machine_id in &resolved_machine_ids { - let inserted = power_control_health_override(api, machine_id, true).await; - if inserted { - overrides_inserted.push(machine_id); - } - - let power_req = rpc::PowerOptionUpdateRequest { - machine_id: Some(machine_id), - power_state: desired_state, - }; - match crate::handlers::power_options::update_power_option( - api, - Request::new(power_req), - ) - .await - { - Ok(_) => {} - Err(e) - if e.code() == Code::InvalidArgument - && e.message().contains("already set as") => - { - tracing::debug!( - %machine_id, - desired_state, - "power option already in desired state, skipping" - ); - } - Err(e) => { - results.push(error_result( - &machine_id.to_string(), - format!("failed to update power option: {e}"), - )); - } - } - } - - tracing::info!( - backend = cm.compute_tray.name(), - count = resolved.resolved.endpoints.len(), - ?action, - "power control for compute trays" - ); - let backend_results = cm - .compute_tray - .power_control(&resolved.resolved.endpoints, action) - .await - .map_err(component_manager_error_to_status)?; - - // Clear health overrides after Redfish dispatch. 
- for machine_id in &overrides_inserted { - power_control_health_override(api, *machine_id, false).await; - } + let ComputePowerTargets { + core_direct, + configured_direct, + rack_state_controller, + errors: mut results, + } = classify_compute_power_targets( + api, + &list.machine_ids, + cm.compute_tray.backend(), + bypass_state_controller, + ) + .await?; + let mut exploration_ips = Vec::new(); + + if !core_direct.is_empty() { + let core_manager = CoreComputeTrayManager::new(api.redfish_pool.clone()); + let (direct_results, direct_ips) = + dispatch_compute_power_direct(api, &core_manager, &core_direct, action).await?; + results.extend(direct_results); + exploration_ips.extend(direct_ips); + } - let ips: Vec = resolved - .resolved - .endpoints - .iter() - .map(|ep| ep.bmc_ip) - .collect(); + if !configured_direct.is_empty() { + let (direct_results, direct_ips) = dispatch_compute_power_direct( + api, + cm.compute_tray.as_ref(), + &configured_direct, + action, + ) + .await?; + results.extend(direct_results); + exploration_ips.extend(direct_ips); + } - results.extend(backend_results.into_iter().map(|r| { - let id = resolved - .resolved - .ip_to_machine_id - .get(&r.bmc_ip) - .map(|id| id.to_string()) - .unwrap_or_else(|| r.bmc_ip.to_string()); - if r.success { - success_result(&id) - } else { - error_result(&id, r.error.unwrap_or_default()) - } - })); + results.extend( + queue_compute_power_control_via_rack_state_controller( + api, + &rack_state_controller, + action, + ) + .await, + ); - (results, ips) - } + (results, exploration_ips) } }; @@ -2546,6 +2898,153 @@ mod tests { assert_eq!(err.code(), Code::InvalidArgument); } + #[test] + fn compute_power_route_selects_backend_per_machine() { + use carbide_test_support::value_scenarios; + + value_scenarios!( + run = |(rack_associated, backend, bypass_state_controller)| { + compute_power_route(rack_associated, backend, bypass_state_controller) + }; + "standalone machines always use the direct Core wrapper" { + (false, 
ComputeBackend::Rms, false) => ComputePowerRoute::CoreDirect, + (false, ComputeBackend::Mock, false) => ComputePowerRoute::CoreDirect, + (false, ComputeBackend::Core, true) => ComputePowerRoute::CoreDirect, + } + + "rack RMS machines always use the state controller unless bypassed" { + (true, ComputeBackend::Rms, false) => ComputePowerRoute::RackStateController, + (true, ComputeBackend::Rms, true) => ComputePowerRoute::ConfiguredDirect, + } + + "rack machines on other configured backends dispatch directly" { + (true, ComputeBackend::Core, false) => ComputePowerRoute::ConfiguredDirect, + (true, ComputeBackend::Mock, false) => ComputePowerRoute::ConfiguredDirect, + } + ); + } + + #[test] + fn compute_power_duplicate_machine_ids_are_not_dispatched_twice() { + let machine_id_a = MachineId::new( + carbide_uuid::machine::MachineIdSource::ProductBoardChassisSerial, + [0; 32], + carbide_uuid::machine::MachineType::Host, + ); + let machine_id_b = MachineId::new( + carbide_uuid::machine::MachineIdSource::ProductBoardChassisSerial, + [1; 32], + carbide_uuid::machine::MachineType::Host, + ); + + let (unique, errors) = + deduplicate_compute_machine_ids(&[machine_id_a, machine_id_b, machine_id_a]); + + assert_eq!(unique, vec![machine_id_a, machine_id_b]); + assert_eq!(errors.len(), 1); + assert_eq!(errors[0].component_id, machine_id_a.to_string()); + assert_eq!( + errors[0].status, + rpc::ComponentManagerStatusCode::InvalidArgument as i32 + ); + assert!(errors[0].error.contains("duplicate machine id")); + } + + #[test] + fn direct_compute_power_results_are_reconciled_per_expected_machine() { + struct Case { + scenario: &'static str, + backend_results: Vec, + expected: Vec<(String, i32, &'static str)>, + } + + let machine_id = MachineId::new( + carbide_uuid::machine::MachineIdSource::ProductBoardChassisSerial, + [0; 32], + carbide_uuid::machine::MachineType::Host, + ); + let expected_ip: IpAddr = "192.0.2.10".parse().unwrap(); + let unexpected_ip: IpAddr = 
"192.0.2.99".parse().unwrap(); + let endpoints = [ComputeTrayEndpoint { + vendor: ComputeTrayVendor::Dell, + bmc_ip: expected_ip, + bmc_port: None, + authentication: ComputeTrayAuthentication::Credentials(Credentials::UsernamePassword { + username: "user".into(), + password: "password".into(), + }), + }]; + let ip_to_machine_id = HashMap::from([(expected_ip, machine_id)]); + let success = |bmc_ip| ComputeTrayResult { + bmc_ip, + success: true, + error: None, + }; + let internal_error = rpc::ComponentManagerStatusCode::InternalError as i32; + let machine_id = machine_id.to_string(); + + for case in [ + Case { + scenario: "duplicate results fail the target exactly once", + backend_results: vec![success(expected_ip), success(expected_ip)], + expected: vec![(machine_id.clone(), internal_error, "duplicate")], + }, + Case { + scenario: "missing results fail the target", + backend_results: vec![], + expected: vec![(machine_id.clone(), internal_error, "no compute-tray result")], + }, + Case { + scenario: "unexpected results are reported without hiding the missing target", + backend_results: vec![success(unexpected_ip)], + expected: vec![ + (unexpected_ip.to_string(), internal_error, "unexpected"), + (machine_id, internal_error, "no compute-tray result"), + ], + }, + ] { + let actual = reconcile_direct_compute_power_results( + &endpoints, + &ip_to_machine_id, + case.backend_results, + ); + assert_eq!(actual.len(), case.expected.len(), "{}", case.scenario); + for (actual, (expected_id, expected_status, expected_error)) in + actual.iter().zip(case.expected) + { + assert_eq!(actual.component_id, expected_id, "{}", case.scenario); + assert_eq!(actual.status, expected_status, "{}", case.scenario); + assert!( + actual.error.contains(expected_error), + "{}: {:?} should contain {:?}", + case.scenario, + actual.error, + expected_error, + ); + } + } + } + + #[test] + fn compute_power_rack_maintenance_scope_targets_only_requested_machines() { + let machine_id = MachineId::new( + 
carbide_uuid::machine::MachineIdSource::ProductBoardChassisSerial, + [0; 32], + carbide_uuid::machine::MachineType::Host, + ); + let scope = rack_power_maintenance_scope(vec![machine_id], PowerAction::ForceOff); + + assert_eq!(scope.machine_ids, vec![machine_id]); + assert!(scope.switch_ids.is_empty()); + assert!(scope.power_shelf_ids.is_empty()); + assert_eq!( + scope.activities, + vec![MaintenanceActivity::PowerControl { + action: PowerAction::ForceOff, + }] + ); + } + #[test] fn firmware_job_state_explicit_status_wins_for_empty_job() { let job = FirmwareUpgradeJob { diff --git a/crates/api-core/src/handlers/rack.rs b/crates/api-core/src/handlers/rack.rs index 4e4bcf91c8..14c39ad4d6 100644 --- a/crates/api-core/src/handlers/rack.rs +++ b/crates/api-core/src/handlers/rack.rs @@ -35,7 +35,7 @@ use futures_util::FutureExt; use health_report::HealthReportApplyMode; use model::machine::machine_search_config::MachineSearchConfig; use model::metadata::Metadata; -use model::rack::{MaintenanceActivity, MaintenanceScope, RackState}; +use model::rack::{MaintenanceActivity, MaintenanceScope, Rack, RackState}; use tonic::{Request, Response, Status}; use crate::CarbideError; @@ -227,20 +227,22 @@ pub async fn admin_force_delete_rack( let mut txn = api.txn_begin().await?; - let rack_list = db_rack::find_by( - &mut txn, - ObjectColumnFilter::One(db_rack::IdColumn, &rack_id), - ) - .await - .map_err(CarbideError::from)?; - - if rack_list.is_empty() { - return Err(CarbideError::NotFoundError { + let rack = sqlx::query_as::<_, Rack>("SELECT * FROM racks WHERE id = $1 FOR UPDATE") + .bind(&rack_id) + .fetch_optional(&mut txn) + .await + .map_err(|error| CarbideError::Internal { + message: format!("failed to lock rack {rack_id} for force delete: {error}"), + })? 
+ .ok_or_else(|| CarbideError::NotFoundError { kind: "rack", id: rack_id.to_string(), - } - .into()); - } + })?; + let maintenance_request_id = rack + .config + .maintenance_requested + .as_ref() + .and_then(|scope| scope.maintenance_request_id.as_deref()); db::state_history::delete_by_object_id( &mut txn, @@ -258,7 +260,10 @@ pub async fn admin_force_delete_rack( if let Err(error) = api .credential_manager - .delete_credentials(&rack_maintenance_access_token_key(&rack_id)) + .delete_credentials(&rack_maintenance_access_token_key( + &rack_id, + maintenance_request_id, + )) .await { tracing::warn!( @@ -656,6 +661,9 @@ pub(crate) async fn on_demand_rack_maintenance( } let scope = MaintenanceScope { + maintenance_request_id: maintenance_access_token + .as_ref() + .map(|_| uuid::Uuid::new_v4().to_string()), machine_ids: proto_scope .machine_ids .iter() @@ -774,27 +782,76 @@ pub(crate) async fn on_demand_rack_maintenance( } } - let access_token_stored = maintenance_access_token.is_some(); - if let Some(token) = maintenance_access_token { - api.credential_manager - .set_credentials( - &rack_maintenance_access_token_key(&rack_id), - &Credentials::UsernamePassword { - username: "access_token".into(), - password: token, - }, - ) - .await - .map_err(|error| CarbideError::Internal { - message: format!("failed to store rack maintenance access token: {error}"), - })?; + let maintenance_access_token_key = maintenance_access_token.as_ref().map(|_| { + rack_maintenance_access_token_key(&rack_id, scope.maintenance_request_id.as_deref()) + }); + if let (Some(token), Some(key)) = ( + maintenance_access_token.as_ref(), + maintenance_access_token_key.as_ref(), + ) && let Err(error) = api + .credential_manager + .set_credentials( + key, + &Credentials::UsernamePassword { + username: "access_token".into(), + password: token.clone(), + }, + ) + .await + { + if let Err(cleanup_error) = api.credential_manager.delete_credentials(key).await { + tracing::warn!( + rack_id = %rack_id, + 
maintenance_request_id = ?scope.maintenance_request_id, + error = %cleanup_error, + "failed to clean up request-scoped rack maintenance access token after store error", + ); + } + return Err(CarbideError::Internal { + message: format!("failed to store rack maintenance access token: {error}"), + } + .into()); } - let mut updated_config = rack.config.clone(); - updated_config.maintenance_requested = Some(scope); - - let db_result: Result<(), Status> = async { + let transaction_result: Result<_, Status> = async { let mut txn = api.txn_begin().await?; + // Re-read and lock the live rack before scheduling. The validation + // above can race another maintenance request while device scope and + // credentials are checked; deriving the write from this locked row + // prevents overwriting a newly queued request or an in-flight power + // dispatch marker. + let live_rack = sqlx::query_as::<_, Rack>("SELECT * FROM racks WHERE id = $1 FOR UPDATE") + .bind(&rack_id) + .fetch_optional(&mut txn) + .await + .map_err(|error| { + Status::internal(format!("failed to lock rack {rack_id}: {error}")) + })? + .ok_or_else(|| CarbideError::NotFoundError { + kind: "rack", + id: rack_id.to_string(), + })?; + if !matches!( + *live_rack.controller_state, + RackState::Ready | RackState::Error { .. } + ) { + return Err(CarbideError::InvalidArgument(format!( + "Rack {} is not in Ready or Error state (current: {:?}). 
Maintenance can only be requested when the rack is Ready or in Error.", + rack_id, *live_rack.controller_state + )) + .into()); + } + if live_rack.config.maintenance_requested.is_some() { + return Err(CarbideError::InvalidArgument(format!( + "On-demand maintenance for rack {} is already scheduled.", + rack_id, + )) + .into()); + } + + let mut updated_config = live_rack.config; + updated_config.power_control_dispatch_started_at = None; + updated_config.maintenance_requested = Some(scope); db_rack::update(&mut txn, &rack_id, &updated_config).await?; if updated_config .maintenance_requested @@ -809,25 +866,34 @@ pub(crate) async fn on_demand_rack_maintenance( { db_rack::update_firmware_upgrade_job(txn.as_mut(), &rack_id, None).await?; } - txn.commit().await?; - Ok(()) + + Ok::<_, Status>(txn) } .await; - if let Err(status) = db_result { - if access_token_stored - && let Err(error) = api - .credential_manager - .delete_credentials(&rack_maintenance_access_token_key(&rack_id)) - .await - { - tracing::warn!( - rack_id = %rack_id, - error = %error, - "failed to delete rack maintenance access token after DB error", - ); + + let txn = match transaction_result { + Ok(txn) => txn, + Err(error) => { + // No commit was attempted, so this request cannot own the rack + // maintenance claim. Its unique credential is safe to remove. + if let Some(key) = maintenance_access_token_key.as_ref() + && let Err(cleanup_error) = api.credential_manager.delete_credentials(key).await + { + tracing::warn!( + rack_id = %rack_id, + error = %cleanup_error, + "failed to clean up rejected request-scoped rack maintenance access token", + ); + } + return Err(error); } - return Err(status); - } + }; + + // Commit errors are outcome-ambiguous. Retain this request's unique token: + // the database may have committed a scope that references it. If the commit + // did fail, the unreferenced request-scoped token cannot affect a later + // maintenance request. 
+ txn.commit().await?; tracing::info!("On-demand maintenance scheduled for rack {}", rack_id,); diff --git a/crates/api-core/src/setup.rs b/crates/api-core/src/setup.rs index e2da222fda..0f49b812a3 100644 --- a/crates/api-core/src/setup.rs +++ b/crates/api-core/src/setup.rs @@ -1151,6 +1151,13 @@ async fn initialize_and_start_controllers<'a>( db_pool: db_pool.clone(), db_reader: db_pool.clone().into(), redfish_client_pool: shared_redfish_pool.clone(), + core_compute_tray_manager: Arc::new( + component_manager::core_compute_manager::CoreComputeTrayManager::new( + shared_redfish_pool.clone(), + ), + ), + component_manager: component_manager.clone().map(Arc::new), + credential_reader: credential_manager.clone(), ipmi_tool: ipmi_tool.clone(), site_config: carbide_config.machine_state_handler_site_config().into(), per_object_metrics_registry: per_object_metrics_registry.clone(), @@ -1334,6 +1341,7 @@ async fn initialize_and_start_controllers<'a>( .services( RackStateHandlerServices { db_pool: db_pool.clone(), + component_manager: component_manager.clone().map(Arc::new), rms_client: rms_client.clone(), site_config: RackConfig { rms: carbide_config.rms.clone(), diff --git a/crates/api-core/src/tests/common/api_fixtures/mod.rs b/crates/api-core/src/tests/common/api_fixtures/mod.rs index d6135aa0c2..2c531fc6f3 100644 --- a/crates/api-core/src/tests/common/api_fixtures/mod.rs +++ b/crates/api-core/src/tests/common/api_fixtures/mod.rs @@ -182,6 +182,7 @@ pub struct TestEnvOverrides { pub compute_allocation_enforcement: Option, pub nmxc_simulator: Option, pub redfish_overrides: Option, + pub component_manager_config: Option, pub nras_should_fail_parsing: Option>, pub vpc_prefixes_drain_period: Option, pub dhcp_lease_expiry_handling: Option, @@ -303,6 +304,7 @@ pub struct TestEnv { pub nvl_partition_monitor: Arc>, pub switch_cert_monitor: Arc>, pub test_credential_manager: Arc, + pub test_credential_reader: Arc, pub rms_sim: Arc, pub test_component_manager: Option>, pub 
drop_guard: DropGuard, @@ -333,6 +335,13 @@ impl TestEnv { db_pool: self.pool.clone(), db_reader: self.pool.clone().into(), redfish_client_pool: self.redfish_sim.clone(), + core_compute_tray_manager: Arc::new( + component_manager::core_compute_manager::CoreComputeTrayManager::new( + self.redfish_sim.clone(), + ), + ), + component_manager: self.test_component_manager.clone(), + credential_reader: self.test_credential_reader.clone(), ipmi_tool: self.ipmi_tool.clone(), site_config: self.config.machine_state_handler_site_config().into(), per_object_metrics_registry: self.per_object_metrics_registry(), @@ -358,6 +367,7 @@ impl TestEnv { pub fn rack_state_handler_services(&self) -> RackStateHandlerServices { RackStateHandlerServices { db_pool: self.pool.clone(), + component_manager: self.test_component_manager.clone(), rms_client: self.rms_sim.as_rms_client(), site_config: RackConfig { rms: self.config.rms.clone(), @@ -1244,12 +1254,13 @@ pub async fn create_test_env_with_overrides( let test_meter = TestMeter::default(); let credential_manager = Arc::new(TestCredentialManager::default()); - let chained_reader = ChainedCredentialReader::from(vec![ - Box::new(test_static_credential_snapshot()) as Box, - Box::new(credential_manager.clone()), - ]); + let credential_reader: Arc = + Arc::new(ChainedCredentialReader::from(vec![ + Box::new(test_static_credential_snapshot()) as Box, + Box::new(credential_manager.clone()), + ])); let composite_manager: Arc = Arc::new(CompositeCredentialManager::new( - chained_reader, + credential_reader.clone(), credential_manager.clone(), )); @@ -1354,19 +1365,22 @@ pub async fn create_test_env_with_overrides( .rack_profiles .extend(config.rack_profiles.rack_profiles.clone()); - let test_component_manager = component_manager::component_manager::build_component_manager( - &component_manager::config::ComponentManagerConfig { + let component_manager_config = overrides.component_manager_config.unwrap_or_else(|| { + 
component_manager::config::ComponentManagerConfig { nv_switch_backend: component_manager::nv_switch_manager::Backend::Rms, power_shelf_backend: component_manager::power_shelf_manager::Backend::Rms, compute_tray_backend: component_manager::compute_tray_manager::Backend::Mock, nv_switch_use_state_controller: true, ..Default::default() - }, + } + }); + let test_component_manager = component_manager::component_manager::build_component_manager( + &component_manager_config, component_manager_rack_profiles, rms_sim.as_rms_client(), None, Some(db_pool.clone()), - None, + Some(redfish_sim.clone()), ) .await .expect("test component manager should build"); @@ -1496,6 +1510,13 @@ pub async fn create_test_env_with_overrides( db_pool: db_pool.clone(), db_reader: db_pool.clone().into(), redfish_client_pool: redfish_sim.clone(), + core_compute_tray_manager: Arc::new( + component_manager::core_compute_manager::CoreComputeTrayManager::new( + redfish_sim.clone(), + ), + ), + component_manager: test_component_manager.clone(), + credential_reader: credential_reader.clone(), ipmi_tool: ipmi_tool.clone(), site_config: config.machine_state_handler_site_config().into(), per_object_metrics_registry: per_object_metrics_registry.clone(), @@ -1635,6 +1656,7 @@ pub async fn create_test_env_with_overrides( .services( RackStateHandlerServices { db_pool: db_pool.clone(), + component_manager: test_component_manager.clone(), rms_client: rms_sim.as_rms_client(), site_config: RackConfig { rms: config.rms.clone(), @@ -1790,6 +1812,7 @@ pub async fn create_test_env_with_overrides( nvl_partition_monitor: Arc::new(Mutex::new(nvl_partition_monitor)), switch_cert_monitor: Arc::new(Mutex::new(switch_cert_monitor)), test_credential_manager: credential_manager.clone(), + test_credential_reader: credential_reader, rms_sim, test_component_manager, drop_guard: cancel_token.drop_guard(), diff --git a/crates/api-core/src/tests/component_manager_compute_power.rs 
b/crates/api-core/src/tests/component_manager_compute_power.rs new file mode 100644 index 0000000000..8385ce1631 --- /dev/null +++ b/crates/api-core/src/tests/component_manager_compute_power.rs @@ -0,0 +1,422 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use carbide_redfish::libredfish::test_support::RedfishSimAction; +use carbide_secrets::credentials::{CredentialKey, CredentialType, Credentials}; +use carbide_uuid::machine::MachineId; +use carbide_uuid::rack::{RackId, RackProfileId}; +use component_manager::compute_tray_manager::Backend as ComputeBackend; +use component_manager::config::ComponentManagerConfig; +use component_manager::nv_switch_manager::Backend as NvSwitchBackend; +use component_manager::power_shelf_manager::Backend as PowerShelfBackend; +use db::rack as db_rack; +use librms::protos::rack_manager as rms; +use model::component_manager::PowerAction; +use model::rack::{MaintenanceActivity, RackConfig, RackState}; +use model::test_support::ManagedHostConfig; +use rpc::common::{MachineIdList, SystemPowerControl}; +use rpc::forge::ComponentPowerControlRequest; +use rpc::forge::component_power_control_request::Target; +use rpc::forge::forge_server::Forge; +use tonic::Request; + +use crate::test_support::fixture_config::FixtureDefault as _; +use crate::tests::common::api_fixtures::site_explorer::new_host; +use crate::tests::common::api_fixtures::{ + TEST_RMS_RACK_PROFILE_ID, TestEnv, TestEnvOverrides, create_test_env_with_overrides, +}; + +fn rms_compute_overrides() -> TestEnvOverrides { + TestEnvOverrides { + component_manager_config: Some(ComponentManagerConfig { + nv_switch_backend: NvSwitchBackend::Mock, + power_shelf_backend: PowerShelfBackend::Mock, + compute_tray_backend: ComputeBackend::Rms, + ..Default::default() + }), + ..Default::default() + } +} + +fn core_compute_overrides() -> TestEnvOverrides { + TestEnvOverrides { + component_manager_config: 
Some(ComponentManagerConfig { + nv_switch_backend: NvSwitchBackend::Mock, + power_shelf_backend: PowerShelfBackend::Mock, + compute_tray_backend: ComputeBackend::Core, + ..Default::default() + }), + ..Default::default() + } +} + +async fn create_rms_compute_env( + pool: sqlx::PgPool, +) -> Result<(TestEnv, RackId, model::machine::ManagedHostStateSnapshot), Box> +{ + let env = create_test_env_with_overrides(pool.clone(), rms_compute_overrides()).await; + + let rack_id = RackId::new(uuid::Uuid::new_v4().to_string()); + let mut txn = pool.begin().await?; + let rack = db_rack::create( + txn.as_mut(), + &rack_id, + Some(&RackProfileId::new(TEST_RMS_RACK_PROFILE_ID)), + &RackConfig::default(), + None, + ) + .await?; + db_rack::try_update_controller_state( + txn.as_mut(), + &rack_id, + rack.controller_state.version, + rack.controller_state.version.increment(), + &RackState::Ready, + ) + .await?; + txn.commit().await?; + + // Provision the host as a standalone machine first. Associating it with the RMS rack before + // provisioning would make the background machine lifecycle consume the RMS mock while it is + // still setting up the host, rather than leaving the mock isolated for this API request. 
+ let host = new_host(&env, ManagedHostConfig::default()).await?; + + let mut txn = pool.begin().await?; + sqlx::query("UPDATE machines SET rack_id = $1 WHERE id = $2") + .bind(rack_id.as_str()) + .bind(host.host_snapshot.id) + .execute(txn.as_mut()) + .await?; + txn.commit().await?; + + Ok((env, rack_id, host)) +} + +#[crate::sqlx_test] +async fn standalone_power_uses_core_under_global_rms_for_each_bypass_setting( + pool: sqlx::PgPool, +) -> Result<(), Box> { + struct Case { + name: &'static str, + bypass_state_controller: bool, + } + + let env = create_test_env_with_overrides(pool, rms_compute_overrides()).await; + let host = new_host(&env, ManagedHostConfig::default()).await?; + + for case in [ + Case { + name: "normal dispatch", + bypass_state_controller: false, + }, + Case { + name: "explicit state-controller bypass", + bypass_state_controller: true, + }, + ] { + let timepoint = env.redfish_sim.timepoint(); + let response = env + .api + .component_power_control(Request::new(power_request( + host.host_snapshot.id, + case.bypass_state_controller, + ))) + .await? 
+ .into_inner(); + + assert_eq!(response.results.len(), 1, "{}", case.name); + assert_eq!( + response.results[0].component_id, + host.host_snapshot.id.to_string(), + "{}", + case.name + ); + assert_eq!( + response.results[0].status, + rpc::forge::ComponentManagerStatusCode::Success as i32, + "{}", + case.name + ); + assert_eq!( + env.redfish_sim.actions_since(&timepoint).all_hosts(), + vec![RedfishSimAction::Power( + libredfish::SystemPowerControl::ForceRestart + )], + "{}", + case.name + ); + assert!( + env.rms_sim + .submitted_batch_set_power_state_requests() + .await + .is_empty(), + "{} must not dispatch to RMS", + case.name + ); + } + + let mut txn = env.pool.begin().await?; + let racks = db_rack::find_by( + txn.as_mut(), + db::ObjectColumnFilter::::All, + ) + .await?; + assert!( + racks + .iter() + .all(|rack| rack.config.maintenance_requested.is_none()), + "standalone power must not queue rack maintenance" + ); + + Ok(()) +} + +fn power_request( + machine_id: MachineId, + bypass_state_controller: bool, +) -> ComponentPowerControlRequest { + power_request_for_machines( + vec![machine_id], + SystemPowerControl::ForceRestart, + bypass_state_controller, + ) +} + +fn power_request_for_machines( + machine_ids: Vec, + action: SystemPowerControl, + bypass_state_controller: bool, +) -> ComponentPowerControlRequest { + ComponentPowerControlRequest { + target: Some(Target::MachineIds(MachineIdList { machine_ids })), + action: action as i32, + bypass_state_controller, + } +} + +#[crate::sqlx_test] +async fn core_component_fixture_uses_redfish_and_composite_credentials( + pool: sqlx::PgPool, +) -> Result<(), Box> { + let env = create_test_env_with_overrides(pool, core_compute_overrides()).await; + + assert_eq!( + env.test_component_manager + .as_ref() + .expect("test component manager") + .compute_tray + .backend(), + ComputeBackend::Core, + ); + let credentials = env + .machine_state_handler_services() + .credential_reader + .get_credentials(&CredentialKey::HostRedfish 
{ + credential_type: CredentialType::SiteDefault, + }) + .await + .expect("read static test credentials"); + assert_eq!( + credentials, + Some(Credentials::UsernamePassword { + username: "root".into(), + password: "hostredfish_sitedefault".into(), + }), + ); + + Ok(()) +} + +#[crate::sqlx_test] +async fn direct_power_rejects_every_machine_sharing_a_bmc_ip( + pool: sqlx::PgPool, +) -> Result<(), Box> { + let env = create_test_env_with_overrides(pool, core_compute_overrides()).await; + let first_host = new_host(&env, ManagedHostConfig::default()).await?; + let second_host = new_host(&env, ManagedHostConfig::default()).await?; + let machine_ids = vec![first_host.host_snapshot.id, second_host.host_snapshot.id]; + let duplicate_bmc_ip = first_host + .host_snapshot + .bmc_info + .ip + .expect("first host BMC IP"); + let second_bmc_interface_id = second_host + .host_snapshot + .bmc_info + .machine_interface_id + .expect("second host BMC interface"); + + let update = + sqlx::query("UPDATE machine_interface_addresses SET address = $1 WHERE interface_id = $2") + .bind(duplicate_bmc_ip) + .bind(second_bmc_interface_id) + .execute(&env.pool) + .await?; + assert_eq!(update.rows_affected(), 1); + + let mut txn = env.pool.begin().await?; + let before = db::power_options::get_by_ids(&machine_ids, txn.as_mut()).await?; + txn.commit().await?; + let redfish_timepoint = env.redfish_sim.timepoint(); + + let response = env + .api + .component_power_control(Request::new(power_request_for_machines( + machine_ids.clone(), + SystemPowerControl::ForceOff, + false, + ))) + .await? 
+ .into_inner(); + + assert_eq!(response.results.len(), machine_ids.len()); + for machine_id in &machine_ids { + let result = response + .results + .iter() + .find(|result| result.component_id == machine_id.to_string()) + .expect("duplicate-IP machine result"); + assert_eq!( + result.status, + rpc::forge::ComponentManagerStatusCode::InternalError as i32, + ); + assert!(result.error.contains(&duplicate_bmc_ip.to_string())); + assert!( + machine_ids + .iter() + .all(|machine_id| result.error.contains(&machine_id.to_string())), + "duplicate-IP result did not identify every affected machine: {}", + result.error, + ); + } + assert!( + env.redfish_sim + .actions_since(&redfish_timepoint) + .all_hosts() + .is_empty(), + "duplicate BMC IPs must not dispatch Core Redfish power", + ); + assert!( + env.rms_sim + .submitted_batch_set_power_state_requests() + .await + .is_empty(), + "duplicate BMC IPs must not dispatch RMS power", + ); + + let mut txn = env.pool.begin().await?; + let after = db::power_options::get_by_ids(&machine_ids, txn.as_mut()).await?; + txn.commit().await?; + for machine_id in &machine_ids { + let before = before + .iter() + .find(|options| options.host_id == *machine_id) + .expect("power options before request"); + let after = after + .iter() + .find(|options| options.host_id == *machine_id) + .expect("power options after request"); + assert_eq!(after.desired_power_state, before.desired_power_state); + assert_eq!( + after.desired_power_state_version, + before.desired_power_state_version, + ); + } + + Ok(()) +} + +#[crate::sqlx_test] +async fn rack_rms_power_queues_without_synchronous_backend_dispatch( + pool: sqlx::PgPool, +) -> Result<(), Box> { + let (env, rack_id, host) = create_rms_compute_env(pool).await?; + + let response = env + .api + .component_power_control(Request::new(power_request(host.host_snapshot.id, false))) + .await? 
+ .into_inner(); + + assert_eq!(response.results.len(), 1); + assert_eq!( + response.results[0].status, + rpc::forge::ComponentManagerStatusCode::Success as i32 + ); + assert!( + env.rms_sim + .submitted_batch_set_power_state_requests() + .await + .is_empty(), + "queued rack power must not dispatch RMS synchronously" + ); + + let mut txn = env.pool.begin().await?; + let rack = db_rack::find_by( + txn.as_mut(), + db::ObjectColumnFilter::One(db_rack::IdColumn, &rack_id), + ) + .await? + .pop() + .expect("queued rack"); + let scope = rack + .config + .maintenance_requested + .expect("rack maintenance request"); + assert_eq!(scope.machine_ids, vec![host.host_snapshot.id]); + assert_eq!( + scope.activities, + vec![MaintenanceActivity::PowerControl { + action: PowerAction::ForceRestart, + }] + ); + + Ok(()) +} + +#[crate::sqlx_test] +async fn rack_rms_power_bypass_dispatches_exact_action_directly( + pool: sqlx::PgPool, +) -> Result<(), Box> { + let (env, rack_id, host) = create_rms_compute_env(pool).await?; + env.rms_sim + .queue_batch_set_power_state_response(Ok(rms::BatchSetPowerStateResponse { + response: Some(rms::NodeBatchResponse { + status: rms::ReturnCode::Success as i32, + stats: Some(rms::NodeOperationStats { + total_nodes: 1, + successful_nodes: 1, + failed_nodes: 0, + }), + ..Default::default() + }), + })) + .await; + + let response = env + .api + .component_power_control(Request::new(power_request(host.host_snapshot.id, true))) + .await? + .into_inner(); + + assert_eq!(response.results.len(), 1); + assert_eq!( + response.results[0].status, + rpc::forge::ComponentManagerStatusCode::Success as i32 + ); + let calls = env.rms_sim.submitted_batch_set_power_state_requests().await; + assert_eq!(calls.len(), 1); + assert_eq!(calls[0].operation, rms::PowerOperation::ForceRestart as i32); + + let mut txn = env.pool.begin().await?; + let rack = db_rack::find_by( + txn.as_mut(), + db::ObjectColumnFilter::One(db_rack::IdColumn, &rack_id), + ) + .await? 
+ .pop() + .expect("bypassed rack"); + assert!(rack.config.maintenance_requested.is_none()); + + Ok(()) +} diff --git a/crates/api-core/src/tests/machine_power.rs b/crates/api-core/src/tests/machine_power.rs index 3dd3e6cdc4..22900537a9 100644 --- a/crates/api-core/src/tests/machine_power.rs +++ b/crates/api-core/src/tests/machine_power.rs @@ -71,6 +71,46 @@ async fn test_power_manager_create_entry_on_host_creation( Ok(()) } +#[crate::sqlx_test] +async fn test_power_manager_desired_state_rejects_stale_version( + pool: sqlx::PgPool, +) -> Result<(), Box> { + let env = + create_test_env_with_overrides(pool, TestEnvOverrides::default().enable_power_manager()) + .await; + let (host_machine_id, _dpu_machine_id) = create_managed_host(&env).await.into(); + + let mut txn = env.pool.begin().await?; + let original = db::power_options::get_by_ids(&[host_machine_id], &mut txn) + .await? + .pop() + .expect("host power options"); + db::power_options::update_desired_state( + &host_machine_id, + PowerState::Off, + &original.desired_power_state_version, + &mut txn, + ) + .await?; + txn.commit().await?; + + let mut txn = env.pool.begin().await?; + let error = db::power_options::update_desired_state( + &host_machine_id, + PowerState::On, + &original.desired_power_state_version, + &mut txn, + ) + .await + .unwrap_err(); + assert!( + matches!(error, db::DatabaseError::ConcurrentModificationError(..)), + "expected stale desired-state update to fail, got {error:?}" + ); + + Ok(()) +} + #[crate::sqlx_test] async fn test_power_manager_update_fail_since_no_maintenance_set( pool: sqlx::PgPool, diff --git a/crates/api-core/src/tests/machine_states.rs b/crates/api-core/src/tests/machine_states.rs index 7ef5a64ee6..ff301cead4 100644 --- a/crates/api-core/src/tests/machine_states.rs +++ b/crates/api-core/src/tests/machine_states.rs @@ -16,8 +16,8 @@ */ use std::collections::HashMap; use std::net::IpAddr; -use std::sync::Arc; use std::sync::atomic::AtomicBool; +use std::sync::{Arc, Mutex}; use 
::rpc::measured_boot::FromGrpc; use base64::prelude::*; @@ -25,6 +25,8 @@ use carbide_machine_controller::context::MachineStateHandlerContextObjects; use carbide_machine_controller::handler::{MachineStateHandlerBuilder, handler_host_power_control}; use carbide_machine_controller::metrics::MachineMetrics; use carbide_redfish::libredfish::test_support::RedfishSimAction; +use carbide_redfish::libredfish::{RedfishAuth, RedfishClientPool}; +use carbide_secrets::credentials::CredentialReader; use carbide_site_explorer::MachineCreator; use carbide_site_explorer::config::SiteExplorerConfig; use carbide_utils::arch::CpuArchitecture; @@ -44,6 +46,14 @@ use common::api_fixtures::{ TestEnv, TestManagedHost, create_managed_host, create_managed_host_with_config, create_test_env, create_test_env_with_overrides, get_config, }; +use component_manager::component_manager::ComponentManager; +use component_manager::compute_tray_manager::{ + Backend as ComputeBackend, ComputeTrayEndpoint, ComputeTrayFirmwareUpdateStatus, + ComputeTrayManager, ComputeTrayResult, ComputeTrayVendor, +}; +use component_manager::error::ComponentManagerError; +use component_manager::mock::{MockNvSwitchManager, MockPowerShelfManager}; +use component_manager::types::FirmwareUpdateOptions; use health_report::HealthReport; use ipnetwork::IpNetwork; use mac_address::MacAddress; @@ -51,6 +61,7 @@ use measured_boot::bundle::MeasurementBundle; use measured_boot::pcr::PcrRegisterValue; use measured_boot::records::MeasurementBundleState; use measured_boot::report::MeasurementReport; +use model::component_manager::{ComputeTrayComponent, PowerAction}; use model::controller_outcome::PersistentStateHandlerOutcome; use model::expected_machine::{ExpectedMachine, ExpectedMachineData}; use model::firmware::FirmwareComponentType; @@ -60,10 +71,11 @@ use model::machine::machine_search_config::MachineSearchConfig; use model::machine::{ BiosConfigInfo, BiosConfigState, CleanupContext, CleanupState, DpuDiscoveringState, 
DpuInitState, DpuReprovisionStates, FailureCause, FailureDetails, FailureSource, - HostPlatformConfigurationState, HostReprovisionState, InstallDpuOsState, InstanceState, - LockdownMode, MachineState, MachineValidatingState, ManagedHostState, MeasuringState, - PowerState, RetryInfo, SetBootOrderInfo, SetBootOrderState, SetSecureBootState, - SpdmMeasuringState, StateMachineArea, ValidationState, + HostPlatformConfigurationState, HostReprovisionState, InitialResetPhase, InstallDpuOsState, + InstanceState, LockdownMode, MachineLastRebootRequestedMode, MachineState, + MachineValidatingState, ManagedHostState, MeasuringState, PowerState, RetryInfo, + SetBootOrderInfo, SetBootOrderState, SetSecureBootState, SpdmMeasuringState, StateMachineArea, + ValidationState, }; use model::network_segment::NetworkSegmentType; use model::site_explorer::{EndpointExplorationReport, ExploredDpu, ExploredManagedHost}; @@ -74,10 +86,113 @@ use rpc::forge_agent_control_response::{Action, LegacyAction}; use rpc::machine_discovery::AttestKeyInfo; use rpc::{DiscoveryData, DiscoveryInfo}; use state_controller::db_write_batch::DbWriteBatch; -use state_controller::state_handler::StateHandlerContext; +use state_controller::state_handler::{StateHandler, StateHandlerContext, StateHandlerOutcome}; use tonic::{Code, Request}; use crate::cfg::file::DpuConfig as InitialDpuConfig; + +#[derive(Debug, Clone, PartialEq, Eq)] +struct RecordedComputePowerCall { + bmc_ip: IpAddr, + bmc_port: Option, + vendor: ComputeTrayVendor, + action: PowerAction, +} + +#[derive(Debug, Default)] +struct RecordingRmsComputeManager { + calls: Mutex>, +} + +impl RecordingRmsComputeManager { + fn calls(&self) -> Vec { + self.calls.lock().unwrap().clone() + } +} + +#[async_trait::async_trait] +impl ComputeTrayManager for RecordingRmsComputeManager { + fn name(&self) -> &str { + "recording-rms" + } + + fn backend(&self) -> ComputeBackend { + ComputeBackend::Rms + } + + async fn power_control( + &self, + endpoints: 
&[ComputeTrayEndpoint], + action: PowerAction, + ) -> Result, ComponentManagerError> { + self.calls + .lock() + .unwrap() + .extend(endpoints.iter().map(|endpoint| RecordedComputePowerCall { + bmc_ip: endpoint.bmc_ip, + bmc_port: endpoint.bmc_port, + vendor: endpoint.vendor, + action, + })); + Ok(endpoints + .iter() + .map(|endpoint| ComputeTrayResult { + bmc_ip: endpoint.bmc_ip, + success: true, + error: None, + }) + .collect()) + } + + async fn update_firmware( + &self, + _endpoints: &[ComputeTrayEndpoint], + _target_version: &str, + _components: &[ComputeTrayComponent], + _options: &FirmwareUpdateOptions, + ) -> Result, ComponentManagerError> { + Err(ComponentManagerError::Internal( + "firmware update is outside this test".into(), + )) + } + + async fn get_firmware_status( + &self, + _endpoints: &[ComputeTrayEndpoint], + ) -> Result, ComponentManagerError> { + Err(ComponentManagerError::Internal( + "firmware status is outside this test".into(), + )) + } + + async fn list_firmware_bundles(&self) -> Result, ComponentManagerError> { + Err(ComponentManagerError::Internal( + "firmware bundles are outside this test".into(), + )) + } +} + +struct PanicRedfishClientPool { + credential_reader: Arc, +} + +#[async_trait::async_trait] +impl RedfishClientPool for PanicRedfishClientPool { + async fn create_client( + &self, + _host: &str, + _port: Option, + _auth: RedfishAuth, + _vendor: Option, + ) -> Result, carbide_redfish::libredfish::RedfishClientCreationError> + { + panic!("RMS power routing must not create a direct Redfish client") + } + + fn credential_reader(&self) -> &dyn CredentialReader { + self.credential_reader.as_ref() + } +} use crate::handlers::measured_boot::rpc_forge::MachineDiscoveryInfo; use crate::measured_boot::convert_vec; use crate::test_support::fixture_config::{ @@ -2653,6 +2768,349 @@ async fn test_update_reboot_requested_time_off(pool: sqlx::PgPool) { } } +#[crate::sqlx_test] +async fn 
test_ac_powercycle_fallback_records_force_off_for_host_and_dpus(pool: sqlx::PgPool) { + let env = create_test_env(pool).await; + let mh = create_managed_host(&env).await; + + let mut txn = env.db_txn().await; + let snapshot = mh.snapshot(&mut txn).await; + assert!(!snapshot.dpu_snapshots.is_empty()); + let redfish_timepoint = env.redfish_sim.timepoint(); + let mut write_batch = DbWriteBatch::new(); + let mut services = env.machine_state_handler_services(); + let mut metrics = MachineMetrics::default(); + let mut ctx = StateHandlerContext:: { + services: &mut services, + metrics: &mut metrics, + pending_db_writes: &mut write_batch, + }; + + handler_host_power_control( + &snapshot, + &mut ctx, + libredfish::SystemPowerControl::ACPowercycle, + ) + .await + .unwrap(); + write_batch.apply_all(&mut txn).await.unwrap(); + txn.commit().await.unwrap(); + + assert_eq!( + env.redfish_sim + .actions_since(&redfish_timepoint) + .all_hosts(), + vec![RedfishSimAction::Power( + libredfish::SystemPowerControl::ForceOff + )], + "the test Redfish backend does not support AC power cycle and must fall back to ForceOff" + ); + + let mut txn = env.db_txn().await; + let updated = mh.snapshot(&mut txn).await; + assert_eq!( + updated.host_snapshot.last_reboot_requested.unwrap().mode, + MachineLastRebootRequestedMode::PowerOff + ); + assert!(updated.dpu_snapshots.iter().all(|dpu| { + dpu.last_reboot_requested + .is_some_and(|reboot| reboot.mode == MachineLastRebootRequestedMode::PowerOff) + })); +} + +#[crate::sqlx_test] +async fn test_rack_rms_host_power_bypasses_core_redfish_preflight(pool: sqlx::PgPool) { + let env = create_test_env(pool).await; + let mh = create_managed_host(&env).await; + let rack_id = carbide_uuid::rack::RackId::new(uuid::Uuid::new_v4().to_string()); + + let mut txn = env.db_txn().await; + db::rack::create( + &mut txn, + &rack_id, + None, + &model::rack::RackConfig::default(), + None, + ) + .await + .unwrap(); + sqlx::query("UPDATE machines SET rack_id = $1 WHERE id = 
$2") + .bind(&rack_id) + .bind(mh.host().id) + .execute(&mut *txn) + .await + .unwrap(); + txn.commit().await.unwrap(); + + let mut txn = env.db_txn().await; + let snapshot = mh.snapshot(&mut txn).await; + txn.commit().await.unwrap(); + let host = &snapshot.host_snapshot; + let expected_bmc_ip = host.bmc_info.ip.expect("test host BMC IP"); + let expected_bmc_port = host.bmc_info.port; + + let compute_manager = Arc::new(RecordingRmsComputeManager::default()); + let component_manager = ComponentManager::new( + Arc::new(MockNvSwitchManager), + Arc::new(MockPowerShelfManager), + compute_manager.clone(), + false, + false, + false, + ); + let mut services = env.machine_state_handler_services(); + services.component_manager = Some(Arc::new(component_manager)); + services.redfish_client_pool = Arc::new(PanicRedfishClientPool { + credential_reader: env.test_credential_manager.clone(), + }); + + let mut write_batch = DbWriteBatch::new(); + let mut metrics = MachineMetrics::default(); + let mut ctx = StateHandlerContext:: { + services: &mut services, + metrics: &mut metrics, + pending_db_writes: &mut write_batch, + }; + + handler_host_power_control( + &snapshot, + &mut ctx, + libredfish::SystemPowerControl::GracefulRestart, + ) + .await + .unwrap(); + + assert_eq!( + compute_manager.calls(), + vec![RecordedComputePowerCall { + bmc_ip: expected_bmc_ip, + bmc_port: expected_bmc_port, + vendor: ComputeTrayVendor::Dell, + action: PowerAction::GracefulRestart, + }] + ); +} + +#[crate::sqlx_test] +async fn test_rack_rms_initial_reset_uses_core_redfish_end_to_end(pool: sqlx::PgPool) { + async fn run_tick( + handler: &carbide_machine_controller::handler::MachineStateHandler, + snapshot: &mut model::machine::ManagedHostStateSnapshot, + services: &mut carbide_machine_controller::context::MachineStateHandlerServices, + ) -> StateHandlerOutcome { + let machine_id = snapshot.host_snapshot.id; + let current_state = snapshot.managed_state.clone(); + let mut write_batch = 
DbWriteBatch::new(); + let mut metrics = MachineMetrics::default(); + let mut ctx = StateHandlerContext:: { + services, + metrics: &mut metrics, + pending_db_writes: &mut write_batch, + }; + + handler + .handle_object_state(&machine_id, snapshot, &current_state, &mut ctx) + .await + .unwrap() + } + + fn set_state(snapshot: &mut model::machine::ManagedHostStateSnapshot, state: ManagedHostState) { + snapshot.host_snapshot.state.value = state.clone(); + snapshot.managed_state = state; + } + + fn transitioned_to( + outcome: StateHandlerOutcome, + expected_phase: InitialResetPhase, + ) -> ManagedHostState { + let StateHandlerOutcome::Transition { next_state, .. } = outcome else { + panic!("expected an initial-reset phase transition"); + }; + assert!(matches!( + &next_state, + ManagedHostState::HostReprovision { + reprovision_state: HostReprovisionState::InitialReset { phase, .. }, + retry_count: 0, + } if phase == &expected_phase + )); + next_state + } + + fn assert_waiting(outcome: StateHandlerOutcome, expected_reason: &str) { + let StateHandlerOutcome::Wait { reason, .. 
} = outcome else { + panic!("expected the initial-reset phase to keep polling"); + }; + assert!( + reason.contains(expected_reason), + "unexpected wait reason: {reason}" + ); + } + + let env = create_test_env(pool).await; + let mh = create_managed_host(&env).await; + let rack_id = carbide_uuid::rack::RackId::new(uuid::Uuid::new_v4().to_string()); + + let mut txn = env.db_txn().await; + db::rack::create( + &mut txn, + &rack_id, + None, + &model::rack::RackConfig::default(), + None, + ) + .await + .unwrap(); + sqlx::query("UPDATE machines SET rack_id = $1 WHERE id = $2") + .bind(&rack_id) + .bind(mh.host().id) + .execute(&mut *txn) + .await + .unwrap(); + txn.commit().await.unwrap(); + + let mut txn = env.db_txn().await; + let mut snapshot = mh.snapshot(&mut txn).await; + let bmc_access = mh.host().bmc_access(&mut txn).await; + txn.commit().await.unwrap(); + let redfish_client = env.redfish_sim.client_by_info(&bmc_access).await.unwrap(); + redfish_client + .power(libredfish::SystemPowerControl::On) + .await + .unwrap(); + let initial_redfish_timepoint = env.redfish_sim.timepoint(); + + let compute_manager = Arc::new(RecordingRmsComputeManager::default()); + let component_manager = ComponentManager::new( + Arc::new(MockNvSwitchManager), + Arc::new(MockPowerShelfManager), + compute_manager.clone(), + false, + false, + false, + ); + let mut services = env.machine_state_handler_services(); + services.component_manager = Some(Arc::new(component_manager)); + let handler = MachineStateHandlerBuilder::builder().build(); + + set_state( + &mut snapshot, + ManagedHostState::HostReprovision { + reprovision_state: HostReprovisionState::InitialReset { + phase: InitialResetPhase::Start, + last_time: chrono::Utc::now(), + }, + retry_count: 0, + }, + ); + let next_state = transitioned_to( + run_tick(&handler, &mut snapshot, &mut services).await, + InitialResetPhase::WaitingForHostOff, + ); + set_state(&mut snapshot, next_state); + assert_eq!( + env.redfish_sim + 
.actions_since(&initial_redfish_timepoint) + .all_hosts(), + vec![RedfishSimAction::Power( + libredfish::SystemPowerControl::ForceOff + )], + "the Start phase must power off through Core Redfish" + ); + assert!( + compute_manager.calls().is_empty(), + "initial reset must not mix RMS dispatch with direct Redfish polling" + ); + + redfish_client + .power(libredfish::SystemPowerControl::On) + .await + .unwrap(); + let waiting_for_off_timepoint = env.redfish_sim.timepoint(); + assert_waiting( + run_tick(&handler, &mut snapshot, &mut services).await, + "turn off", + ); + assert_eq!( + env.redfish_sim + .actions_since(&waiting_for_off_timepoint) + .all_hosts(), + Vec::::new(), + "polling an On host must not redispatch ForceOff or reset the BMC" + ); + assert!(compute_manager.calls().is_empty()); + + redfish_client + .power(libredfish::SystemPowerControl::ForceOff) + .await + .unwrap(); + let bmc_reset_timepoint = env.redfish_sim.timepoint(); + let next_state = transitioned_to( + run_tick(&handler, &mut snapshot, &mut services).await, + InitialResetPhase::BMCWasReset, + ); + set_state(&mut snapshot, next_state); + assert_eq!( + env.redfish_sim + .actions_since(&bmc_reset_timepoint) + .all_hosts(), + vec![RedfishSimAction::BmcReset], + "the BMC must be reset exactly once after Core observes the host Off" + ); + assert!(compute_manager.calls().is_empty()); + + let power_on_timepoint = env.redfish_sim.timepoint(); + let next_state = transitioned_to( + run_tick(&handler, &mut snapshot, &mut services).await, + InitialResetPhase::WaitingForHostOn, + ); + set_state(&mut snapshot, next_state); + assert_eq!( + env.redfish_sim + .actions_since(&power_on_timepoint) + .all_hosts(), + vec![RedfishSimAction::Power(libredfish::SystemPowerControl::On)], + "the BMC-ready phase must power on through Core Redfish" + ); + assert!(compute_manager.calls().is_empty()); + + redfish_client + .power(libredfish::SystemPowerControl::ForceOff) + .await + .unwrap(); + let waiting_for_on_timepoint = 
env.redfish_sim.timepoint(); + assert_waiting( + run_tick(&handler, &mut snapshot, &mut services).await, + "turn on", + ); + assert_eq!( + env.redfish_sim + .actions_since(&waiting_for_on_timepoint) + .all_hosts(), + Vec::::new(), + "polling an Off host must not redispatch On" + ); + assert!(compute_manager.calls().is_empty()); + + redfish_client + .power(libredfish::SystemPowerControl::On) + .await + .unwrap(); + let host_on_timepoint = env.redfish_sim.timepoint(); + let next_state = transitioned_to( + run_tick(&handler, &mut snapshot, &mut services).await, + InitialResetPhase::WaitHostBoot, + ); + set_state(&mut snapshot, next_state); + assert_eq!( + env.redfish_sim + .actions_since(&host_on_timepoint) + .all_hosts(), + Vec::::new(), + "observing the host On must not redispatch power" + ); + assert!(compute_manager.calls().is_empty()); +} + /// Exercises WaitingForBiosJob state by configuring mock BMC to return a job ID from machine_setup. /// Verifies that host reaches "Ready" and that state machine transitioned through WaitingForBiosJob. #[crate::sqlx_test] diff --git a/crates/api-core/src/tests/mod.rs b/crates/api-core/src/tests/mod.rs index 5338cb8f3c..f2b4d090a0 100644 --- a/crates/api-core/src/tests/mod.rs +++ b/crates/api-core/src/tests/mod.rs @@ -18,6 +18,7 @@ mod boot_interface_resolution; mod client_resolution; pub mod common; +mod component_manager_compute_power; mod compute_allocation; mod connected_device; mod credential; diff --git a/crates/api-core/src/tests/rack_find.rs b/crates/api-core/src/tests/rack_find.rs index 8d88d426dd..a2c4a5ee2d 100644 --- a/crates/api-core/src/tests/rack_find.rs +++ b/crates/api-core/src/tests/rack_find.rs @@ -15,7 +15,11 @@ * limitations under the License. 
*/ +use carbide_secrets::credentials::{ + CredentialKey, CredentialReader, CredentialWriter, Credentials, +}; use carbide_uuid::rack::RackId; +use model::rack::{MaintenanceScope, RackConfig}; use rpc::forge::forge_server::Forge; use rpc::forge::{AdminForceDeleteRackRequest, DeleteRackRequest}; use tonic::Code; @@ -124,8 +128,37 @@ async fn test_force_delete_rack_success( .persist(&mut txn) .await .unwrap(); + let maintenance_request_id = "force-delete-request".to_string(); + db::rack::update( + &mut txn, + &rack_id, + &RackConfig { + maintenance_requested: Some(MaintenanceScope { + maintenance_request_id: Some(maintenance_request_id.clone()), + ..Default::default() + }), + ..Default::default() + }, + ) + .await + .unwrap(); drop(txn); + let access_token_key = CredentialKey::RackMaintenanceAccessToken { + rack_id: rack_id.clone(), + maintenance_request_id: Some(maintenance_request_id), + }; + env.test_credential_manager + .set_credentials( + &access_token_key, + &Credentials::UsernamePassword { + username: "access_token".to_string(), + password: "token".to_string(), + }, + ) + .await + .unwrap(); + let response = env .api .admin_force_delete_rack(tonic::Request::new(AdminForceDeleteRackRequest { @@ -146,6 +179,14 @@ async fn test_force_delete_rack_success( .racks; assert!(racks.is_empty(), "Rack should be hard-deleted"); + assert!( + env.test_credential_manager + .get_credentials(&access_token_key) + .await + .unwrap() + .is_none(), + "force-delete should remove the persisted request's token" + ); Ok(()) } diff --git a/crates/api-core/src/tests/rack_state_controller/handler.rs b/crates/api-core/src/tests/rack_state_controller/handler.rs index af2815b178..6da63d6d0c 100644 --- a/crates/api-core/src/tests/rack_state_controller/handler.rs +++ b/crates/api-core/src/tests/rack_state_controller/handler.rs @@ -15,6 +15,9 @@ * limitations under the License. 
*/ +use std::sync::Arc; +use std::sync::atomic::{AtomicUsize, Ordering}; + use carbide_rack_controller::context::RackStateHandlerContextObjects; use carbide_rack_controller::handler::RackStateHandler; use carbide_rack_controller::maintenance::apply_nvos_job_status_response; @@ -25,18 +28,28 @@ use carbide_secrets::credentials::{ use carbide_uuid::machine::{MachineId, MachineIdSource, MachineType}; use carbide_uuid::rack::{RackId, RackProfileId}; use carbide_uuid::switch::SwitchId; +use component_manager::component_manager::ComponentManager; +use component_manager::compute_tray_manager::{ + Backend as ComputeBackend, ComputeTrayEndpoint, ComputeTrayFirmwareUpdateStatus, + ComputeTrayManager, ComputeTrayResult, +}; +use component_manager::error::ComponentManagerError; +use component_manager::types::FirmwareUpdateOptions; use db::db_read::DbReader; use db::{ ObjectColumnFilter, expected_rack as db_expected_rack, rack as db_rack, switch as db_switch, }; use librms::protos::rack_manager as rms; +use model::component_manager::{ComputeTrayComponent, PowerAction}; use model::expected_machine::ExpectedMachineData; use model::expected_rack::ExpectedRack; +use model::power_manager::PowerState; use model::rack::{ ConfigureNmxClusterState, FirmwareUpgradeDeviceStatus, FirmwareUpgradeJob, FirmwareUpgradeState, MaintenanceActivity, MaintenanceScope, NvosUpdateState, NvosUpdateSwitchStatus, Rack, RackConfig, RackFirmwareUpgradeState, RackMaintenanceState, - RackPowerState, RackState, RackValidationState, + RackPowerControlState, RackPowerControlTargetResult, RackPowerState, RackState, + RackValidationState, }; use model::rack_type::{ RackCapabilitiesSet, RackCapabilityCompute, RackCapabilityPowerShelf, RackCapabilitySwitch, @@ -55,6 +68,84 @@ use crate::tests::common::api_fixtures::{ TestEnv, TestEnvOverrides, create_test_env_with_overrides, get_config, }; +#[derive(Debug, Default)] +struct CountingComputeTrayManager { + power_control_calls: AtomicUsize, + fail_power_control: 
bool, +} + +impl CountingComputeTrayManager { + fn failing() -> Self { + Self { + fail_power_control: true, + ..Default::default() + } + } + + fn power_control_call_count(&self) -> usize { + self.power_control_calls.load(Ordering::SeqCst) + } +} + +#[async_trait::async_trait] +impl ComputeTrayManager for CountingComputeTrayManager { + fn name(&self) -> &str { + "counting-compute" + } + + fn backend(&self) -> ComputeBackend { + ComputeBackend::Rms + } + + async fn power_control( + &self, + endpoints: &[ComputeTrayEndpoint], + _action: PowerAction, + ) -> Result, ComponentManagerError> { + self.power_control_calls.fetch_add(1, Ordering::SeqCst); + if self.fail_power_control { + return Err(ComponentManagerError::Internal( + "synthetic compute-tray power failure".into(), + )); + } + Ok(endpoints + .iter() + .map(|endpoint| ComputeTrayResult { + bmc_ip: endpoint.bmc_ip, + success: true, + error: None, + }) + .collect()) + } + + async fn update_firmware( + &self, + _endpoints: &[ComputeTrayEndpoint], + _target_version: &str, + _components: &[ComputeTrayComponent], + _options: &FirmwareUpdateOptions, + ) -> Result, ComponentManagerError> { + Err(ComponentManagerError::Internal( + "firmware update is outside this test".into(), + )) + } + + async fn get_firmware_status( + &self, + _endpoints: &[ComputeTrayEndpoint], + ) -> Result, ComponentManagerError> { + Err(ComponentManagerError::Internal( + "firmware status is outside this test".into(), + )) + } + + async fn list_firmware_bundles(&self) -> Result, ComponentManagerError> { + Err(ComponentManagerError::Internal( + "firmware bundles are outside this test".into(), + )) + } +} + fn test_capabilities() -> RackCapabilitiesSet { RackCapabilitiesSet { compute: RackCapabilityCompute { @@ -207,6 +298,431 @@ async fn create_single_compute_rack( Ok((rack_id, host)) } +#[crate::sqlx_test] +async fn test_compute_power_dispatch_marker_prevents_prepared_replay( + pool: sqlx::PgPool, +) -> Result<(), Box> { + let env = 
create_test_env_with_overrides( + pool.clone(), + TestEnvOverrides { + config: Some(config_with_rack_profiles()), + ..Default::default() + }, + ) + .await; + let (rack_id, host) = create_single_compute_rack(&env, &pool).await?; + let machine_id = host.host_snapshot.id; + + let mut rack = get_db_rack(env.db_reader().as_mut(), &rack_id).await; + rack.config.maintenance_requested = Some(MaintenanceScope { + machine_ids: vec![machine_id], + activities: vec![MaintenanceActivity::PowerControl { + action: PowerAction::ForceRestart, + }], + ..Default::default() + }); + let mut txn = pool.begin().await?; + db_rack::update(txn.as_mut(), &rack_id, &rack.config).await?; + txn.commit().await?; + + let recording_manager = Arc::new(CountingComputeTrayManager::default()); + let base_component_manager = env + .test_component_manager + .as_ref() + .expect("test component manager should be configured"); + let component_manager = ComponentManager::new( + base_component_manager.nv_switch.clone(), + base_component_manager.power_shelf.clone(), + recording_manager.clone(), + base_component_manager.nv_switch_use_state_controller, + base_component_manager.power_shelf_use_state_controller, + base_component_manager.compute_tray_use_state_controller, + ); + + let handler = RackStateHandler::default(); + let mut services = env.rack_state_handler_services(); + services.component_manager = Some(Arc::new(component_manager)); + let mut metrics = RackMetrics::default(); + let mut db_writes = DbWriteBatch::default(); + let mut ctx = StateHandlerContext:: { + services: &mut services, + metrics: &mut metrics, + pending_db_writes: &mut db_writes, + }; + + let preparing_state = RackState::Maintenance { + maintenance_state: RackMaintenanceState::PowerControl { + action: PowerAction::ForceRestart, + power_control_state: RackPowerControlState::Preparing, + }, + }; + let mut preparing_outcome = handler + .handle_object_state(&rack_id, &mut rack, &preparing_state, &mut ctx) + .await?; + 
assert_eq!(recording_manager.power_control_call_count(), 0); + if let Some(txn) = preparing_outcome.take_transaction() { + txn.commit().await?; + } + let prepared_state = match preparing_outcome { + StateHandlerOutcome::Transition { next_state, .. } => { + assert!(matches!( + &next_state, + RackState::Maintenance { + maintenance_state: RackMaintenanceState::PowerControl { + power_control_state: RackPowerControlState::Prepared { .. }, + .. + } + } + )); + next_state + } + other => panic!( + "Preparing should transition to Prepared, got {:?}", + std::mem::discriminant(&other) + ), + }; + let rack_after_preparation = get_db_rack(env.db_reader().as_mut(), &rack_id).await; + assert!( + rack_after_preparation + .config + .power_control_dispatch_started_at + .is_none(), + "Preparing must not claim external dispatch" + ); + + let mut dispatch_outcome = handler + .handle_object_state(&rack_id, &mut rack, &prepared_state, &mut ctx) + .await?; + if let Some(txn) = dispatch_outcome.take_transaction() { + txn.commit().await?; + } + assert_eq!(recording_manager.power_control_call_count(), 1); + let successful_finalizing_state = match dispatch_outcome { + StateHandlerOutcome::Transition { next_state, .. } => { + match &next_state { + RackState::Maintenance { + maintenance_state: + RackMaintenanceState::PowerControl { + power_control_state: RackPowerControlState::Finalizing { result, .. }, + .. 
+ }, + } => { + assert!(result.error.is_none()); + assert_eq!(result.target_outcomes.len(), 1); + assert!(matches!( + &result.target_outcomes[0].outcome, + RackPowerControlTargetResult::Succeeded + )); + assert_eq!(result.dispatched_bmc_ips.len(), 1); + } + other => panic!("Prepared should transition to Finalizing, got {other:?}"), + } + next_state + } + other => panic!( + "Prepared should transition to Finalizing, got {:?}", + std::mem::discriminant(&other) + ), + }; + let rack_after_dispatch = get_db_rack(env.db_reader().as_mut(), &rack_id).await; + assert!( + rack_after_dispatch + .config + .power_control_dispatch_started_at + .is_some(), + "Prepared must durably claim dispatch before calling the backend" + ); + + // Simulate recovery from an ambiguous post-dispatch crash by loading the + // rack configuration from the database while the controller state remains + // Prepared. The durable marker must prevent a second external operation. + let mut recovered_rack = rack_after_dispatch; + let recovery_outcome = handler + .handle_object_state(&rack_id, &mut recovered_rack, &prepared_state, &mut ctx) + .await?; + assert_eq!(recording_manager.power_control_call_count(), 1); + match recovery_outcome { + StateHandlerOutcome::Transition { + next_state: + RackState::Maintenance { + maintenance_state: + RackMaintenanceState::PowerControl { + power_control_state: RackPowerControlState::Finalizing { result, .. }, + .. + }, + }, + .. + } => { + assert!( + result + .error + .as_deref() + .is_some_and(|error| error.contains("refusing to replay")) + ); + assert!(matches!( + &result.target_outcomes[0].outcome, + RackPowerControlTargetResult::Failed { .. 
} + )); + assert!(result.dispatched_bmc_ips.is_empty()); + } + other => panic!( + "Recovered Prepared should fail into Finalizing, got {:?}", + std::mem::discriminant(&other) + ), + } + + let mut finalizing_outcome = handler + .handle_object_state( + &rack_id, + &mut recovered_rack, + &successful_finalizing_state, + &mut ctx, + ) + .await?; + if let Some(txn) = finalizing_outcome.take_transaction() { + txn.commit().await?; + } + assert!(matches!( + finalizing_outcome, + StateHandlerOutcome::Transition { + next_state: RackState::Ready, + .. + } + )); + + let finalized_rack = get_db_rack(env.db_reader().as_mut(), &rack_id).await; + assert!(finalized_rack.config.maintenance_requested.is_none()); + assert!( + finalized_rack + .config + .power_control_dispatch_started_at + .is_none() + ); + let mut connection = pool.acquire().await?; + let finalized_power_options = db::power_options::get_by_ids(&[machine_id], connection.as_mut()) + .await? + .pop() + .expect("machine should have power options"); + assert_eq!(finalized_power_options.desired_power_state, PowerState::On); + let finalized_machine = db::machine::find_one( + &pool, + &machine_id, + model::machine::machine_search_config::MachineSearchConfig::default(), + ) + .await? 
+ .expect("machine should still exist"); + assert!(finalized_machine.health_reports.replace.is_none()); + + Ok(()) +} + +#[crate::sqlx_test] +async fn test_compute_power_backend_failure_restores_previous_power_and_health_state( + pool: sqlx::PgPool, +) -> Result<(), Box> { + let env = create_test_env_with_overrides( + pool.clone(), + TestEnvOverrides { + config: Some(config_with_rack_profiles()), + ..Default::default() + }, + ) + .await; + let (rack_id, host) = create_single_compute_rack(&env, &pool).await?; + let machine_id = host.host_snapshot.id; + + let previous_health_report = + health_report::HealthReport::empty("pre-existing-power-control-test".into()); + let mut txn = pool.begin().await?; + db::machine::insert_health_report( + txn.as_mut(), + &machine_id, + health_report::HealthReportApplyMode::Replace, + &previous_health_report, + false, + ) + .await?; + let previous_power_options = db::power_options::get_by_ids(&[machine_id], txn.as_mut()) + .await? + .pop() + .expect("machine should have power options"); + assert_eq!(previous_power_options.desired_power_state, PowerState::On); + txn.commit().await?; + + let mut rack = get_db_rack(env.db_reader().as_mut(), &rack_id).await; + rack.config.maintenance_requested = Some(MaintenanceScope { + machine_ids: vec![machine_id], + activities: vec![MaintenanceActivity::PowerControl { + action: PowerAction::ForceRestart, + }], + ..Default::default() + }); + let mut txn = pool.begin().await?; + db_rack::update(txn.as_mut(), &rack_id, &rack.config).await?; + txn.commit().await?; + + let failing_manager = Arc::new(CountingComputeTrayManager::failing()); + let base_component_manager = env + .test_component_manager + .as_ref() + .expect("test component manager should be configured"); + let component_manager = ComponentManager::new( + base_component_manager.nv_switch.clone(), + base_component_manager.power_shelf.clone(), + failing_manager.clone(), + base_component_manager.nv_switch_use_state_controller, + 
base_component_manager.power_shelf_use_state_controller, + base_component_manager.compute_tray_use_state_controller, + ); + + let handler = RackStateHandler::default(); + let mut services = env.rack_state_handler_services(); + services.component_manager = Some(Arc::new(component_manager)); + let mut metrics = RackMetrics::default(); + let mut db_writes = DbWriteBatch::default(); + let mut ctx = StateHandlerContext:: { + services: &mut services, + metrics: &mut metrics, + pending_db_writes: &mut db_writes, + }; + + let preparing_state = RackState::Maintenance { + maintenance_state: RackMaintenanceState::PowerControl { + action: PowerAction::ForceRestart, + power_control_state: RackPowerControlState::Preparing, + }, + }; + let mut preparing_outcome = handler + .handle_object_state(&rack_id, &mut rack, &preparing_state, &mut ctx) + .await?; + if let Some(txn) = preparing_outcome.take_transaction() { + txn.commit().await?; + } + let prepared_state = match preparing_outcome { + StateHandlerOutcome::Transition { next_state, .. } => next_state, + other => panic!( + "Preparing should transition to Prepared, got {:?}", + std::mem::discriminant(&other) + ), + }; + let mut connection = pool.acquire().await?; + let prepared_power_options = db::power_options::get_by_ids(&[machine_id], connection.as_mut()) + .await? + .pop() + .expect("machine should have power options"); + assert_eq!(prepared_power_options.desired_power_state, PowerState::Off); + let prepared_machine = db::machine::find_one( + &pool, + &machine_id, + model::machine::machine_search_config::MachineSearchConfig::default(), + ) + .await? 
+ .expect("machine should still exist"); + let prepared_health_report = prepared_machine + .health_reports + .replace + .as_ref() + .expect("preparation should install a temporary health override"); + assert_ne!(prepared_health_report, &previous_health_report); + assert!( + prepared_health_report + .source + .starts_with("component_power_control/") + ); + drop(connection); + + let mut dispatch_outcome = handler + .handle_object_state(&rack_id, &mut rack, &prepared_state, &mut ctx) + .await?; + if let Some(txn) = dispatch_outcome.take_transaction() { + txn.commit().await?; + } + assert_eq!(failing_manager.power_control_call_count(), 1); + let finalizing_state = match dispatch_outcome { + StateHandlerOutcome::Transition { next_state, .. } => { + match &next_state { + RackState::Maintenance { + maintenance_state: + RackMaintenanceState::PowerControl { + power_control_state: RackPowerControlState::Finalizing { result, .. }, + .. + }, + } => { + assert!(result.error.as_deref().is_some_and(|error| { + error.contains("synthetic compute-tray power failure") + })); + assert!(matches!( + &result.target_outcomes[0].outcome, + RackPowerControlTargetResult::Failed { .. } + )); + } + other => panic!("Prepared should transition to Finalizing, got {other:?}"), + } + next_state + } + other => panic!( + "Prepared should transition to Finalizing, got {:?}", + std::mem::discriminant(&other) + ), + }; + let rack_after_dispatch = get_db_rack(env.db_reader().as_mut(), &rack_id).await; + assert!( + rack_after_dispatch + .config + .power_control_dispatch_started_at + .is_some() + ); + assert!(rack_after_dispatch.config.maintenance_requested.is_some()); + + let mut finalizing_outcome = handler + .handle_object_state(&rack_id, &mut rack, &finalizing_state, &mut ctx) + .await?; + if let Some(txn) = finalizing_outcome.take_transaction() { + txn.commit().await?; + } + match finalizing_outcome { + StateHandlerOutcome::Transition { + next_state: RackState::Error { cause }, + .. 
+ } => assert!(cause.contains("synthetic compute-tray power failure")), + other => panic!( + "Failed Finalizing should transition to Error, got {:?}", + std::mem::discriminant(&other) + ), + } + + let finalized_rack = get_db_rack(env.db_reader().as_mut(), &rack_id).await; + assert!(finalized_rack.config.maintenance_requested.is_none()); + assert!( + finalized_rack + .config + .power_control_dispatch_started_at + .is_none() + ); + let mut connection = pool.acquire().await?; + let finalized_power_options = db::power_options::get_by_ids(&[machine_id], connection.as_mut()) + .await? + .pop() + .expect("machine should have power options"); + assert_eq!( + finalized_power_options.desired_power_state, + previous_power_options.desired_power_state + ); + let finalized_machine = db::machine::find_one( + &pool, + &machine_id, + model::machine::machine_search_config::MachineSearchConfig::default(), + ) + .await? + .expect("machine should still exist"); + assert_eq!( + finalized_machine.health_reports.replace.as_ref(), + Some(&previous_health_report) + ); + + Ok(()) +} + async fn create_two_compute_rack( env: &TestEnv, pool: &sqlx::PgPool, @@ -532,6 +1048,10 @@ async fn test_on_demand_rack_maintenance_schedules_firmware_and_nvos_scope( .config .maintenance_requested .expect("maintenance should be scheduled"); + let maintenance_request_id = scope + .maintenance_request_id + .clone() + .expect("token-bearing maintenance should have a request ID"); assert_eq!(scope.switch_ids, vec![switch_id]); assert_eq!(scope.activities.len(), 2); assert!(matches!( @@ -552,6 +1072,7 @@ async fn test_on_demand_rack_maintenance_schedules_firmware_and_nvos_scope( .test_credential_manager .get_credentials(&CredentialKey::RackMaintenanceAccessToken { rack_id: rack_id.clone(), + maintenance_request_id: Some(maintenance_request_id), }) .await .expect("credential lookup should succeed") @@ -599,10 +1120,19 @@ async fn test_on_demand_rack_maintenance_defaults_missing_access_token_to_noauth ) .await?; + 
let rack = get_db_rack(env.db_reader().as_mut(), &rack_id).await; + let maintenance_request_id = rack + .config + .maintenance_requested + .as_ref() + .and_then(|scope| scope.maintenance_request_id.clone()) + .expect("token-bearing maintenance should have a request ID"); + let token_credentials = env .test_credential_manager .get_credentials(&CredentialKey::RackMaintenanceAccessToken { rack_id: rack_id.clone(), + maintenance_request_id: Some(maintenance_request_id), }) .await .expect("credential lookup should succeed") @@ -618,6 +1148,103 @@ async fn test_on_demand_rack_maintenance_defaults_missing_access_token_to_noauth Ok(()) } +#[crate::sqlx_test] +async fn test_concurrent_rack_maintenance_cleans_only_rejected_request_token( + pool: sqlx::PgPool, +) -> Result<(), Box> { + let env = create_test_env_with_overrides(pool.clone(), TestEnvOverrides::default()).await; + let (rack_id, switch_id) = create_ready_rack_with_switch(&env, &pool).await?; + let request = |config_id: &str, access_token: &str| { + Request::new(rpc::forge::RackMaintenanceOnDemandRequest { + rack_id: Some(rack_id.clone()), + scope: Some(rpc::forge::RackMaintenanceScope { + machine_ids: vec![], + switch_ids: vec![switch_id.to_string()], + power_shelf_ids: vec![], + activities: vec![rpc::forge::MaintenanceActivityConfig { + activity: Some( + rpc::forge::maintenance_activity_config::Activity::NvosUpdate( + rpc::forge::NvosUpdateActivity { + config_json: format!(r#"{{"Id":"{config_id}"}}"#), + access_token: Some(access_token.to_string()), + }, + ), + ), + }], + }), + }) + }; + + let first = crate::handlers::rack::on_demand_rack_maintenance( + env.api.as_ref(), + request("first", "first-token"), + ); + let second = crate::handlers::rack::on_demand_rack_maintenance( + env.api.as_ref(), + request("second", "second-token"), + ); + let (first_result, second_result) = tokio::join!(first, second); + let (accepted_config_id, accepted_token_value, rejected) = match (first_result, second_result) { + (Ok(_), 
Err(rejected)) => ("first", "first-token", rejected), + (Err(rejected), Ok(_)) => ("second", "second-token", rejected), + (first, second) => panic!( + "exactly one concurrent maintenance request must succeed, got first={first:?}, second={second:?}" + ), + }; + assert!(rejected.message().contains("already scheduled")); + + let rack = get_db_rack(env.db_reader().as_mut(), &rack_id).await; + let scope = rack + .config + .maintenance_requested + .expect("the accepted maintenance request must remain scheduled"); + let maintenance_request_id = scope + .maintenance_request_id + .expect("token-bearing maintenance should have a request ID"); + assert!(matches!( + scope.activities.as_slice(), + [MaintenanceActivity::NvosUpdate { config_json }] + if config_json == &format!(r#"{{"Id":"{accepted_config_id}"}}"#) + )); + + let accepted_credentials = env + .test_credential_manager + .get_credentials(&CredentialKey::RackMaintenanceAccessToken { + rack_id: rack_id.clone(), + maintenance_request_id: Some(maintenance_request_id), + }) + .await + .map_err(|error| eyre::eyre!("failed to read accepted request token: {error}"))? + .expect("the accepted request token must remain available"); + assert_eq!( + accepted_credentials, + Credentials::UsernamePassword { + username: "access_token".to_string(), + password: accepted_token_value.to_string(), + } + ); + assert!( + env.test_credential_manager + .get_credentials(&CredentialKey::RackMaintenanceAccessToken { + rack_id: rack_id.clone(), + maintenance_request_id: None, + }) + .await + .map_err(|error| eyre::eyre!("failed to read legacy request token: {error}"))? 
+ .is_none(), + "request-scoped scheduling must not write the legacy shared key" + ); + assert_eq!( + env.test_credential_manager + .count_credentials_with_prefix(&format!("racks/{rack_id}/maintenance/")) + .await, + 1, + "the rejected request must clean up only its own request-scoped token" + ); + + Ok(()) +} + /// test_expected_no_definition_stays_parked verifies that a rack without an /// expected_rack record stays in Created and does not advance. #[crate::sqlx_test] @@ -1430,6 +2057,12 @@ async fn test_firmware_upgrade_start_submits_json_and_deletes_access_token( .await; let mut rack = get_db_rack(env.db_reader().as_mut(), &rack_id).await; + let maintenance_request_id = rack + .config + .maintenance_requested + .as_ref() + .and_then(|scope| scope.maintenance_request_id.clone()) + .expect("token-bearing maintenance should have a request ID"); let handler_instance = RackStateHandler::default(); let mut services = env.rack_state_handler_services(); let mut metrics = RackMetrics::default(); @@ -1479,6 +2112,7 @@ async fn test_firmware_upgrade_start_submits_json_and_deletes_access_token( .test_credential_manager .get_credentials(&CredentialKey::RackMaintenanceAccessToken { rack_id: rack_id.clone(), + maintenance_request_id: Some(maintenance_request_id), }) .await .expect("credential lookup should succeed"); @@ -1527,6 +2161,7 @@ async fn test_firmware_upgrade_start_missing_profile_deletes_access_token( .set_credentials( &CredentialKey::RackMaintenanceAccessToken { rack_id: rack_id.clone(), + maintenance_request_id: None, }, &Credentials::UsernamePassword { username: "access_token".to_string(), @@ -1573,6 +2208,7 @@ async fn test_firmware_upgrade_start_missing_profile_deletes_access_token( .test_credential_manager .get_credentials(&CredentialKey::RackMaintenanceAccessToken { rack_id: rack_id.clone(), + maintenance_request_id: None, }) .await .map_err(|error| eyre::eyre!("failed to get maintenance access token: {}", error))?; @@ -2163,6 +2799,7 @@ async fn 
test_nvos_update_start_transitions_to_wait_for_complete( .set_credentials( &CredentialKey::RackMaintenanceAccessToken { rack_id: rack_id.clone(), + maintenance_request_id: None, }, &Credentials::UsernamePassword { username: "access_token".to_string(), diff --git a/crates/api-db/src/power_options.rs b/crates/api-db/src/power_options.rs index ae4553ee16..d2f0634dde 100644 --- a/crates/api-db/src/power_options.rs +++ b/crates/api-db/src/power_options.rs @@ -43,7 +43,10 @@ pub async fn update_desired_state( current_version: &ConfigVersion, txn: &mut PgConnection, ) -> Result { - let query = "UPDATE power_options SET desired_power_state=$1, desired_power_state_version=$2 WHERE host_id=$3 RETURNING *"; + let query = "UPDATE power_options + SET desired_power_state=$1, desired_power_state_version=$2 + WHERE host_id=$3 AND desired_power_state_version=$4 + RETURNING *"; let config_version = current_version.increment(); @@ -51,9 +54,16 @@ pub async fn update_desired_state( .bind(power_state) .bind(config_version) .bind(host_id) + .bind(current_version) .fetch_one(txn) .await - .map_err(|e| DatabaseError::query(query, e))?; + .map_err(|error| match error { + sqlx::Error::RowNotFound => DatabaseError::ConcurrentModificationError( + "power_options", + current_version.to_string(), + ), + error => DatabaseError::query(query, error), + })?; Ok(updated_value) } diff --git a/crates/api-model/src/machine/mod.rs b/crates/api-model/src/machine/mod.rs index ec17a653cb..b11ee06214 100644 --- a/crates/api-model/src/machine/mod.rs +++ b/crates/api-model/src/machine/mod.rs @@ -1583,7 +1583,9 @@ pub struct ScoutUpgradeResult { #[serde(rename_all = "lowercase")] pub enum InitialResetPhase { Start, + WaitingForHostOff, BMCWasReset, + WaitingForHostOn, WaitHostBoot, } diff --git a/crates/api-model/src/rack.rs b/crates/api-model/src/rack.rs index be278109a5..c5ae14ade6 100644 --- a/crates/api-model/src/rack.rs +++ b/crates/api-model/src/rack.rs @@ -16,6 +16,7 @@ */ use std::collections::HashMap; 
use std::fmt::Display; +use std::net::IpAddr; use carbide_uuid::machine::MachineId; use carbide_uuid::power_shelf::PowerShelfId; @@ -23,6 +24,7 @@ use carbide_uuid::rack::{RackId, RackProfileId}; use carbide_uuid::switch::SwitchId; use chrono::{DateTime, Utc}; use config_version::{ConfigVersion, Versioned}; +use health_report::HealthReport; use serde::{Deserialize, Serialize}; use sqlx::postgres::PgRow; use sqlx::{FromRow, Row}; @@ -32,6 +34,7 @@ use crate::component_manager::PowerAction; use crate::controller_outcome::PersistentStateHandlerOutcome; use crate::health::HealthReportSources; use crate::metadata::Metadata; +use crate::power_manager::PowerState; // Well-known label keys! // @@ -405,13 +408,16 @@ pub enum RackState { /// Sub-states of rack maintenance. /// /// The rack enters maintenance after discovery (all devices found, all machines -/// ready) and exits into `Validation(Pending)` once maintenance is complete, -/// at which point the validation flow takes over. +/// ready) and normally exits into `Validating(Pending)` once maintenance is +/// complete, at which point the validation flow takes over. A power-control-only +/// request returns directly to `Ready` after finalization. /// /// ## Sub-state Flow /// /// ```text -/// FirmwareUpgrade -> NVOSUpdate -> ConfigureNmxCluster -> Completed -> Validation(Pending) +/// FirmwareUpgrade -> NVOSUpdate -> ConfigureNmxCluster -> PowerSequence +/// -> PowerControl -> Completed -> Validating(Pending) +/// \-> Ready (power-control-only request) /// ``` #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] pub enum RackMaintenanceState { @@ -427,6 +433,17 @@ pub enum RackMaintenanceState { PowerSequence { rack_power: RackPowerState, }, + /// Power control for the compute trays explicitly selected by the + /// maintenance scope. Unlike `PowerSequence`, this state is never entered + /// by an empty "all activities" scope. 
+ PowerControl { + action: PowerAction, + /// Persisted preparation data makes cleanup and rollback safe across + /// controller restarts. The default keeps states serialized before + /// this field was introduced backward compatible. + #[serde(default)] + power_control_state: RackPowerControlState, + }, Completed, } @@ -449,11 +466,112 @@ impl Display for RackMaintenanceState { RackMaintenanceState::PowerSequence { rack_power } => { write!(f, "PowerSequence({})", rack_power) } + RackMaintenanceState::PowerControl { + action, + power_control_state, + } => { + write!(f, "PowerControl({action:?}, {power_control_state})") + } RackMaintenanceState::Completed => write!(f, "Completed"), } } } +/// Persisted phases for scoped compute-tray power control. +/// +/// Preparation and the transition to `Prepared` commit in the same database +/// transaction. The external backend call is made only from `Prepared`, so no +/// database transaction is held while waiting on hardware. +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +pub enum RackPowerControlState { + #[default] + Preparing, + Prepared { + targets: Vec, + }, + /// The external operation has returned. Database finalization runs in a + /// separate controller tick so a retry cannot redispatch restart or power + /// cycle operations merely because cleanup failed. + Finalizing { + targets: Vec, + result: RackPowerControlResult, + }, +} + +impl Display for RackPowerControlState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Preparing => f.write_str("Preparing"), + Self::Prepared { targets } => write!(f, "Prepared({} targets)", targets.len()), + Self::Finalizing { targets, result } => { + write!( + f, + "Finalizing({} targets, {} failures)", + targets.len(), + result.failed_target_count() + ) + } + } + } +} + +/// Persisted result of one external power-control dispatch. 
+#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct RackPowerControlResult { + pub target_outcomes: Vec, + /// BMC endpoints actually handed to the backend. Finalization uses this + /// persisted set for re-exploration rather than preparation-time addresses + /// that may have become stale. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub dispatched_bmc_ips: Vec, + /// Operation-level failure details, including transport and backend result + /// contract errors. Per-target failures are also summarized here for the + /// eventual rack error cause. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub error: Option, +} + +impl RackPowerControlResult { + pub fn failed_target_count(&self) -> usize { + self.target_outcomes + .iter() + .filter(|target| matches!(&target.outcome, RackPowerControlTargetResult::Failed { .. })) + .count() + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct RackPowerControlTargetOutcome { + pub machine_id: MachineId, + pub outcome: RackPowerControlTargetResult, +} + +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub enum RackPowerControlTargetResult { + Succeeded, + Failed { cause: String }, +} + +/// State required to clean up a prepared compute-tray power target safely. +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct RackPowerControlPreparedTarget { + pub machine_id: MachineId, + pub bmc_ip: IpAddr, + pub previous_desired_power_state: PowerState, + /// Version written by preparation. Rollback restores the previous state + /// only while the row still has this version and the prepared desired + /// value, so a concurrent operator update is never overwritten. + pub prepared_desired_power_state_version: ConfigVersion, + /// Exact temporary report written by preparation. Its unique source is an + /// operation ownership token; cleanup proceeds only while this complete + /// report is still the active replacement. 
+ pub power_control_health_report: HealthReport, + /// A replacement health report displaced by the temporary power-control + /// override. It is restored only if our override is still current. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub previous_replacement_health_report: Option, +} + /// Sub-states of `RackMaintenanceState::ConfigureNmxCluster`. /// /// `Start` advances into the NMX cluster sequence. `DisableScaleUpFabricState` @@ -649,7 +767,9 @@ impl MachineRvLabels { /// Individual maintenance activities that can be performed during on-demand /// rack maintenance. When the activities list on [`MaintenanceScope`] is -/// empty, all activities are performed. +/// empty, all standard rack-maintenance activities are performed. Per-device +/// [`MaintenanceActivity::PowerControl`] is always opt-in and must be listed +/// explicitly. /// /// Activity-specific configuration is carried inline on the variant /// (e.g. `FirmwareUpgrade` holds the optional target firmware version). @@ -675,8 +795,7 @@ pub enum MaintenanceActivity { ConfigureNmxCluster, PowerSequence, /// Per-device power control, dispatched by the rack state controller to - /// the listed devices on its next tick. Framed out here for the component - /// manager routing path; the rack state handler side is a follow-up. + /// the listed devices on its next tick. PowerControl { action: PowerAction, }, @@ -707,13 +826,20 @@ impl std::fmt::Display for MaintenanceActivity { /// is maintained. #[derive(Debug, Clone, Default, Deserialize, Serialize)] pub struct MaintenanceScope { + /// Correlates an on-demand maintenance request with request-scoped secrets. + /// + /// Older persisted scopes do not contain this field and continue to use the + /// legacy rack-scoped credential key. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub maintenance_request_id: Option, #[serde(default)] pub machine_ids: Vec, #[serde(default)] pub switch_ids: Vec, #[serde(default)] pub power_shelf_ids: Vec, - /// Which maintenance activities to perform. Empty means all activities. + /// Which maintenance activities to perform. Empty means all standard + /// rack-maintenance activities; per-device power control must be explicit. #[serde(default)] pub activities: Vec, } @@ -725,8 +851,15 @@ impl MaintenanceScope { self.machine_ids.is_empty() && self.switch_ids.is_empty() && self.power_shelf_ids.is_empty() } + /// Returns whether this scope requests an activity. An empty activity list + /// includes every standard maintenance activity, but never opts into + /// per-device power control. pub fn should_run(&self, activity: &MaintenanceActivity) -> bool { - self.activities.is_empty() || self.activities.iter().any(|a| a.same_kind(activity)) + if self.activities.is_empty() { + return !matches!(activity, MaintenanceActivity::PowerControl { .. }); + } + + self.activities.iter().any(|a| a.same_kind(activity)) } } @@ -747,6 +880,13 @@ pub struct RackConfig { /// selects full-rack vs partial-rack and which activities to run. #[serde(default)] pub maintenance_requested: Option, + + /// Durable marker written immediately before scoped compute-tray power is + /// dispatched. If a controller restarts while this marker is present and + /// the persisted power-control state is still `Prepared`, the result of + /// the external operation is ambiguous and must not be replayed. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub power_control_dispatch_started_at: Option>, } /// Reason a rack will not accept a new on-demand maintenance request. 
@@ -822,7 +962,7 @@ pub fn state_sla(state: &RackState, state_version: &ConfigVersion) -> StateSla { #[cfg(test)] mod tests { use carbide_test_support::Outcome::*; - use carbide_test_support::scenarios; + use carbide_test_support::{scenarios, value_scenarios}; use carbide_uuid::machine::{MachineIdSource, MachineType}; use carbide_uuid::power_shelf::{PowerShelfIdSource, PowerShelfType}; use carbide_uuid::switch::{SwitchIdSource, SwitchType}; @@ -837,6 +977,25 @@ mod tests { assert!(scope.is_full_rack()); } + #[test] + fn maintenance_scope_request_id_is_backward_compatible() { + let legacy: MaintenanceScope = serde_json::from_value(serde_json::json!({ + "machine_ids": [], + "switch_ids": [], + "power_shelf_ids": [], + "activities": [] + })) + .unwrap(); + assert_eq!(legacy.maintenance_request_id, None); + + let request_scoped = MaintenanceScope { + maintenance_request_id: Some("request-123".to_string()), + ..Default::default() + }; + let serialized = serde_json::to_value(request_scoped).unwrap(); + assert_eq!(serialized["maintenance_request_id"], "request-123"); + } + #[test] fn is_not_full_rack_with_machines() { let scope = MaintenanceScope { @@ -877,18 +1036,61 @@ mod tests { } #[test] - fn should_run_all_when_activities_empty() { - let scope = MaintenanceScope::default(); - assert!(scope.should_run(&MaintenanceActivity::FirmwareUpgrade { - firmware_version: None, - components: vec![], - force_update: false, - })); - assert!(scope.should_run(&MaintenanceActivity::NvosUpdate { - config_json: String::new(), - })); - assert!(scope.should_run(&MaintenanceActivity::ConfigureNmxCluster)); - assert!(scope.should_run(&MaintenanceActivity::PowerSequence)); + fn should_run_defaults_standard_activities_but_requires_explicit_power_control() { + value_scenarios!( + run = |(activities, requested)| MaintenanceScope { + activities, + ..Default::default() + } + .should_run(&requested); + "empty defaults firmware upgrade" { + ( + vec![], + MaintenanceActivity::FirmwareUpgrade { + 
firmware_version: None, + components: vec![], + force_update: false, + }, + ) => true, + } + + "empty defaults NVOS update" { + ( + vec![], + MaintenanceActivity::NvosUpdate { + config_json: String::new(), + }, + ) => true, + } + + "empty defaults NMX configuration" { + (vec![], MaintenanceActivity::ConfigureNmxCluster) => true, + } + + "empty defaults power sequence" { + (vec![], MaintenanceActivity::PowerSequence) => true, + } + + "empty does not opt into per-device power control" { + ( + vec![], + MaintenanceActivity::PowerControl { + action: PowerAction::ForceOff, + }, + ) => false, + } + + "explicit power control matches regardless of action" { + ( + vec![MaintenanceActivity::PowerControl { + action: PowerAction::ForceOff, + }], + MaintenanceActivity::PowerControl { + action: PowerAction::On, + }, + ) => true, + } + ); } #[test] @@ -1005,6 +1207,24 @@ mod tests { ); } + #[test] + fn legacy_power_control_state_defaults_to_preparing() { + let state: RackMaintenanceState = serde_json::from_value(serde_json::json!({ + "PowerControl": { + "action": "ForceOff" + } + })) + .unwrap(); + + assert_eq!( + state, + RackMaintenanceState::PowerControl { + action: PowerAction::ForceOff, + power_control_state: RackPowerControlState::Preparing, + } + ); + } + // ── Rack::check_accepts_maintenance ───────────────────────────────── fn test_rack(state: RackState, maintenance_requested: Option) -> Rack { diff --git a/crates/component-manager/src/component_manager.rs b/crates/component-manager/src/component_manager.rs index 0a8a4e8146..e957c14747 100644 --- a/crates/component-manager/src/component_manager.rs +++ b/crates/component-manager/src/component_manager.rs @@ -32,9 +32,9 @@ pub struct ComponentManager { // the expectation is that the state controller will then call the configured HAL for powershelves (RMS or PSM) // if false, the component management interface will directly dispatch to the configured HAL for powershelves, bypassing the state controller pub 
power_shelf_use_state_controller: bool, - // if true, the component management interface will route through the state controller for compute tray power and f/w control. - // the expectation is that the state controller will then call the configured HAL for compute tray - // if false, the component management interface will directly dispatch to the configured HAL for compute trays, bypassing the state controller + // If true, rack compute-tray firmware requests route through the rack state controller. + // Rack-associated RMS power requests always use the rack state controller unless the RPC + // explicitly requests a bypass; standalone and non-RMS power requests dispatch directly. pub compute_tray_use_state_controller: bool, } diff --git a/crates/component-manager/src/compute_tray_manager.rs b/crates/component-manager/src/compute_tray_manager.rs index 7ae91a8dd6..a16cef69b5 100644 --- a/crates/component-manager/src/compute_tray_manager.rs +++ b/crates/component-manager/src/compute_tray_manager.rs @@ -4,19 +4,32 @@ use std::fmt::Debug; use std::net::IpAddr; -use carbide_secrets::credentials::Credentials; +use carbide_secrets::credentials::{CredentialKey, Credentials}; use model::component_manager::{ComputeTrayComponent, FirmwareState, PowerAction}; use crate::error::ComponentManagerError; use crate::types::FirmwareUpdateOptions; -/// Physical network identifiers for a compute tray, used to register with and -/// operate against the backend service (CTM). +/// BMC connection details for a compute tray, used to register with and +/// operate against the selected backend service. #[derive(Debug, Clone)] pub struct ComputeTrayEndpoint { pub vendor: ComputeTrayVendor, pub bmc_ip: IpAddr, - pub bmc_credentials: Credentials, + pub bmc_port: Option, + pub authentication: ComputeTrayAuthentication, +} + +/// Authentication material for a compute tray. 
+/// +/// Core can defer a credential-key lookup to the shared Redfish client pool, +/// preserving its existing credential/session behavior. External backends +/// receive resolved credentials because they cannot access Core's secret +/// store directly. +#[derive(Debug, Clone)] +pub enum ComputeTrayAuthentication { + Credentials(Credentials), + CredentialKey(CredentialKey), } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -25,6 +38,7 @@ pub enum ComputeTrayVendor { Dell, Hpe, Lenovo, + LenovoAmi, Supermicro, Nvidia, } diff --git a/crates/component-manager/src/config.rs b/crates/component-manager/src/config.rs index 9e426f3f95..ff870100a1 100644 --- a/crates/component-manager/src/config.rs +++ b/crates/component-manager/src/config.rs @@ -40,9 +40,12 @@ pub struct ComponentManagerConfig { #[serde(default)] pub power_shelf_use_state_controller: bool, - /// When `true`, compute power control and firmware update calls - /// go through the state controller instead of being dispatched - /// directly. + /// When `true`, rack compute-tray firmware updates go through the rack + /// state controller instead of being dispatched directly. + /// + /// Rack-associated RMS power requests always use the rack state controller + /// unless the caller sets `bypass_state_controller`; standalone and + /// non-RMS power requests dispatch directly. /// /// Defaults to `false`. 
#[serde(default)] diff --git a/crates/component-manager/src/core_compute_manager.rs b/crates/component-manager/src/core_compute_manager.rs index c27b6d7dc3..44c56f3d6c 100644 --- a/crates/component-manager/src/core_compute_manager.rs +++ b/crates/component-manager/src/core_compute_manager.rs @@ -8,8 +8,8 @@ use carbide_secrets::credentials::Credentials; use model::component_manager::{ComputeTrayComponent, PowerAction}; use crate::compute_tray_manager::{ - Backend, ComputeTrayEndpoint, ComputeTrayFirmwareUpdateStatus, ComputeTrayResult, - ComputeTrayVendor, + Backend, ComputeTrayAuthentication, ComputeTrayEndpoint, ComputeTrayFirmwareUpdateStatus, + ComputeTrayResult, ComputeTrayVendor, }; use crate::error::ComponentManagerError; @@ -37,6 +37,9 @@ fn map_vendor(vendor: ComputeTrayVendor) -> Option Some(RedfishVendor::Dell), ComputeTrayVendor::Hpe => Some(RedfishVendor::Hpe), ComputeTrayVendor::Lenovo => Some(RedfishVendor::Lenovo), + // BMCVendor::LenovoAMI represents both LenovoAMI and LenovoGB300 in Core's + // inventory model. Let libredfish distinguish the concrete implementation. 
+ ComputeTrayVendor::LenovoAmi => None, ComputeTrayVendor::Supermicro => Some(RedfishVendor::Supermicro), ComputeTrayVendor::Nvidia | ComputeTrayVendor::Unknown => None, } @@ -72,21 +75,29 @@ impl crate::compute_tray_manager::ComputeTrayManager for CoreComputeTrayManager let mut results = Vec::with_capacity(endpoints.len()); for ep in endpoints { - let Credentials::UsernamePassword { - ref username, - ref password, - } = ep.bmc_credentials; - - let auth = carbide_redfish::libredfish::RedfishAuth::Direct( - username.clone(), - password.clone(), - ); + let auth = match &ep.authentication { + ComputeTrayAuthentication::Credentials(Credentials::UsernamePassword { + username, + password, + }) => carbide_redfish::libredfish::RedfishAuth::Direct( + username.clone(), + password.clone(), + ), + ComputeTrayAuthentication::CredentialKey(key) => { + carbide_redfish::libredfish::RedfishAuth::Key(key.clone()) + } + }; let vendor = map_vendor(ep.vendor); let outcome = async { let client = self .redfish_pool - .create_client(&ep.bmc_ip.to_string(), Some(443), auth, vendor) + .create_client( + &ep.bmc_ip.to_string(), + Some(ep.bmc_port.unwrap_or(443)), + auth, + vendor, + ) .await .map_err(|e| format!("failed to create Redfish client: {e}"))?; @@ -134,3 +145,38 @@ impl crate::compute_tray_manager::ComputeTrayManager for CoreComputeTrayManager )) } } + +#[cfg(test)] +mod tests { + use carbide_test_support::value_scenarios; + use libredfish::model::service_root::RedfishVendor; + + use super::*; + + #[test] + fn compute_tray_vendor_maps_to_matching_redfish_implementation() { + value_scenarios!(map_vendor: + "Dell" { + ComputeTrayVendor::Dell => Some(RedfishVendor::Dell), + } + "HPE" { + ComputeTrayVendor::Hpe => Some(RedfishVendor::Hpe), + } + "Lenovo" { + ComputeTrayVendor::Lenovo => Some(RedfishVendor::Lenovo), + } + "Lenovo AMI stays distinct from Lenovo" { + ComputeTrayVendor::LenovoAmi => None, + } + "Supermicro" { + ComputeTrayVendor::Supermicro => 
Some(RedfishVendor::Supermicro), + } + "NVIDIA auto-detects its concrete implementation" { + ComputeTrayVendor::Nvidia => None, + } + "unknown auto-detects" { + ComputeTrayVendor::Unknown => None, + } + ); + } +} diff --git a/crates/component-manager/src/rms.rs b/crates/component-manager/src/rms.rs index 44b1e5029c..c3efd08362 100644 --- a/crates/component-manager/src/rms.rs +++ b/crates/component-manager/src/rms.rs @@ -36,8 +36,8 @@ use sqlx::PgPool; use tracing::instrument; use crate::compute_tray_manager::{ - Backend as ComputeTrayBackend, ComputeTrayEndpoint, ComputeTrayFirmwareUpdateStatus, - ComputeTrayManager, ComputeTrayResult, + Backend as ComputeTrayBackend, ComputeTrayAuthentication, ComputeTrayEndpoint, + ComputeTrayFirmwareUpdateStatus, ComputeTrayManager, ComputeTrayResult, }; use crate::config::ComponentManagerConfig; use crate::error::ComponentManagerError; @@ -409,6 +409,19 @@ fn to_rms_power_operation(action: PowerAction) -> i32 { } } +/// Compute trays preserve the caller's exact Redfish reset semantics. Switch +/// and power-shelf callers retain the older Off/Reset mapping above. +fn to_rms_compute_power_operation(action: PowerAction) -> i32 { + match action { + PowerAction::On => rms::PowerOperation::On as i32, + PowerAction::GracefulShutdown => rms::PowerOperation::GracefulShutdown as i32, + PowerAction::ForceOff => rms::PowerOperation::ForceOff as i32, + PowerAction::GracefulRestart => rms::PowerOperation::GracefulRestart as i32, + PowerAction::ForceRestart => rms::PowerOperation::ForceRestart as i32, + PowerAction::AcPowercycle => rms::PowerOperation::Reset as i32, + } +} + fn map_rms_firmware_job_state(state: i32) -> FirmwareState { match rms::FirmwareJobState::try_from(state) { Ok(rms::FirmwareJobState::Queued) => FirmwareState::Queued, @@ -878,10 +891,6 @@ async fn list_firmware_object_ids( /// switches. Mirrors the value used by `crate::rack::firmware_update`. 
const SWITCH_BMC_PORT: u32 = 443; -/// Default BMC HTTPS port used when populating `rms::Endpoint` for compute -/// trays. -const COMPUTE_TRAY_BMC_PORT: u32 = 443; - fn credentials_to_rms(creds: &Credentials) -> rms::Credentials { let Credentials::UsernamePassword { username, password } = creds; rms::Credentials { @@ -1146,8 +1155,18 @@ fn build_compute_tray_node_info( identity: &RmsIdentity, bmc_mac: MacAddress, node_type: rms::NodeType, -) -> rms::NodeInfo { - rms::NodeInfo { +) -> Result { + let credentials = match &ep.authentication { + ComputeTrayAuthentication::Credentials(credentials) => credentials_to_rms(credentials), + ComputeTrayAuthentication::CredentialKey(key) => { + return Err(format!( + "RMS compute-tray operations require resolved credentials, not key {}", + key.to_key_str() + )); + } + }; + + Ok(rms::NodeInfo { node_id: identity.node_id.clone(), rack_id: identity.rack_id.clone(), r#type: Some(node_type as i32), @@ -1156,12 +1175,12 @@ fn build_compute_tray_node_info( ip_address: ep.bmc_ip.to_string(), mac_address: bmc_mac.to_string(), }), - port: COMPUTE_TRAY_BMC_PORT, - credentials: Some(credentials_to_rms(&ep.bmc_credentials)), + port: u32::from(ep.bmc_port.unwrap_or(443)), + credentials: Some(credentials), dangerously_accept_invalid_certs: true, }), host_endpoint: None, - } + }) } fn summarize_firmware_object_apply_response( @@ -1774,7 +1793,7 @@ impl ComputeTrayManager for RmsBackend { ) -> Result, ComponentManagerError> { let bmc_ips: Vec = endpoints.iter().map(|ep| ep.bmc_ip).collect(); let ids = resolve_compute_tray_identities(&self.db, &bmc_ips).await?; - let operation = to_rms_power_operation(action); + let operation = to_rms_compute_power_operation(action); let mut results = Vec::with_capacity(endpoints.len()); for ep in endpoints { @@ -1799,12 +1818,22 @@ impl ComputeTrayManager for RmsBackend { } }; - let device = build_compute_tray_node_info( + let device = match build_compute_tray_node_info( ep, resolved.identity, identity.bmc_mac, 
resolved.node_type, - ); + ) { + Ok(device) => device, + Err(error) => { + results.push(ComputeTrayResult { + bmc_ip: ep.bmc_ip, + success: false, + error: Some(error), + }); + continue; + } + }; let request = rms::BatchSetPowerStateRequest { nodes: Some(rms::NodeSet { nodes: vec![device], @@ -1875,12 +1904,22 @@ impl ComputeTrayManager for RmsBackend { } }; - let device = build_compute_tray_node_info( + let device = match build_compute_tray_node_info( ep, resolved.identity, identity.bmc_mac, resolved.node_type, - ); + ) { + Ok(device) => device, + Err(error) => { + results.push(ComputeTrayResult { + bmc_ip: ep.bmc_ip, + success: false, + error: Some(error), + }); + continue; + } + }; let request = match apply_firmware_object_request( device, resolved.identity, @@ -2051,7 +2090,9 @@ mod tests { }; use super::*; - use crate::compute_tray_manager::{ComputeTrayManager, ComputeTrayVendor}; + use crate::compute_tray_manager::{ + ComputeTrayAuthentication, ComputeTrayManager, ComputeTrayVendor, + }; use crate::power_shelf_manager::PowerShelfVendor; #[async_trait::async_trait] @@ -2090,6 +2131,30 @@ mod tests { ); } + #[test] + fn compute_power_action_preserves_rms_reset_semantics() { + value_scenarios!(to_rms_compute_power_operation: + "power on" { + PowerAction::On => rms::PowerOperation::On as i32, + } + "graceful shutdown" { + PowerAction::GracefulShutdown => rms::PowerOperation::GracefulShutdown as i32, + } + "force off" { + PowerAction::ForceOff => rms::PowerOperation::ForceOff as i32, + } + "graceful restart" { + PowerAction::GracefulRestart => rms::PowerOperation::GracefulRestart as i32, + } + "force restart" { + PowerAction::ForceRestart => rms::PowerOperation::ForceRestart as i32, + } + "AC power cycle" { + PowerAction::AcPowercycle => rms::PowerOperation::Reset as i32, + } + ); + } + #[test] fn firmware_job_state_maps_each_variant() { value_scenarios!(run = |state: rms::FirmwareJobState| map_rms_firmware_job_state(state as i32); @@ -2355,10 +2420,11 @@ mod 
tests { ComputeTrayEndpoint { vendor: ComputeTrayVendor::Nvidia, bmc_ip: bmc_ip.parse().unwrap(), - bmc_credentials: Credentials::UsernamePassword { + bmc_port: None, + authentication: ComputeTrayAuthentication::Credentials(Credentials::UsernamePassword { username: "admin".into(), password: "pass".into(), - }, + }), } } @@ -2426,6 +2492,37 @@ mod tests { assert_eq!(node.r#type, Some(rms::NodeType::SwitchGb300Nvidia as i32)); } + #[test] + fn direct_rms_compute_tray_node_info_preserves_bmc_port() { + value_scenarios!( + run = |port| { + let mut endpoint = make_ct_endpoint(CT_IP_1); + endpoint.bmc_port = port; + let identity = RmsIdentity { + node_id: "node-1".to_string(), + rack_id: "rack-1".to_string(), + rack_profile_id: None, + }; + build_compute_tray_node_info( + &endpoint, + &identity, + CT_MAC_1.parse().unwrap(), + rms::NodeType::ComputeGb200Nvidia, + ) + .expect("direct credentials should build an RMS node") + .bmc_endpoint + .expect("compute tray BMC endpoint") + .port + }; + "missing port uses the Redfish HTTPS default" { + None => 443, + } + "configured non-standard port is preserved" { + Some(8443) => 8443, + } + ); + } + #[test] fn direct_rms_firmware_object_json_request_defaults_missing_access_token_to_noauth() { let request = apply_firmware_object_request( diff --git a/crates/machine-controller/Cargo.toml b/crates/machine-controller/Cargo.toml index 9cd99b9978..66fb632d07 100644 --- a/crates/machine-controller/Cargo.toml +++ b/crates/machine-controller/Cargo.toml @@ -40,6 +40,7 @@ carbide-redfish = { path = "../redfish", default-features = false } carbide-secrets = { path = "../secrets" } carbide-state-controller-common = { path = "../state-controller-common", default-features = false } carbide-uuid = { path = "../uuid", default-features = false } +component-manager = { path = "../component-manager" } config-version = { path = "../config-version", default-features = false } state-controller = { path = "../state-controller" } diff --git 
a/crates/machine-controller/src/context.rs b/crates/machine-controller/src/context.rs index d6bca9ffd2..8f29aecbed 100644 --- a/crates/machine-controller/src/context.rs +++ b/crates/machine-controller/src/context.rs @@ -20,8 +20,15 @@ use std::sync::Arc; use carbide_health_metrics::PerObjectMetricsRegistry; use carbide_ipmi::IPMITool; use carbide_redfish::libredfish::RedfishClientPool; +use carbide_secrets::credentials::{BmcCredentialType, CredentialKey, CredentialReader}; +use component_manager::component_manager::ComponentManager; +use component_manager::compute_tray_manager::{ + Backend as ComputeTrayBackend, ComputeTrayAuthentication, ComputeTrayEndpoint, + ComputeTrayManager, ComputeTrayResult, ComputeTrayVendor, +}; use db::db_read::PgPoolReader; use libredfish::Redfish; +use model::component_manager::PowerAction; use model::machine::Machine; use sqlx::PgPool; use state_controller::state_handler::{StateHandlerContextObjects, StateHandlerError}; @@ -44,6 +51,14 @@ pub struct MachineStateHandlerServices { pub db_reader: PgPoolReader, /// API for interaction with Libredfish pub redfish_client_pool: Arc, + /// Core's Redfish-backed compute-tray implementation. Standalone hosts and + /// rack hosts whose configured backend is not RMS always use this path. + pub core_compute_tray_manager: Arc, + /// The configured component manager. Rack-associated hosts use its compute + /// backend when that backend is RMS. + pub component_manager: Option>, + /// Credential source used to build compute-tray backend endpoints. + pub credential_reader: Arc, /// An implementation of the IPMITool that understands how to reboot a machine pub ipmi_tool: Arc, /// Configuration used by MachineStateHandler. 
@@ -52,7 +67,194 @@ pub struct MachineStateHandlerServices { pub per_object_metrics_registry: Arc, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ComputeTrayRoute { + Core, + ConfiguredRms, +} + +fn compute_tray_route( + rack_associated: bool, + configured_backend: Option, +) -> ComputeTrayRoute { + if rack_associated && configured_backend == Some(ComputeTrayBackend::Rms) { + ComputeTrayRoute::ConfiguredRms + } else { + ComputeTrayRoute::Core + } +} + +fn validate_compute_tray_power_result( + backend_name: &str, + machine_id: &str, + expected_bmc_ip: std::net::IpAddr, + results: Vec, +) -> Result<(), String> { + if results.len() != 1 { + return Err(format!( + "compute-tray backend {backend_name} returned {} results for one requested machine {machine_id}", + results.len() + )); + } + + let Some(result) = results.into_iter().next() else { + return Err(format!( + "compute-tray backend {backend_name} returned no result for machine {machine_id}" + )); + }; + if result.bmc_ip != expected_bmc_ip { + return Err(format!( + "compute-tray backend {backend_name} returned a result for unexpected BMC {} instead of machine {machine_id} BMC {expected_bmc_ip}", + result.bmc_ip + )); + } + if !result.success { + return Err(format!( + "compute-tray backend {backend_name} power control failed for {machine_id}: {}", + result.error.unwrap_or_else(|| "unknown error".into()) + )); + } + + Ok(()) +} + impl MachineStateHandlerServices { + pub fn compute_tray_manager_for(&self, machine: &Machine) -> Arc { + match compute_tray_route( + machine.rack_id.is_some(), + self.component_manager + .as_ref() + .map(|component_manager| component_manager.compute_tray.backend()), + ) { + ComputeTrayRoute::Core => self.core_compute_tray_manager.clone(), + ComputeTrayRoute::ConfiguredRms => self.component_manager.as_ref().map_or_else( + || self.core_compute_tray_manager.clone(), + |component_manager| component_manager.compute_tray.clone(), + ), + } + } + + pub async fn compute_tray_endpoint( + &self, 
+ machine: &Machine, + backend: ComputeTrayBackend, + ) -> Result { + let bmc_ip = machine + .bmc_info + .ip + .ok_or_else(|| StateHandlerError::MissingData { + object_id: machine.id.to_string(), + missing: "BMC IP address (bmc_info.ip)", + })?; + let bmc_mac = machine + .bmc_info + .mac + .ok_or_else(|| StateHandlerError::MissingData { + object_id: machine.id.to_string(), + missing: "BMC MAC address (bmc_info.mac)", + })?; + let bmc_credential_key = CredentialKey::BmcCredentials { + credential_type: BmcCredentialType::BmcRoot { + bmc_mac_address: bmc_mac, + }, + }; + let authentication = if backend == ComputeTrayBackend::Core { + // Preserve Core's existing Redfish-pool behavior: the pool owns + // credential/session lookup and test pools may intentionally + // accept the key without a materialized secret. + ComputeTrayAuthentication::CredentialKey(bmc_credential_key) + } else { + let bmc_credentials = match self + .credential_reader + .get_credentials(&bmc_credential_key) + .await + .map_err(|error| { + StateHandlerError::GenericError(eyre::eyre!( + "failed to load BMC credentials for {}: {}", + machine.id, + error + )) + })? { + Some(credentials) => credentials, + None => { + let sitewide_credential_key = CredentialKey::BmcCredentials { + credential_type: BmcCredentialType::SiteWideRoot, + }; + self.credential_reader + .get_credentials(&sitewide_credential_key) + .await + .map_err(|error| { + StateHandlerError::GenericError(eyre::eyre!( + "failed to load site-wide BMC credentials for {}: {}", + machine.id, + error + )) + })? + .ok_or_else(|| StateHandlerError::MissingData { + object_id: machine.id.to_string(), + missing: "per-BMC or site-wide BMC credentials", + })? 
+ } + }; + ComputeTrayAuthentication::Credentials(bmc_credentials) + }; + + let vendor = match machine.bmc_vendor() { + bmc_vendor::BMCVendor::Dell => ComputeTrayVendor::Dell, + bmc_vendor::BMCVendor::Hpe => ComputeTrayVendor::Hpe, + bmc_vendor::BMCVendor::Lenovo => ComputeTrayVendor::Lenovo, + bmc_vendor::BMCVendor::LenovoAMI => ComputeTrayVendor::LenovoAmi, + bmc_vendor::BMCVendor::Supermicro => ComputeTrayVendor::Supermicro, + bmc_vendor::BMCVendor::Nvidia => ComputeTrayVendor::Nvidia, + _ => ComputeTrayVendor::Unknown, + }; + + Ok(ComputeTrayEndpoint { + vendor, + bmc_ip, + bmc_port: machine.bmc_info.port, + authentication, + }) + } + + pub async fn power_control( + &self, + machine: &Machine, + action: PowerAction, + ) -> Result<(), StateHandlerError> { + let backend = self.compute_tray_manager_for(machine); + self.power_control_with_manager(machine, backend.as_ref(), action) + .await + } + + pub async fn power_control_with_manager( + &self, + machine: &Machine, + backend: &dyn ComputeTrayManager, + action: PowerAction, + ) -> Result<(), StateHandlerError> { + let endpoint = self + .compute_tray_endpoint(machine, backend.backend()) + .await?; + let results = backend + .power_control(std::slice::from_ref(&endpoint), action) + .await + .map_err(|error| { + StateHandlerError::GenericError(eyre::eyre!( + "compute-tray backend {} power control failed: {}", + backend.name(), + error + )) + })?; + validate_compute_tray_power_result( + backend.name(), + &machine.id.to_string(), + endpoint.bmc_ip, + results, + ) + .map_err(|error| StateHandlerError::GenericError(eyre::eyre!(error))) + } + pub async fn create_redfish_client_from_machine( &self, machine: &Machine, @@ -75,3 +277,96 @@ impl MachineStateHandlerServices { .map_err(StateHandlerError::from) } } + +#[cfg(test)] +mod tests { + use std::net::{IpAddr, Ipv4Addr}; + + use super::*; + + #[test] + fn compute_tray_route_uses_rms_only_for_rack_machines() { + let cases = [ + ( + "standalone with RMS configured", + false, 
+ Some(ComputeTrayBackend::Rms), + ComputeTrayRoute::Core, + ), + ( + "rack with RMS configured", + true, + Some(ComputeTrayBackend::Rms), + ComputeTrayRoute::ConfiguredRms, + ), + ( + "rack with Core configured", + true, + Some(ComputeTrayBackend::Core), + ComputeTrayRoute::Core, + ), + ( + "rack with mock configured", + true, + Some(ComputeTrayBackend::Mock), + ComputeTrayRoute::Core, + ), + ( + "rack without component manager", + true, + None, + ComputeTrayRoute::Core, + ), + ]; + + for (scenario, rack_associated, backend, expected) in cases { + assert_eq!( + compute_tray_route(rack_associated, backend), + expected, + "{scenario}" + ); + } + } + + #[test] + fn compute_tray_power_result_requires_one_matching_success() { + let expected_ip = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 10)); + let other_ip = IpAddr::V4(Ipv4Addr::new(192, 0, 2, 11)); + let result = |bmc_ip, success, error: Option<&str>| ComputeTrayResult { + bmc_ip, + success, + error: error.map(str::to_string), + }; + let cases = [ + ( + "one matching success", + vec![result(expected_ip, true, None)], + true, + ), + ("empty result", vec![], false), + ( + "duplicate result", + vec![ + result(expected_ip, true, None), + result(expected_ip, true, None), + ], + false, + ), + ("unexpected BMC", vec![result(other_ip, true, None)], false), + ( + "matching backend failure", + vec![result(expected_ip, false, Some("rejected"))], + false, + ), + ]; + + for (scenario, results, expected) in cases { + assert_eq!( + validate_compute_tray_power_result("test", "Machine:test", expected_ip, results,) + .is_ok(), + expected, + "{scenario}" + ); + } + } +} diff --git a/crates/machine-controller/src/handler.rs b/crates/machine-controller/src/handler.rs index 9ac225b7cb..20b3ddc67d 100644 --- a/crates/machine-controller/src/handler.rs +++ b/crates/machine-controller/src/handler.rs @@ -56,6 +56,7 @@ use libredfish::{Boot, EnabledDisabled, Redfish, RedfishError, SystemPowerContro use 
machine_validation::{handle_machine_validation_requested, handle_machine_validation_state}; use measured_boot::records::MeasurementMachineState; use model::DpuModel; +use model::component_manager::PowerAction; use model::dpa_interface::DpaInterfaceControllerState; use model::firmware::{Firmware, FirmwareComponentType, FirmwareEntry}; use model::instance::InstanceNetworkSyncStatus; @@ -109,9 +110,7 @@ use crate::dpf::DpfOperations; use crate::health_report::{ create_host_update_health_report_dpufw, create_host_update_health_report_hostfw, }; -use crate::redfish::{ - did_dpu_finish_booting, host_power_control, host_power_control_with_location, -}; +use crate::redfish::did_dpu_finish_booting; use crate::{MeasuringOutcome, get_measuring_prerequisites, handle_measuring_state}; pub mod attestation; @@ -1220,10 +1219,12 @@ impl MachineStateHandler { .await } CreateBossVolumeState::RebootHost => { - redfish_client - .power(SystemPowerControl::ForceRestart) - .await - .map_err(|e| redfish_error("ForceRestart", e))?; + handler_host_power_control( + mh_snapshot, + ctx, + SystemPowerControl::ForceRestart, + ) + .await?; let next_state = waiting_for_cleanup_state( CleanupState::CreateBossVolume { @@ -2049,10 +2050,7 @@ async fn handle_restart_verification( } if verification_attempts >= MAX_VERIFICATION_ATTEMPTS { - host_redfish_client - .power(SystemPowerControl::ForceRestart) - .await - .map_err(|e| redfish_error("restart host", e))?; + handler_host_power_control(mh_snapshot, ctx, SystemPowerControl::ForceRestart).await?; ctx.pending_db_writes .push(MachineWriteOp::UpdateRestartVerificationStatus { @@ -3097,19 +3095,14 @@ async fn handle_dpu_reprovision( } } UnlockHostState::RebootHost => { - host_power_control( - redfish_client.as_ref(), - &state.host_snapshot, - SystemPowerControl::ForceRestart, - ctx, - ) - .await - .map_err(|e| { - StateHandlerError::GenericError(eyre!( - "failed to ForceRestart host after disabling BMC lockdown: {}", - e - )) - })?; + 
handler_host_power_control(state, ctx, SystemPowerControl::ForceRestart) + .await + .map_err(|e| { + StateHandlerError::GenericError(eyre!( + "failed to ForceRestart host after disabling BMC lockdown: {}", + e + )) + })?; ReprovisionState::UnlockHostForBootRepair { unlock_host_state: UnlockHostState::WaitForUefiBoot, @@ -8675,20 +8668,48 @@ impl HostUpgradeState { phase: Option, last_time: &Option>, ) -> Result, StateHandlerError> { - let redfish_client = services - .create_redfish_client_from_machine(&state.host_snapshot) - .await?; + let initial_reset_timed_out = || { + last_time.as_ref().is_some_and(|started_at| { + Utc::now().signed_duration_since(*started_at) + >= services + .site_config + .machine_state_controller + .failure_retry_time + }) + }; match phase.unwrap_or(InitialResetPhase::Start) { InitialResetPhase::Start => { - redfish_client - .power(SystemPowerControl::ForceOff) - .await - .map_err(|e| redfish_error("power off", e))?; + services + .power_control_with_manager( + &state.host_snapshot, + services.core_compute_tray_manager.as_ref(), + PowerAction::ForceOff, + ) + .await?; + + Ok(StateHandlerOutcome::transition(scenario.actual_new_state( + HostReprovisionState::InitialReset { + phase: InitialResetPhase::WaitingForHostOff, + last_time: Utc::now(), + }, + state.managed_state.get_host_repro_retry_count(), + ))) + } + InitialResetPhase::WaitingForHostOff => { + let redfish_client = services + .create_redfish_client_from_machine(&state.host_snapshot) + .await?; let status = get_power_state(redfish_client.as_ref()).await?; if status != PowerState::Off { - return Err(StateHandlerError::GenericError(eyre!( - "Host {} did not turn off when requested", + if initial_reset_timed_out() { + return Err(StateHandlerError::GenericError(eyre!( + "Host {} did not turn off before the initial-reset timeout", + state.host_snapshot.id + ))); + } + return Ok(StateHandlerOutcome::wait(format!( + "waiting for host {} to turn off before resetting its BMC", 
state.host_snapshot.id ))); } @@ -8706,18 +8727,66 @@ impl HostUpgradeState { ))) } InitialResetPhase::BMCWasReset => { - if let Err(_e) = redfish_client.get_tasks().await { - // BMC not fully up yet - return Ok(StateHandlerOutcome::do_nothing()); - } - redfish_client - .power(SystemPowerControl::On) + let redfish_client = match services + .create_redfish_client_from_machine(&state.host_snapshot) .await - .map_err(|e| redfish_error("power on", e))?; + { + Ok(client) => client, + Err(error) if !initial_reset_timed_out() => { + return Ok(StateHandlerOutcome::wait(format!( + "waiting for host {} BMC after reset: {error}", + state.host_snapshot.id + ))); + } + Err(error) => { + return Err(StateHandlerError::GenericError(eyre!( + "Host {} BMC did not return before the initial-reset timeout: {error}", + state.host_snapshot.id + ))); + } + }; + if let Err(error) = redfish_client.get_tasks().await { + if initial_reset_timed_out() { + return Err(StateHandlerError::GenericError(eyre!( + "Host {} BMC did not become ready before the initial-reset timeout: {error}", + state.host_snapshot.id + ))); + } + return Ok(StateHandlerOutcome::wait(format!( + "waiting for host {} BMC task service after reset: {error}", + state.host_snapshot.id + ))); + } + services + .power_control_with_manager( + &state.host_snapshot, + services.core_compute_tray_manager.as_ref(), + PowerAction::On, + ) + .await?; + + Ok(StateHandlerOutcome::transition(scenario.actual_new_state( + HostReprovisionState::InitialReset { + phase: InitialResetPhase::WaitingForHostOn, + last_time: Utc::now(), + }, + state.managed_state.get_host_repro_retry_count(), + ))) + } + InitialResetPhase::WaitingForHostOn => { + let redfish_client = services + .create_redfish_client_from_machine(&state.host_snapshot) + .await?; let status = get_power_state(redfish_client.as_ref()).await?; if status != PowerState::On { - return Err(StateHandlerError::GenericError(eyre!( - "Host {} did not turn on when requested", + if 
initial_reset_timed_out() { + return Err(StateHandlerError::GenericError(eyre!( + "Host {} did not turn on before the initial-reset timeout", + state.host_snapshot.id + ))); + } + return Ok(StateHandlerOutcome::wait(format!( + "waiting for host {} to turn on after resetting its BMC", state.host_snapshot.id ))); } @@ -9352,19 +9421,16 @@ impl HostUpgradeState { && !power_drains_needed.is_some() { // Needs a host power reset. We might also have used the power drains to do an AC powercycle. - let redfish_client = ctx - .services - .create_redfish_client_from_machine(&state.host_snapshot) - .await?; - // We previously possibly tried to use ACPowerycle here, however that requires enough time for the BMC to come back. We use // the power_drains_needed setting instead for that which is already aware of how to keep track of that sort of thing. - if let Err(e) = redfish_client.power(SystemPowerControl::ForceOff).await { + if let Err(e) = + handler_host_power_control(state, ctx, SystemPowerControl::ForceOff).await + { tracing::error!("Failed to power off {}: {e}", &endpoint.address); return Ok(StateHandlerOutcome::do_nothing()); } tokio::time::sleep(self.hgx_bmc_gpu_reboot_delay).await; - if let Err(e) = redfish_client.power(SystemPowerControl::On).await { + if let Err(e) = handler_host_power_control(state, ctx, SystemPowerControl::On).await { tracing::error!("Failed to power on {}: {e}", &endpoint.address); return Ok(StateHandlerOutcome::do_nothing()); } @@ -10190,6 +10256,79 @@ pub fn handler_host_power_control( handler_host_power_control_with_location(managedhost_snapshot, ctx, action, trigger_location) } +fn component_manager_power_action( + action: SystemPowerControl, +) -> Result { + match action { + SystemPowerControl::On => Ok(PowerAction::On), + SystemPowerControl::GracefulShutdown => Ok(PowerAction::GracefulShutdown), + SystemPowerControl::ForceOff => Ok(PowerAction::ForceOff), + SystemPowerControl::GracefulRestart => Ok(PowerAction::GracefulRestart), + 
SystemPowerControl::ForceRestart => Ok(PowerAction::ForceRestart), + SystemPowerControl::ACPowercycle => Ok(PowerAction::AcPowercycle), + SystemPowerControl::PowerCycle => Err(StateHandlerError::GenericError(eyre!( + "PowerCycle has no compute-tray component manager equivalent" + ))), + } +} + +async fn dispatch_component_manager_power_control( + machine: &Machine, + backend: &dyn component_manager::compute_tray_manager::ComputeTrayManager, + action: SystemPowerControl, + ctx: &mut StateHandlerContext<'_, MachineStateHandlerContextObjects>, + trigger_location: &std::panic::Location<'_>, +) -> Result<(), StateHandlerError> { + tracing::info!( + machine_id = %machine.id, + action = %action, + backend = backend.name(), + trigger_location = %trigger_location, + "Host Power Control" + ); + ctx.pending_db_writes + .push(MachineWriteOp::UpdateRebootRequestedTime { + machine_id: machine.id, + mode: machine_last_reboot_requested_mode(action), + time: Utc::now(), + }); + + ctx.services + .power_control_with_manager(machine, backend, component_manager_power_action(action)?) 
+ .await +} + +async fn dispatch_core_host_power_control( + machine: &Machine, + backend: &dyn component_manager::compute_tray_manager::ComputeTrayManager, + redfish_client: &dyn Redfish, + power_state: libredfish::PowerState, + mut action: SystemPowerControl, + ctx: &mut StateHandlerContext<'_, MachineStateHandlerContextObjects>, + trigger_location: &std::panic::Location<'_>, +) -> Result { + if action == SystemPowerControl::ACPowercycle + && !redfish_client.ac_powercycle_supported_by_power() + { + action = SystemPowerControl::ForceOff; + } + + if action == SystemPowerControl::ACPowercycle && power_state != libredfish::PowerState::Off { + tracing::warn!( + machine_id = %machine.id, + %power_state, + "ACPowercycle requires chassis to be Off, forcing off first" + ); + ctx.services + .power_control_with_manager(machine, backend, PowerAction::ForceOff) + .await?; + } + + dispatch_component_manager_power_control(machine, backend, action, ctx, trigger_location) + .await?; + Ok(action) +} + pub async fn handler_host_power_control_with_location( managedhost_snapshot: &ManagedHostStateSnapshot, ctx: &mut StateHandlerContext<'_, MachineStateHandlerContextObjects>, @@ -10197,50 +10336,56 @@ pub async fn handler_host_power_control_with_location( location: &std::panic::Location<'_>, ) -> Result<(), StateHandlerError> { let mut action = action; - let redfish_client = ctx - .services - .create_redfish_client_from_machine(&managedhost_snapshot.host_snapshot) - .await?; - - let power_state = host_power_state(redfish_client.as_ref()).await?; - - let target_power_state_reached = (power_state == libredfish::PowerState::Off - && (action == SystemPowerControl::ForceOff - || action == SystemPowerControl::GracefulShutdown)) - || (power_state == libredfish::PowerState::On && action == SystemPowerControl::On); - - if target_power_state_reached { - let machine_id = &managedhost_snapshot.host_snapshot.id; - tracing::warn!(%machine_id, %power_state, %action, "Target power state is already 
reached. Skipping power control action"); + let machine = &managedhost_snapshot.host_snapshot; + let backend = ctx.services.compute_tray_manager_for(machine); + + if backend.backend() != component_manager::compute_tray_manager::Backend::Core { + // Non-Core backends own power-state/idempotency semantics. In particular, do not make + // direct BMC reads before an RMS operation: rack deployments may intentionally expose + // the BMC only through RMS. + dispatch_component_manager_power_control(machine, backend.as_ref(), action, ctx, location) + .await?; } else { - if power_state == libredfish::PowerState::Off - && (action == SystemPowerControl::ForceRestart - || action == SystemPowerControl::GracefulRestart) - { - // A host can't be restarted if it is in power-off state. - // In this call, power on the system. State machine restart the system in next iteration. - tracing::warn!(%power_state, %action, "Power state is Off and requested action is restart. Trying to power on the host."); - action = SystemPowerControl::On; - } - - let machine = &managedhost_snapshot.host_snapshot; - let is_restart = action == SystemPowerControl::ForceRestart - || action == SystemPowerControl::GracefulRestart; - - if is_restart && needs_ipmi_restart(machine, ctx).await? { - do_ipmi_restart(machine, ctx, action, location).await?; + let redfish_client = ctx + .services + .create_redfish_client_from_machine(machine) + .await?; + let power_state = host_power_state(redfish_client.as_ref()).await?; + let target_power_state_reached = (power_state == libredfish::PowerState::Off + && (action == SystemPowerControl::ForceOff + || action == SystemPowerControl::GracefulShutdown)) + || (power_state == libredfish::PowerState::On && action == SystemPowerControl::On); + + if target_power_state_reached { + let machine_id = &machine.id; + tracing::warn!(%machine_id, %power_state, %action, "Target power state is already reached. 
Skipping power control action"); } else { - host_power_control_with_location( - redfish_client.as_ref(), - machine, - action, - ctx, - location, - ) - .await - .map_err(|e| { - StateHandlerError::GenericError(eyre!("handler_host_power_control failed: {}", e)) - })?; + if power_state == libredfish::PowerState::Off + && (action == SystemPowerControl::ForceRestart + || action == SystemPowerControl::GracefulRestart) + { + // A host can't be restarted if it is in power-off state. + // In this call, power on the system. State machine restart the system in next iteration. + tracing::warn!(%power_state, %action, "Power state is Off and requested action is restart. Trying to power on the host."); + action = SystemPowerControl::On; + } + + let is_restart = action == SystemPowerControl::ForceRestart + || action == SystemPowerControl::GracefulRestart; + if is_restart && needs_ipmi_restart(machine, ctx).await? { + do_ipmi_restart(machine, ctx, action, location).await?; + } else { + action = dispatch_core_host_power_control( + machine, + backend.as_ref(), + redfish_client.as_ref(), + power_state, + action, + ctx, + location, + ) + .await?; + } } } @@ -10646,16 +10791,11 @@ async fn handle_instance_host_platform_config( } // Host is still on, issue power off command - host_power_control( - redfish_client.as_ref(), - &mh_snapshot.host_snapshot, - SystemPowerControl::ForceOff, - ctx, - ) - .await - .map_err(|e| { - StateHandlerError::GenericError(eyre!("failed to power off host: {}", e)) - })?; + handler_host_power_control(mh_snapshot, ctx, SystemPowerControl::ForceOff) + .await + .map_err(|e| { + StateHandlerError::GenericError(eyre!("failed to power off host: {}", e)) + })?; return Ok(StateHandlerOutcome::wait(format!( "waiting for {} to power OFF; current power state: {}", @@ -10696,13 +10836,8 @@ async fn handle_instance_host_platform_config( // Host is still off. Every 5th retry use AC power cycle instead of On. 
let next_retry = power_on_retry_count + 1; if next_retry % 5 == 0 { - match host_power_control( - redfish_client.as_ref(), - &mh_snapshot.host_snapshot, - SystemPowerControl::ACPowercycle, - ctx, - ) - .await + match handler_host_power_control(mh_snapshot, ctx, SystemPowerControl::ACPowercycle) + .await { Ok(()) => { return Ok(StateHandlerOutcome::transition( @@ -10717,10 +10852,6 @@ async fn handle_instance_host_platform_config( }, )); } - Err(RedfishError::NotSupported(_)) => { - // if not supported, just power on - tracing::info!("AC Powercycle not supported, skipping to power on"); - } Err(e) => { // TODO: Dell's return a generic error if in lockdown which needs to be changed in Redfish SDK tracing::warn!("Failed to AC Powercycle host, skipping to power on: {e}"); @@ -10728,14 +10859,11 @@ async fn handle_instance_host_platform_config( }; } - host_power_control( - redfish_client.as_ref(), - &mh_snapshot.host_snapshot, - SystemPowerControl::On, - ctx, - ) - .await - .map_err(|e| StateHandlerError::GenericError(eyre!("failed to power on host: {e}")))?; + handler_host_power_control(mh_snapshot, ctx, SystemPowerControl::On) + .await + .map_err(|e| { + StateHandlerError::GenericError(eyre!("failed to power on host: {e}")) + })?; tracing::info!( host_id = %mh_snapshot.host_snapshot.id, @@ -10791,19 +10919,14 @@ async fn handle_instance_host_platform_config( } } UnlockHostState::RebootHost => { - host_power_control( - redfish_client.as_ref(), - &mh_snapshot.host_snapshot, - SystemPowerControl::ForceRestart, - ctx, - ) - .await - .map_err(|e| { - StateHandlerError::GenericError(eyre!( - "failed to ForceRestart host after disabling BMC lockdown: {}", - e - )) - })?; + handler_host_power_control(mh_snapshot, ctx, SystemPowerControl::ForceRestart) + .await + .map_err(|e| { + StateHandlerError::GenericError(eyre!( + "failed to ForceRestart host after disabling BMC lockdown: {}", + e + )) + })?; InstanceState::HostPlatformConfiguration { platform_config_state: 
HostPlatformConfigurationState::UnlockHost { @@ -11491,6 +11614,39 @@ mod tests { use super::*; + #[test] + fn component_manager_power_action_maps_supported_host_actions() { + let cases = [ + (SystemPowerControl::On, Some(PowerAction::On)), + ( + SystemPowerControl::GracefulShutdown, + Some(PowerAction::GracefulShutdown), + ), + (SystemPowerControl::ForceOff, Some(PowerAction::ForceOff)), + ( + SystemPowerControl::GracefulRestart, + Some(PowerAction::GracefulRestart), + ), + ( + SystemPowerControl::ForceRestart, + Some(PowerAction::ForceRestart), + ), + ( + SystemPowerControl::ACPowercycle, + Some(PowerAction::AcPowercycle), + ), + (SystemPowerControl::PowerCycle, None), + ]; + + for (input, expected) in cases { + assert_eq!( + component_manager_power_action(input).ok(), + expected, + "input: {input:?}" + ); + } + } + #[test] fn scout_firmware_upgrade_deadline_accounts_for_each_artifact() { let started_at = chrono::DateTime::::from_str("2026-04-28T00:00:00Z").unwrap(); diff --git a/crates/machine-controller/src/redfish.rs b/crates/machine-controller/src/redfish.rs index 4f17185e04..54dad276aa 100644 --- a/crates/machine-controller/src/redfish.rs +++ b/crates/machine-controller/src/redfish.rs @@ -15,101 +15,8 @@ * limitations under the License. 
*/ -use carbide_redfish::libredfish::conv::machine_last_reboot_requested_mode; -use chrono::Utc; use libredfish::model::BootProgress; -use libredfish::{PowerState, Redfish, RedfishError, SystemPowerControl}; -use model::machine::Machine; -use state_controller::state_handler::StateHandlerContext; - -use crate::context::MachineStateHandlerContextObjects; -use crate::write_ops::MachineWriteOp; - -#[track_caller] -pub fn host_power_control( - redfish_client: &dyn Redfish, - machine: &Machine, - action: SystemPowerControl, - ctx: &mut StateHandlerContext<'_, MachineStateHandlerContextObjects>, -) -> impl Future> { - let trigger_location = std::panic::Location::caller(); - host_power_control_with_location(redfish_client, machine, action, ctx, trigger_location) -} - -/// redfish utility functions -/// -/// host_power_control allows control over the power of the host -pub async fn host_power_control_with_location( - redfish_client: &dyn Redfish, - machine: &Machine, - action: SystemPowerControl, - ctx: &mut StateHandlerContext<'_, MachineStateHandlerContextObjects>, - trigger_location: &std::panic::Location<'_>, -) -> Result<(), RedfishError> { - let action = if action == SystemPowerControl::ACPowercycle - && !redfish_client.ac_powercycle_supported_by_power() - { - // Not supported here, so just turn off - SystemPowerControl::ForceOff - } else { - action - }; - // Always log to ensure we can see that carbide is doing the power controlling - tracing::info!( - machine_id = machine.id.to_string(), - action = action.to_string(), - trigger_location = %trigger_location, - "Host Power Control" - ); - ctx.pending_db_writes - .push(MachineWriteOp::UpdateRebootRequestedTime { - machine_id: machine.id, - mode: machine_last_reboot_requested_mode(action), - time: Utc::now(), - }); - - if (action == SystemPowerControl::GracefulRestart) - || (action == SystemPowerControl::ForceRestart) - { - let power_result: Result = redfish_client.get_power_state().await; - if let Ok(power_state) = 
power_result { - tracing::info!( - machine_id = machine.id.to_string(), - action = power_state.to_string(), - "Host Power State" - ); - if power_state == PowerState::Off { - tracing::info!( - machine_id = machine.id.to_string(), - action = "Manual intervention required to initiate power-on".to_string(), - "Host Power Action" - ); - /* // reserve for future proactive power on action - redfish_client - .power(SystemPowerControl::On) - .await? - */ - } else { - redfish_client.power(action).await? - } - } - } else if action == SystemPowerControl::ACPowercycle { - let power_state = redfish_client.get_power_state().await?; - if power_state != PowerState::Off { - tracing::warn!( - machine_id = machine.id.to_string(), - %power_state, - "ACPowercycle requires chassis to be Off, forcing off first" - ); - redfish_client.power(SystemPowerControl::ForceOff).await?; - } - redfish_client.power(action).await? - } else { - redfish_client.power(action).await? - } - - Ok(()) -} +use libredfish::{Redfish, RedfishError}; const LAST_OEM_STATE_OS_IS_RUNNING: &str = "OsIsRunning"; diff --git a/crates/rack-controller/Cargo.toml b/crates/rack-controller/Cargo.toml index 4a75ce8a14..2fb0f33ddb 100644 --- a/crates/rack-controller/Cargo.toml +++ b/crates/rack-controller/Cargo.toml @@ -22,13 +22,16 @@ license.workspace = true authors.workspace = true [dependencies] +bmc-vendor = { path = "../bmc-vendor" } carbide-api-db = { path = "../api-db", default-features = false } carbide-api-model = { path = "../api-model", default-features = false } carbide-health-metrics = { path = "../health-metrics", default-features = false } +carbide-health-report = { path = "../health-report", default-features = false } carbide-rack = { path = "../rack", default-features = false } carbide-secrets = { path = "../secrets", default-features = false } carbide-utils = { path = "../utils", default-features = false } carbide-uuid = { path = "../uuid", default-features = false } +component-manager = { path = 
"../component-manager" } config-version = { path = "../config-version", default-features = false } state-controller = { path = "../state-controller", default-features = false } @@ -44,9 +47,11 @@ serde_json = { workspace = true } sqlx = { workspace = true } tracing = { workspace = true } tonic = { workspace = true } +uuid = { workspace = true } [dev-dependencies] carbide-test-support = { path = "../test-support" } +tokio = { workspace = true } [lints] workspace = true diff --git a/crates/rack-controller/src/context.rs b/crates/rack-controller/src/context.rs index 623e6b17a4..28bcd9f19a 100644 --- a/crates/rack-controller/src/context.rs +++ b/crates/rack-controller/src/context.rs @@ -22,6 +22,7 @@ use carbide_rack::rms_client::SwitchSystemImageRmsClient; use carbide_rack_controller::config::RackConfig; use carbide_rack_controller::metrics::RackMetrics; use carbide_secrets::credentials::CredentialManager; +use component_manager::component_manager::ComponentManager; use librms::RmsApi; use sqlx::PgPool; use state_controller::state_handler::StateHandlerContextObjects; @@ -32,6 +33,8 @@ pub struct RackStateHandlerContextObjects {} #[derive(Clone)] pub struct RackStateHandlerServices { pub db_pool: PgPool, + /// Component-manager backends used for scoped device operations. + pub component_manager: Option>, /// Rack Manager Service client pub rms_client: Option>, // TODO: probably this is not the best place for config. But this diff --git a/crates/rack-controller/src/io.rs b/crates/rack-controller/src/io.rs index 2cd290d10d..06da88ec06 100644 --- a/crates/rack-controller/src/io.rs +++ b/crates/rack-controller/src/io.rs @@ -146,6 +146,7 @@ impl StateControllerIO for RackStateControllerIO { ("maintenance", "configure_nmx_cluster") } RackMaintenanceState::PowerSequence { .. } => ("maintenance", "power_sequence"), + RackMaintenanceState::PowerControl { .. 
} => ("maintenance", "power_control"), RackMaintenanceState::Completed => ("maintenance", "completed"), }, RackState::Error { .. } => ("error", ""), diff --git a/crates/rack-controller/src/maintenance.rs b/crates/rack-controller/src/maintenance.rs index 6fbebe9e69..1041ad8ddb 100644 --- a/crates/rack-controller/src/maintenance.rs +++ b/crates/rack-controller/src/maintenance.rs @@ -35,19 +35,35 @@ use carbide_rack_controller::fabric_manager::{ persist_primary_switch, select_primary_switch, validate_switch_inventory_for_nmx_cluster, }; use carbide_rack_controller::validating::strip_rv_labels; -use carbide_secrets::credentials::{CredentialManager, Credentials}; +use carbide_secrets::credentials::{ + BmcCredentialType, CredentialKey, CredentialManager, Credentials, +}; +use carbide_uuid::machine::MachineId; use carbide_uuid::rack::{RackId, RackProfileId}; +use component_manager::compute_tray_manager::{ + ComputeTrayAuthentication, ComputeTrayEndpoint, ComputeTrayManager, ComputeTrayResult, + ComputeTrayVendor, +}; use db::{ + ObjectFilter, explored_endpoints as db_explored_endpoints, host_machine_update as db_host_machine_update, machine as db_machine, - machine_topology as db_machine_topology, rack as db_rack, switch as db_switch, + machine_topology as db_machine_topology, power_options as db_power_options, rack as db_rack, + switch as db_switch, +}; +use health_report::{ + HealthAlertClassification, HealthProbeAlert, HealthProbeId, HealthReport, HealthReportApplyMode, }; use librms::protos::rack_manager as rms; +use model::component_manager::PowerAction; +use model::machine::machine_search_config::MachineSearchConfig; +use model::power_manager::PowerState; use model::rack::{ ConfigureNmxClusterState, FirmwareUpgradeDeviceInfo, FirmwareUpgradeDeviceStatus, FirmwareUpgradeState, MaintenanceActivity, MaintenanceScope, NvosUpdateJob, NvosUpdateState, NvosUpdateSwitchStatus, Rack, RackFirmwareUpgradeState, RackFirmwareUpgradeStatus, - RackMaintenanceState, RackPowerState, 
RackState, RackValidationState, SwitchNvosUpdateState, - SwitchNvosUpdateStatus, + RackMaintenanceState, RackPowerControlPreparedTarget, RackPowerControlResult, + RackPowerControlState, RackPowerControlTargetOutcome, RackPowerControlTargetResult, + RackPowerState, RackState, RackValidationState, SwitchNvosUpdateState, SwitchNvosUpdateStatus, }; use model::rack_type::RackProfile; use state_controller::state_handler::{ @@ -56,6 +72,9 @@ use state_controller::state_handler::{ use crate as carbide_rack_controller; +const MACHINE_POWER_OVERRIDE_SOURCE: &str = "component_power_control"; +const MACHINE_POWER_OVERRIDE_MESSAGE: &str = "Compute-Tray component power control in progress"; + /// Strips all `rv.*` metadata labels from every machine in the rack. /// /// Called on `Maintenance(Completed)` to ensure machines enter the next @@ -163,7 +182,7 @@ async fn transition_to_rack_error( ctx: &mut StateHandlerContext<'_, RackStateHandlerContextObjects>, ) -> Result, StateHandlerError> { let cause = cause.into(); - tracing::warn!(rack_id = %rack_id, %cause, "Rack firmware upgrade failed before polling started"); + tracing::warn!(rack_id = %rack_id, %cause, "Rack maintenance failed"); let outcome = StateHandlerOutcome::transition(RackState::Error { cause }); clear_maintenance_requested_on_error(rack_id, state, outcome, ctx).await } @@ -188,6 +207,7 @@ async fn transition_to_rack_error_with_firmware_job( }; state.firmware_upgrade_job = Some(job.clone()); state.config.maintenance_requested = None; + state.config.power_control_dispatch_started_at = None; let mut txn = ctx.services.db_pool.begin().await?; db_rack::update_firmware_upgrade_job(txn.as_mut(), rack_id, Some(&job)).await?; @@ -205,10 +225,13 @@ async fn clear_maintenance_requested_on_error( outcome: StateHandlerOutcome, ctx: &mut StateHandlerContext<'_, RackStateHandlerContextObjects>, ) -> Result, StateHandlerError> { - if state.config.maintenance_requested.is_none() { + if state.config.maintenance_requested.is_none() + 
&& state.config.power_control_dispatch_started_at.is_none() + { return Ok(outcome); } state.config.maintenance_requested = None; + state.config.power_control_dispatch_started_at = None; let mut txn = ctx.services.db_pool.begin().await?; db_rack::update(txn.as_mut(), rack_id, &state.config).await?; Ok(outcome.with_txn(txn)) @@ -237,6 +260,57 @@ fn explicit_firmware_upgrade_requested(scope: &MaintenanceScope) -> bool { .any(|activity| matches!(activity, MaintenanceActivity::FirmwareUpgrade { .. })) } +/// Returns the explicitly requested per-device power action. +/// +/// This deliberately does not use [`MaintenanceScope::should_run`]: an empty +/// activity list means "all standard rack maintenance activities", but must +/// never turn into an implicit compute-tray power operation. +fn requested_power_control(scope: &MaintenanceScope) -> Option { + scope.activities.iter().find_map(|activity| match activity { + MaintenanceActivity::PowerControl { action } => Some(*action), + _ => None, + }) +} + +fn power_control_is_only_activity(scope: &MaintenanceScope) -> bool { + matches!( + scope.activities.as_slice(), + [MaintenanceActivity::PowerControl { .. 
}] + ) +} + +fn desired_power_state(action: PowerAction) -> PowerState { + match action { + PowerAction::On + | PowerAction::ForceRestart + | PowerAction::GracefulRestart + | PowerAction::AcPowercycle => PowerState::On, + PowerAction::GracefulShutdown | PowerAction::ForceOff => PowerState::Off, + } +} + +fn can_restore_desired_power_state( + current_desired_state: PowerState, + current_version: &config_version::ConfigVersion, + target: &RackPowerControlPreparedTarget, + prepared_desired_state: PowerState, +) -> bool { + current_desired_state == prepared_desired_state + && *current_version == target.prepared_desired_power_state_version +} + +fn desired_power_state_after_dispatch( + target: &RackPowerControlPreparedTarget, + action: PowerAction, + target_failed: bool, +) -> PowerState { + if target_failed { + target.previous_desired_power_state + } else { + desired_power_state(action) + } +} + fn profile_hardware_type_or_any(profile: Option<&RackProfile>) -> String { profile .map(profile_hardware_type_wire_value) @@ -260,8 +334,9 @@ fn requested_firmware_object_json_upgrade( async fn load_rack_maintenance_access_token( credential_manager: &dyn CredentialManager, rack_id: &RackId, + scope: &MaintenanceScope, ) -> Result { - let key = rack_maintenance_access_token_key(rack_id); + let key = rack_maintenance_access_token_key(rack_id, scope.maintenance_request_id.as_deref()); let credentials = credential_manager .get_credentials(&key) .await @@ -284,9 +359,13 @@ async fn load_rack_maintenance_access_token( async fn delete_rack_maintenance_access_token( credential_manager: &dyn CredentialManager, rack_id: &RackId, + scope: &MaintenanceScope, ) { if let Err(error) = credential_manager - .delete_credentials(&rack_maintenance_access_token_key(rack_id)) + .delete_credentials(&rack_maintenance_access_token_key( + rack_id, + scope.maintenance_request_id.as_deref(), + )) .await { tracing::warn!( @@ -333,7 +412,19 @@ fn next_state_after_configure(scope: &MaintenanceScope) -> 
RackMaintenanceState rack_power: RackPowerState::PoweringOn, } } else { - RackMaintenanceState::Completed + next_state_after_power_sequence(scope) + } +} + +/// Returns the next state after the standard rack power sequence. Per-device +/// power control is opt-in, so an empty all-activities scope completes here. +fn next_state_after_power_sequence(scope: &MaintenanceScope) -> RackMaintenanceState { + match requested_power_control(scope) { + Some(action) => RackMaintenanceState::PowerControl { + action, + power_control_state: RackPowerControlState::Preparing, + }, + None => RackMaintenanceState::Completed, } } @@ -420,213 +511,975 @@ fn filter_switch_inventory_by_scope( inventory } -fn skip_configure_nmx_cluster_outcome( - rack_id: &RackId, - reason: impl AsRef, - scope: &MaintenanceScope, -) -> StateHandlerOutcome { - let next = next_state_after_configure(scope); - tracing::info!( - rack_id = %rack_id, - reason = %reason.as_ref(), - next_state = %next, - "Skipping ConfigureNmxCluster" - ); - StateHandlerOutcome::transition(RackState::Maintenance { - maintenance_state: next, - }) -} - -fn build_switch_device_info_request( - rack_id: &RackId, - switches: &[FirmwareUpgradeDeviceInfo], - node_type: rms::NodeType, -) -> rms::BatchGetNodeDeviceInfoRequest { - rms::BatchGetNodeDeviceInfoRequest { - nodes: Some(rms::NodeSet { - nodes: switches - .iter() - .map(|switch| build_new_node_info(rack_id, switch, node_type)) - .collect(), - }), +fn map_compute_tray_vendor(vendor: bmc_vendor::BMCVendor) -> ComputeTrayVendor { + match vendor { + bmc_vendor::BMCVendor::Dell => ComputeTrayVendor::Dell, + bmc_vendor::BMCVendor::Hpe => ComputeTrayVendor::Hpe, + bmc_vendor::BMCVendor::Lenovo => ComputeTrayVendor::Lenovo, + bmc_vendor::BMCVendor::LenovoAMI => ComputeTrayVendor::LenovoAmi, + bmc_vendor::BMCVendor::Supermicro => ComputeTrayVendor::Supermicro, + bmc_vendor::BMCVendor::Nvidia => ComputeTrayVendor::Nvidia, + bmc_vendor::BMCVendor::Liteon + | bmc_vendor::BMCVendor::Delta + | 
bmc_vendor::BMCVendor::Unknown => ComputeTrayVendor::Unknown, } } -const NMX_CONFIGURE_RMS_CONNECT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30); +async fn lookup_compute_tray_bmc_credentials( + credential_manager: &dyn CredentialManager, + bmc_mac: mac_address::MacAddress, +) -> Result { + let key = CredentialKey::BmcCredentials { + credential_type: BmcCredentialType::BmcRoot { + bmc_mac_address: bmc_mac, + }, + }; + match credential_manager.get_credentials(&key).await { + Ok(Some(credentials)) => return Ok(credentials), + Ok(None) => {} + Err(error) => { + return Err(format!( + "failed to read BMC credentials for {bmc_mac}: {error}" + )); + } + } -fn build_nmx_configure_rms_client(rms_config: &RmsConfig) -> Option { - let url = rms_config - .api_url - .as_deref() - .filter(|url| !url.is_empty())?; - let mut rms_client_config = librms::client_config::RmsClientConfig::new( - rms_config.root_ca_path.clone(), - rms_config.client_cert.clone(), - rms_config.client_key.clone(), - rms_config.enforce_tls, - ); - rms_client_config.connect_timeout = Some(NMX_CONFIGURE_RMS_CONNECT_TIMEOUT); - let rms_api_config = librms::client::RmsApiConfig::new(url, &rms_client_config); - Some(librms::RackManagerApi::new(&rms_api_config)) + let sitewide_key = CredentialKey::BmcCredentials { + credential_type: BmcCredentialType::SiteWideRoot, + }; + credential_manager + .get_credentials(&sitewide_key) + .await + .map_err(|error| format!("failed to read site-wide BMC credentials: {error}"))? + .ok_or_else(|| format!("no BMC credentials configured for {bmc_mac} or sitewide")) } -fn rms_component_filters_from_components( - components: &[String], - compute_node_type: Option, - switch_node_type: Option, -) -> std::collections::HashMap { - if components.is_empty() { - return std::collections::HashMap::new(); +/// Resolves exactly the scoped machines that belong to this rack into +/// component-manager endpoints. 
The explicit scope is important: a power +/// request must never fan out to every compute tray in the rack. +async fn resolve_compute_tray_power_endpoints( + rack_id: &RackId, + machine_ids: &[MachineId], + db_pool: &sqlx::PgPool, + credential_manager: &dyn CredentialManager, +) -> Result, String> { + if machine_ids.is_empty() { + return Err("compute-tray power control requires at least one scoped machine".into()); } - let mut filters = std::collections::HashMap::new(); - if let Some(compute_node_type) = compute_node_type { - filters.insert( - compute_node_type as i32, - rms::FirmwareObjectComponentFilter { - components: components.to_vec(), - }, - ); + let mut unique_ids = std::collections::HashSet::with_capacity(machine_ids.len()); + if let Some(duplicate) = machine_ids + .iter() + .find(|machine_id| !unique_ids.insert(**machine_id)) + { + return Err(format!( + "compute-tray power control scope contains duplicate machine {duplicate}" + )); } - if let Some(switch_node_type) = switch_node_type { - filters.insert( - switch_node_type as i32, - rms::FirmwareObjectComponentFilter { - components: components.to_vec(), - }, - ); + + let mut txn = db_pool + .begin() + .await + .map_err(|error| format!("failed to begin compute-tray endpoint lookup: {error}"))?; + let machines = db_machine::find( + txn.as_mut(), + ObjectFilter::List(machine_ids), + MachineSearchConfig { + rack_id: Some(rack_id.clone()), + ..Default::default() + }, + ) + .await + .map_err(|error| format!("failed to look up scoped compute trays: {error}"))?; + txn.commit() + .await + .map_err(|error| format!("failed to finish compute-tray endpoint lookup: {error}"))?; + + let mut machines_by_id: std::collections::HashMap<_, _> = machines + .into_iter() + .map(|machine| (machine.id, machine)) + .collect(); + let mut endpoints = Vec::with_capacity(machine_ids.len()); + + for machine_id in machine_ids { + let machine = machines_by_id.remove(machine_id).ok_or_else(|| { + format!( + "scoped machine {machine_id} 
does not exist or is not associated with rack {rack_id}" + ) + })?; + let bmc_mac = machine.bmc_info.mac.ok_or_else(|| { + format!("scoped machine {machine_id} has no BMC MAC address recorded") + })?; + let bmc_ip = machine + .bmc_info + .ip + .ok_or_else(|| format!("scoped machine {machine_id} has no BMC IP address recorded"))?; + let bmc_credentials = + lookup_compute_tray_bmc_credentials(credential_manager, bmc_mac).await?; + + endpoints.push(ComputeTrayEndpoint { + vendor: map_compute_tray_vendor(machine.bmc_vendor()), + bmc_ip, + bmc_port: machine.bmc_info.port, + authentication: ComputeTrayAuthentication::Credentials(bmc_credentials), + }); } - filters + + Ok(endpoints) } -fn firmware_device_status( - device: FirmwareUpgradeDeviceInfo, - parent_job_id: Option, - child_jobs: &std::collections::HashMap, - node_errors: &std::collections::HashMap, - batch_error: Option<&str>, -) -> FirmwareUpgradeDeviceStatus { - let mut status = FirmwareUpgradeDeviceStatus { - node_id: device.node_id.clone(), - mac: device.mac, - bmc_ip: device.bmc_ip, - status: "in_progress".into(), - job_id: None, - parent_job_id, - error_message: None, - }; +#[derive(Clone, Debug)] +struct ComputeTrayPowerTarget { + machine_id: MachineId, + endpoint: ComputeTrayEndpoint, +} - if let Some(error_message) = node_errors.get(&device.node_id) { - status.status = "failed".into(); - status.error_message = Some(error_message.clone()); - } else if let Some(job_id) = child_jobs.get(&device.node_id) { - status.job_id = Some(job_id.clone()); - } else { - status.status = "failed".into(); - status.error_message = Some( - batch_error - .unwrap_or("RMS did not return a child firmware job for this device") - .to_string(), - ); +fn pair_prepared_compute_tray_endpoints( + prepared_targets: &[RackPowerControlPreparedTarget], + endpoints: Vec, +) -> Result, String> { + if prepared_targets.len() != endpoints.len() { + return Err(format!( + "resolved {} compute-tray endpoints for {} prepared targets", + 
endpoints.len(), + prepared_targets.len() + )); } - status + prepared_targets + .iter() + .zip(endpoints) + .map(|(prepared, endpoint)| { + if endpoint.bmc_ip != prepared.bmc_ip { + return Err(format!( + "BMC IP for machine {} changed from prepared {} to resolved {}", + prepared.machine_id, prepared.bmc_ip, endpoint.bmc_ip + )); + } + Ok(ComputeTrayPowerTarget { + machine_id: prepared.machine_id, + endpoint, + }) + }) + .collect() } -struct RmsFirmwareObjectJsonApply<'a> { - rack_id: &'a RackId, - profile: &'a RackProfile, - config_json: &'a str, - access_token: &'a str, - firmware_type: &'a str, - hardware_type: &'a str, - force_update: bool, - components: &'a [String], - machines: Vec, - switches: Vec, +fn failed_compute_tray_power_result( + targets: &[ComputeTrayPowerTarget], + cause: String, +) -> RackPowerControlResult { + let machine_ids: Vec<_> = targets.iter().map(|target| target.machine_id).collect(); + let mut result = failed_compute_tray_power_result_for_machine_ids(&machine_ids, cause); + result.dispatched_bmc_ips = targets + .iter() + .map(|target| target.endpoint.bmc_ip) + .collect(); + result } -async fn rms_start_firmware_upgrade_from_json( - rms_client: &dyn librms::RmsApi, - request: RmsFirmwareObjectJsonApply<'_>, -) -> Result { - let started_at = chrono::Utc::now(); - let machine_count = request.machines.len(); - let switch_count = request.switches.len(); - let mut nodes = Vec::with_capacity(machine_count + switch_count); - - // Resolve all required node types before constructing the RMS request so a - // mixed-device update fails before any partial firmware submission. 
- let compute_node_type = if machine_count > 0 { - Some( - compute_node_type_for_profile(request.profile).map_err(|error| { - StateHandlerError::GenericError(eyre::eyre!( - "failed to resolve RMS compute node type: {}", - error - )) - })?, - ) - } else { - None - }; - let switch_node_type = if switch_count > 0 { - Some( - switch_node_type_for_profile(request.profile).map_err(|error| { - StateHandlerError::GenericError(eyre::eyre!( - "failed to resolve RMS switch node type: {}", - error - )) - })?, - ) - } else { - None - }; +fn failed_compute_tray_power_result_for_machine_ids( + machine_ids: &[MachineId], + cause: String, +) -> RackPowerControlResult { + RackPowerControlResult { + target_outcomes: machine_ids + .iter() + .map(|machine_id| RackPowerControlTargetOutcome { + machine_id: *machine_id, + outcome: RackPowerControlTargetResult::Failed { + cause: cause.clone(), + }, + }) + .collect(), + dispatched_bmc_ips: Vec::new(), + error: Some(cause), + } +} - if let Some(node_type) = compute_node_type { - nodes.extend( - request - .machines - .iter() - .map(|device| build_new_node_info(request.rack_id, device, node_type)), - ); +fn reconcile_compute_tray_power_results( + targets: &[ComputeTrayPowerTarget], + results: &[ComputeTrayResult], +) -> RackPowerControlResult { + if targets.is_empty() { + return RackPowerControlResult { + target_outcomes: Vec::new(), + dispatched_bmc_ips: Vec::new(), + error: Some("compute-tray power control resolved no endpoints".into()), + }; } - if let Some(node_type) = switch_node_type { - nodes.extend( - request - .switches - .iter() - .map(|device| build_new_node_info(request.rack_id, device, node_type)), + let mut target_by_ip = std::collections::HashMap::with_capacity(targets.len()); + let mut duplicate_ips = std::collections::BTreeSet::new(); + for target in targets { + if target_by_ip + .insert(target.endpoint.bmc_ip, target.machine_id) + .is_some() + { + duplicate_ips.insert(target.endpoint.bmc_ip); + } + } + if 
!duplicate_ips.is_empty() { + return failed_compute_tray_power_result( + targets, + format!( + "compute-tray power control resolved duplicate BMC IP addresses: {}", + duplicate_ips + .iter() + .map(ToString::to_string) + .collect::>() + .join(", ") + ), ); } - let response = rms_client - .apply_firmware_object(rms::ApplyFirmwareObjectRequest { - rack_id: request.rack_id.to_string(), - config_json: request.config_json.to_string(), - access_token: Some(rms_access_token_or_noauth(Some(request.access_token))), - firmware_type: request.firmware_type.to_string(), - hardware_type: request.hardware_type.to_string(), - nodes: Some(rms::NodeSet { nodes }), - force_update: request.force_update, - component_filters: rms_component_filters_from_components( - request.components, - compute_node_type, - switch_node_type, - ), - }) - .await - .map_err(|error| { - StateHandlerError::GenericError(eyre::eyre!( - "failed to submit firmware object JSON apply to RMS: {}", - error - )) - })?; + let mut results_by_ip: std::collections::HashMap<_, Vec<_>> = + std::collections::HashMap::with_capacity(results.len()); + let mut errors = Vec::new(); + for result in results { + if !target_by_ip.contains_key(&result.bmc_ip) { + errors.push(format!( + "backend returned an unexpected result for {}", + result.bmc_ip + )); + continue; + } + results_by_ip.entry(result.bmc_ip).or_default().push(result); + } - let batch_response = response.response.as_ref(); - let batch_status = batch_response - .map(|batch_response| batch_response.status) - .unwrap_or(rms::ReturnCode::Failure as i32); + let mut target_outcomes = Vec::with_capacity(targets.len()); + for target in targets { + let outcome = match results_by_ip + .get(&target.endpoint.bmc_ip) + .map(Vec::as_slice) + { + None | Some([]) => RackPowerControlTargetResult::Failed { + cause: format!("backend returned no result for {}", target.endpoint.bmc_ip), + }, + Some([result]) if result.success => RackPowerControlTargetResult::Succeeded, + Some([result]) => 
RackPowerControlTargetResult::Failed { + cause: format!( + "power control failed for {}: {}", + result.bmc_ip, + result + .error + .as_deref() + .unwrap_or("backend reported failure without an error message") + ), + }, + Some(_) => RackPowerControlTargetResult::Failed { + cause: format!( + "backend returned duplicate results for {}", + target.endpoint.bmc_ip + ), + }, + }; + if let RackPowerControlTargetResult::Failed { cause } = &outcome { + errors.push(cause.clone()); + } + target_outcomes.push(RackPowerControlTargetOutcome { + machine_id: target.machine_id, + outcome, + }); + } + + RackPowerControlResult { + target_outcomes, + dispatched_bmc_ips: targets + .iter() + .map(|target| target.endpoint.bmc_ip) + .collect(), + error: (!errors.is_empty()).then(|| errors.join("; ")), + } +} + +async fn dispatch_compute_tray_power( + manager: &dyn ComputeTrayManager, + targets: &[ComputeTrayPowerTarget], + action: PowerAction, +) -> RackPowerControlResult { + if targets.is_empty() { + return reconcile_compute_tray_power_results(targets, &[]); + } + + let endpoints: Vec<_> = targets + .iter() + .map(|target| target.endpoint.clone()) + .collect(); + match manager.power_control(&endpoints, action).await { + Ok(results) => reconcile_compute_tray_power_results(targets, &results), + Err(error) => failed_compute_tray_power_result( + targets, + format!( + "compute-tray backend {} failed to dispatch power control: {error}", + manager.name() + ), + ), + } +} + +/// Durably claims the external power dispatch before making the backend call. +/// +/// The backend API has no idempotency key. Persisting this marker in a +/// transaction that completes before dispatch gives recovery an at-most-once +/// policy: an ambiguous operation is surfaced as an error instead of blindly +/// replaying a restart or AC power cycle. 
+async fn claim_compute_tray_power_dispatch( + rack_id: &RackId, + state: &mut Rack, + db_pool: &sqlx::PgPool, +) -> Result { + let mut txn = db_pool.begin().await?; + let rack = sqlx::query_as::<_, Rack>("SELECT * FROM racks WHERE id = $1 FOR UPDATE") + .bind(rack_id) + .fetch_optional(txn.as_mut()) + .await? + .ok_or_else(|| StateHandlerError::MissingData { + object_id: rack_id.to_string(), + missing: "rack", + })?; + + if rack.config.power_control_dispatch_started_at.is_some() { + return Ok(false); + } + + let started_at = chrono::Utc::now(); + let mut config = rack.config; + config.power_control_dispatch_started_at = Some(started_at); + db_rack::update(txn.as_mut(), rack_id, &config).await?; + txn.commit().await?; + state.config = config; + Ok(true) +} + +fn power_control_health_report(source: impl Into) -> HealthReport { + let now = chrono::Utc::now(); + HealthReport { + source: source.into(), + triggered_by: None, + observed_at: Some(now), + successes: Vec::new(), + alerts: vec![HealthProbeAlert { + id: HealthProbeId::internal_maintenance(), + target: None, + in_alert_since: Some(now), + message: MACHINE_POWER_OVERRIDE_MESSAGE.to_string(), + tenant_message: None, + classifications: vec![HealthAlertClassification::suppress_external_alerting()], + }], + } +} + +fn is_current_power_control_override( + report: Option<&HealthReport>, + expected: &HealthReport, +) -> bool { + report == Some(expected) +} + +/// Locks all existing power-option rows before a read/check/update sequence. +/// +/// The lower-level update is version-conditional as well; the lock keeps the +/// multi-row preparation/finalization preflight stable while health reports +/// and desired states are changed in the same transaction. 
+async fn lock_power_options_for_update( + machine_ids: &[MachineId], + txn: &mut sqlx::PgConnection, +) -> db::DatabaseResult<()> { + const QUERY: &str = "SELECT host_id FROM power_options WHERE host_id = ANY($1) FOR UPDATE"; + sqlx::query(QUERY) + .bind(machine_ids) + .fetch_all(txn) + .await + .map_err(|error| db::DatabaseError::query(QUERY, error))?; + Ok(()) +} + +struct PowerControlPreparationTarget { + machine_id: MachineId, + bmc_ip: std::net::IpAddr, + previous_desired_power_state: PowerState, + previous_desired_power_state_version: config_version::ConfigVersion, + previous_replacement_health_report: Option, +} + +async fn prepare_compute_tray_power_control( + rack_id: &RackId, + machine_ids: &[MachineId], + txn: &mut sqlx::PgConnection, +) -> Result, String> { + if machine_ids.is_empty() { + return Err("compute-tray power control requires at least one scoped machine".into()); + } + + let mut unique_ids = std::collections::HashSet::with_capacity(machine_ids.len()); + if let Some(duplicate) = machine_ids + .iter() + .find(|machine_id| !unique_ids.insert(**machine_id)) + { + return Err(format!( + "compute-tray power control scope contains duplicate machine {duplicate}" + )); + } + + let machines = db_machine::find( + &mut *txn, + ObjectFilter::List(machine_ids), + MachineSearchConfig { + rack_id: Some(rack_id.clone()), + for_update: true, + ..Default::default() + }, + ) + .await + .map_err(|error| format!("failed to lock scoped compute trays: {error}"))?; + + lock_power_options_for_update(machine_ids, txn) + .await + .map_err(|error| format!("failed to lock scoped power options: {error}"))?; + let power_options = db_power_options::get_by_ids(machine_ids, txn) + .await + .map_err(|error| format!("failed to read scoped power options: {error}"))?; + + let mut machines_by_id: std::collections::HashMap<_, _> = machines + .into_iter() + .map(|machine| (machine.id, machine)) + .collect(); + let mut power_options_by_id: std::collections::HashMap<_, _> = 
power_options + .into_iter() + .map(|options| (options.host_id, options)) + .collect(); + let mut machine_by_bmc_ip = std::collections::HashMap::with_capacity(machine_ids.len()); + let mut preparation_targets = Vec::with_capacity(machine_ids.len()); + + // Validate every target before making the first write. Any write failure + // below still rolls the whole preparation transaction back. + for machine_id in machine_ids { + let machine = machines_by_id.remove(machine_id).ok_or_else(|| { + format!( + "scoped machine {machine_id} does not exist or is not associated with rack {rack_id}" + ) + })?; + let bmc_ip = machine + .bmc_info + .ip + .ok_or_else(|| format!("scoped machine {machine_id} has no BMC IP address recorded"))?; + if let Some(other_machine_id) = machine_by_bmc_ip.insert(bmc_ip, *machine_id) { + return Err(format!( + "scoped machines {other_machine_id} and {machine_id} have duplicate BMC IP {bmc_ip}" + )); + } + let power_options = power_options_by_id + .remove(machine_id) + .ok_or_else(|| format!("scoped machine {machine_id} has no power-options record"))?; + + preparation_targets.push(PowerControlPreparationTarget { + machine_id: *machine_id, + bmc_ip, + previous_desired_power_state: power_options.desired_power_state, + previous_desired_power_state_version: power_options.desired_power_state_version, + previous_replacement_health_report: machine.health_reports.replace, + }); + } + + // Keep the machine power manager passive until the rack backend has + // accepted the operation. In particular, staging `On` here would let the + // independent machine controller race the rack dispatch. 
+ let prepared_desired_power_state = PowerState::Off; + let operation_id = uuid::Uuid::new_v4(); + let mut prepared_targets = Vec::with_capacity(preparation_targets.len()); + for target in preparation_targets { + let health_report = power_control_health_report(format!( + "{MACHINE_POWER_OVERRIDE_SOURCE}/{operation_id}/{}", + target.machine_id + )); + db_machine::insert_health_report( + txn, + &target.machine_id, + HealthReportApplyMode::Replace, + &health_report, + false, + ) + .await + .map_err(|error| { + format!( + "failed to insert power-control health override for {}: {error}", + target.machine_id + ) + })?; + let updated = db_power_options::update_desired_state( + &target.machine_id, + prepared_desired_power_state, + &target.previous_desired_power_state_version, + txn, + ) + .await + .map_err(|error| { + format!( + "failed to set desired power state for {}: {error}", + target.machine_id + ) + })?; + + prepared_targets.push(RackPowerControlPreparedTarget { + machine_id: target.machine_id, + bmc_ip: target.bmc_ip, + previous_desired_power_state: target.previous_desired_power_state, + prepared_desired_power_state_version: updated.desired_power_state_version, + power_control_health_report: health_report, + previous_replacement_health_report: target.previous_replacement_health_report, + }); + } + + Ok(prepared_targets) +} + +async fn request_power_control_reexploration_best_effort( + rack_id: &RackId, + dispatched_bmc_ips: &[std::net::IpAddr], + db_pool: &sqlx::PgPool, +) { + let bmc_ips: Vec<_> = dispatched_bmc_ips + .iter() + .copied() + .collect::>() + .into_iter() + .collect(); + if bmc_ips.is_empty() { + return; + } + let mut txn = match db_pool.begin().await { + Ok(txn) => txn, + Err(error) => { + tracing::warn!( + rack_id = %rack_id, + error = %error, + "Failed to begin post-power-control endpoint re-exploration", + ); + return; + } + }; + if let Err(error) = + db_explored_endpoints::request_exploration_for_addresses(&bmc_ips, txn.as_mut()).await + { + 
tracing::warn!( + rack_id = %rack_id, + error = %error, + "Failed to request post-power-control endpoint re-exploration", + ); + return; + } + if let Err(error) = txn.commit().await { + tracing::warn!( + rack_id = %rack_id, + error = %error, + "Failed to commit post-power-control endpoint re-exploration", + ); + } +} + +async fn finalize_compute_tray_power_control( + rack_id: &RackId, + state: &mut Rack, + scope: &MaintenanceScope, + action: PowerAction, + targets: &[RackPowerControlPreparedTarget], + result: &RackPowerControlResult, + ctx: &mut StateHandlerContext<'_, RackStateHandlerContextObjects>, +) -> Result, StateHandlerError> { + request_power_control_reexploration_best_effort( + rack_id, + &result.dispatched_bmc_ips, + &ctx.services.db_pool, + ) + .await; + + let machine_ids: Vec<_> = targets.iter().map(|target| target.machine_id).collect(); + let mut txn = ctx.services.db_pool.begin().await?; + let machines = db_machine::find( + txn.as_mut(), + ObjectFilter::List(&machine_ids), + MachineSearchConfig { + for_update: true, + ..Default::default() + }, + ) + .await?; + lock_power_options_for_update(&machine_ids, txn.as_mut()).await?; + let power_options = db_power_options::get_by_ids(&machine_ids, txn.as_mut()).await?; + + let machines_by_id: std::collections::HashMap<_, _> = machines + .into_iter() + .map(|machine| (machine.id, machine)) + .collect(); + let power_options_by_id: std::collections::HashMap<_, _> = power_options + .into_iter() + .map(|options| (options.host_id, options)) + .collect(); + let prepared_desired_state = PowerState::Off; + + let target_ids: std::collections::HashSet<_> = + targets.iter().map(|target| target.machine_id).collect(); + let mut outcome_by_machine = std::collections::HashMap::new(); + let mut duplicate_outcomes = std::collections::HashSet::new(); + let mut failure_causes = std::collections::BTreeSet::new(); + if let Some(error) = &result.error { + failure_causes.insert(error.clone()); + } + for target_outcome in 
&result.target_outcomes { + if !target_ids.contains(&target_outcome.machine_id) { + failure_causes.insert(format!( + "power-control result contains unexpected machine {}", + target_outcome.machine_id + )); + continue; + } + if outcome_by_machine + .insert(target_outcome.machine_id, &target_outcome.outcome) + .is_some() + { + duplicate_outcomes.insert(target_outcome.machine_id); + failure_causes.insert(format!( + "power-control result contains duplicate outcomes for {}", + target_outcome.machine_id + )); + } + } + + for target in targets { + let target_failure = if duplicate_outcomes.contains(&target.machine_id) { + Some("backend produced duplicate outcomes for this machine".to_string()) + } else { + match outcome_by_machine.get(&target.machine_id).copied() { + Some(RackPowerControlTargetResult::Succeeded) => None, + Some(RackPowerControlTargetResult::Failed { cause }) => Some(cause.clone()), + None => Some("backend produced no outcome for this machine".to_string()), + } + }; + if let Some(cause) = &target_failure { + failure_causes.insert(format!("{}: {cause}", target.machine_id)); + } + + match power_options_by_id.get(&target.machine_id) { + Some(current) + if can_restore_desired_power_state( + current.desired_power_state, + ¤t.desired_power_state_version, + target, + prepared_desired_state, + ) => + { + let finalized_desired_state = + desired_power_state_after_dispatch(target, action, target_failure.is_some()); + if current.desired_power_state != finalized_desired_state { + db_power_options::update_desired_state( + &target.machine_id, + finalized_desired_state, + ¤t.desired_power_state_version, + txn.as_mut(), + ) + .await?; + } + } + Some(current) => { + tracing::warn!( + rack_id = %rack_id, + machine_id = %target.machine_id, + current_desired_state = ?current.desired_power_state, + current_version = %current.desired_power_state_version, + prepared_desired_state = ?prepared_desired_state, + prepared_version = %target.prepared_desired_power_state_version, + 
failed = target_failure.is_some(), + "Skipping power-state finalization because the prepared write is no longer current", + ); + } + None => { + tracing::warn!( + rack_id = %rack_id, + machine_id = %target.machine_id, + "Skipping power-state finalization because power options are missing", + ); + failure_causes.insert(format!( + "power options are missing for machine {}", + target.machine_id + )); + } + } + + let Some(machine) = machines_by_id.get(&target.machine_id) else { + tracing::warn!( + rack_id = %rack_id, + machine_id = %target.machine_id, + "Unable to clean up power-control health override because the machine is missing", + ); + continue; + }; + if !is_current_power_control_override( + machine.health_reports.replace.as_ref(), + &target.power_control_health_report, + ) { + tracing::warn!( + rack_id = %rack_id, + machine_id = %target.machine_id, + current_replace_source = machine + .health_reports + .replace + .as_ref() + .map(|report| report.source.as_str()), + "Skipping health override cleanup because the power-control override is no longer current", + ); + continue; + } + + if let Some(previous_report) = &target.previous_replacement_health_report { + db_machine::insert_health_report( + txn.as_mut(), + &target.machine_id, + HealthReportApplyMode::Replace, + previous_report, + false, + ) + .await?; + } else { + db_machine::remove_health_report( + txn.as_mut(), + &target.machine_id, + HealthReportApplyMode::Replace, + &target.power_control_health_report.source, + ) + .await?; + } + } + + let next_state = if !failure_causes.is_empty() { + let cause = failure_causes.into_iter().collect::>().join("; "); + tracing::warn!(rack_id = %rack_id, %cause, "Scoped compute-tray power control failed"); + state.config.power_control_dispatch_started_at = None; + if state.config.maintenance_requested.take().is_some() { + db_rack::update(txn.as_mut(), rack_id, &state.config).await?; + } + RackState::Error { cause } + } else if power_control_is_only_activity(scope) { + 
state.config.power_control_dispatch_started_at = None; + if state.config.maintenance_requested.take().is_some() { + db_rack::update(txn.as_mut(), rack_id, &state.config).await?; + } + RackState::Ready + } else { + RackState::Maintenance { + maintenance_state: RackMaintenanceState::Completed, + } + }; + + Ok(StateHandlerOutcome::transition(next_state).with_txn(txn)) +} + +fn skip_configure_nmx_cluster_outcome( + rack_id: &RackId, + reason: impl AsRef, + scope: &MaintenanceScope, +) -> StateHandlerOutcome { + let next = next_state_after_configure(scope); + tracing::info!( + rack_id = %rack_id, + reason = %reason.as_ref(), + next_state = %next, + "Skipping ConfigureNmxCluster" + ); + StateHandlerOutcome::transition(RackState::Maintenance { + maintenance_state: next, + }) +} + +fn build_switch_device_info_request( + rack_id: &RackId, + switches: &[FirmwareUpgradeDeviceInfo], + node_type: rms::NodeType, +) -> rms::BatchGetNodeDeviceInfoRequest { + rms::BatchGetNodeDeviceInfoRequest { + nodes: Some(rms::NodeSet { + nodes: switches + .iter() + .map(|switch| build_new_node_info(rack_id, switch, node_type)) + .collect(), + }), + } +} + +const NMX_CONFIGURE_RMS_CONNECT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30); + +fn build_nmx_configure_rms_client(rms_config: &RmsConfig) -> Option { + let url = rms_config + .api_url + .as_deref() + .filter(|url| !url.is_empty())?; + let mut rms_client_config = librms::client_config::RmsClientConfig::new( + rms_config.root_ca_path.clone(), + rms_config.client_cert.clone(), + rms_config.client_key.clone(), + rms_config.enforce_tls, + ); + rms_client_config.connect_timeout = Some(NMX_CONFIGURE_RMS_CONNECT_TIMEOUT); + let rms_api_config = librms::client::RmsApiConfig::new(url, &rms_client_config); + Some(librms::RackManagerApi::new(&rms_api_config)) +} + +fn rms_component_filters_from_components( + components: &[String], + compute_node_type: Option, + switch_node_type: Option, +) -> std::collections::HashMap { + if 
components.is_empty() { + return std::collections::HashMap::new(); + } + + let mut filters = std::collections::HashMap::new(); + if let Some(compute_node_type) = compute_node_type { + filters.insert( + compute_node_type as i32, + rms::FirmwareObjectComponentFilter { + components: components.to_vec(), + }, + ); + } + if let Some(switch_node_type) = switch_node_type { + filters.insert( + switch_node_type as i32, + rms::FirmwareObjectComponentFilter { + components: components.to_vec(), + }, + ); + } + filters +} + +fn firmware_device_status( + device: FirmwareUpgradeDeviceInfo, + parent_job_id: Option, + child_jobs: &std::collections::HashMap, + node_errors: &std::collections::HashMap, + batch_error: Option<&str>, +) -> FirmwareUpgradeDeviceStatus { + let mut status = FirmwareUpgradeDeviceStatus { + node_id: device.node_id.clone(), + mac: device.mac, + bmc_ip: device.bmc_ip, + status: "in_progress".into(), + job_id: None, + parent_job_id, + error_message: None, + }; + + if let Some(error_message) = node_errors.get(&device.node_id) { + status.status = "failed".into(); + status.error_message = Some(error_message.clone()); + } else if let Some(job_id) = child_jobs.get(&device.node_id) { + status.job_id = Some(job_id.clone()); + } else { + status.status = "failed".into(); + status.error_message = Some( + batch_error + .unwrap_or("RMS did not return a child firmware job for this device") + .to_string(), + ); + } + + status +} + +struct RmsFirmwareObjectJsonApply<'a> { + rack_id: &'a RackId, + profile: &'a RackProfile, + config_json: &'a str, + access_token: &'a str, + firmware_type: &'a str, + hardware_type: &'a str, + force_update: bool, + components: &'a [String], + machines: Vec, + switches: Vec, +} + +async fn rms_start_firmware_upgrade_from_json( + rms_client: &dyn librms::RmsApi, + request: RmsFirmwareObjectJsonApply<'_>, +) -> Result { + let started_at = chrono::Utc::now(); + let machine_count = request.machines.len(); + let switch_count = request.switches.len(); + 
let mut nodes = Vec::with_capacity(machine_count + switch_count); + + // Resolve all required node types before constructing the RMS request so a + // mixed-device update fails before any partial firmware submission. + let compute_node_type = if machine_count > 0 { + Some( + compute_node_type_for_profile(request.profile).map_err(|error| { + StateHandlerError::GenericError(eyre::eyre!( + "failed to resolve RMS compute node type: {}", + error + )) + })?, + ) + } else { + None + }; + let switch_node_type = if switch_count > 0 { + Some( + switch_node_type_for_profile(request.profile).map_err(|error| { + StateHandlerError::GenericError(eyre::eyre!( + "failed to resolve RMS switch node type: {}", + error + )) + })?, + ) + } else { + None + }; + + if let Some(node_type) = compute_node_type { + nodes.extend( + request + .machines + .iter() + .map(|device| build_new_node_info(request.rack_id, device, node_type)), + ); + } + + if let Some(node_type) = switch_node_type { + nodes.extend( + request + .switches + .iter() + .map(|device| build_new_node_info(request.rack_id, device, node_type)), + ); + } + + let response = rms_client + .apply_firmware_object(rms::ApplyFirmwareObjectRequest { + rack_id: request.rack_id.to_string(), + config_json: request.config_json.to_string(), + access_token: Some(rms_access_token_or_noauth(Some(request.access_token))), + firmware_type: request.firmware_type.to_string(), + hardware_type: request.hardware_type.to_string(), + nodes: Some(rms::NodeSet { nodes }), + force_update: request.force_update, + component_filters: rms_component_filters_from_components( + request.components, + compute_node_type, + switch_node_type, + ), + }) + .await + .map_err(|error| { + StateHandlerError::GenericError(eyre::eyre!( + "failed to submit firmware object JSON apply to RMS: {}", + error + )) + })?; + + let batch_response = response.response.as_ref(); + let batch_status = batch_response + .map(|batch_response| batch_response.status) + 
.unwrap_or(rms::ReturnCode::Failure as i32); let batch_job_id = batch_response .map(|batch_response| batch_response.job_id.as_str()) .unwrap_or_default(); @@ -1190,6 +2043,7 @@ pub async fn handle_maintenance( delete_rack_maintenance_access_token( ctx.services.credential_manager.as_ref(), id, + scope, ) .await; return transition_to_rack_error(id, state, "RMS client not configured", ctx) @@ -1198,6 +2052,7 @@ pub async fn handle_maintenance( let access_token = match load_rack_maintenance_access_token( ctx.services.credential_manager.as_ref(), id, + scope, ) .await { @@ -1232,6 +2087,7 @@ pub async fn handle_maintenance( delete_rack_maintenance_access_token( ctx.services.credential_manager.as_ref(), id, + scope, ) .await; } @@ -1249,6 +2105,7 @@ pub async fn handle_maintenance( delete_rack_maintenance_access_token( ctx.services.credential_manager.as_ref(), id, + scope, ) .await; return transition_to_rack_error( @@ -1290,6 +2147,7 @@ pub async fn handle_maintenance( delete_rack_maintenance_access_token( ctx.services.credential_manager.as_ref(), id, + scope, ) .await; } @@ -1346,6 +2204,7 @@ pub async fn handle_maintenance( delete_rack_maintenance_access_token( ctx.services.credential_manager.as_ref(), id, + scope, ) .await; } @@ -1365,6 +2224,7 @@ pub async fn handle_maintenance( delete_rack_maintenance_access_token( ctx.services.credential_manager.as_ref(), id, + scope, ) .await; } @@ -1538,6 +2398,7 @@ pub async fn handle_maintenance( delete_rack_maintenance_access_token( ctx.services.credential_manager.as_ref(), id, + scope, ) .await; return transition_to_rack_error(id, state, "RMS client not configured", ctx) @@ -1546,6 +2407,7 @@ pub async fn handle_maintenance( let access_token = match load_rack_maintenance_access_token( ctx.services.credential_manager.as_ref(), id, + scope, ) .await { @@ -1577,6 +2439,7 @@ pub async fn handle_maintenance( delete_rack_maintenance_access_token( ctx.services.credential_manager.as_ref(), id, + scope, ) .await; let next = 
next_state_after_nvos(scope); @@ -1593,6 +2456,7 @@ pub async fn handle_maintenance( delete_rack_maintenance_access_token( ctx.services.credential_manager.as_ref(), id, + scope, ) .await; return transition_to_rack_error( @@ -1609,6 +2473,7 @@ pub async fn handle_maintenance( delete_rack_maintenance_access_token( ctx.services.credential_manager.as_ref(), id, + scope, ) .await; return transition_to_rack_error(id, state, error.to_string(), ctx).await; @@ -1658,8 +2523,12 @@ pub async fn handle_maintenance( switch_inventory.switches, ) .await; - delete_rack_maintenance_access_token(ctx.services.credential_manager.as_ref(), id) - .await; + delete_rack_maintenance_access_token( + ctx.services.credential_manager.as_ref(), + id, + scope, + ) + .await; let job = match submit_result { Ok(job) => job, @@ -2241,20 +3110,129 @@ pub async fn handle_maintenance( tracing::info!("Rack {} power sequence (on) - stubbed", id); Ok(StateHandlerOutcome::transition(RackState::Maintenance { - maintenance_state: RackMaintenanceState::Completed, + maintenance_state: next_state_after_power_sequence(scope), + })) + } + RackPowerState::PoweringOff => { + tracing::info!("Rack {} power sequence (off) - stubbed", id); + Ok(StateHandlerOutcome::wait( + "power sequence (off) in progress".into(), + )) + } + RackPowerState::PowerReset => { + tracing::info!("Rack {} power sequence (reset) - stubbed", id); + Ok(StateHandlerOutcome::wait( + "power sequence (reset) in progress".into(), + )) + } + }, + RackMaintenanceState::PowerControl { + action, + power_control_state, + } => match power_control_state { + RackPowerControlState::Preparing => { + let mut txn = ctx.services.db_pool.begin().await?; + // A newly prepared operation supersedes any marker left by a + // previously completed or manually recovered maintenance run. 
+ if state + .config + .power_control_dispatch_started_at + .take() + .is_some() + { + db_rack::update(txn.as_mut(), id, &state.config).await?; + } + let targets = + match prepare_compute_tray_power_control(id, &scope.machine_ids, txn.as_mut()) + .await + { + Ok(targets) => targets, + Err(cause) => { + drop(txn); + return transition_to_rack_error(id, state, cause, ctx).await; + } + }; + + tracing::info!( + rack_id = %id, + action = ?action, + machine_count = targets.len(), + "Prepared scoped compute-tray power control", + ); + Ok(StateHandlerOutcome::transition(RackState::Maintenance { + maintenance_state: RackMaintenanceState::PowerControl { + action: *action, + power_control_state: RackPowerControlState::Prepared { targets }, + }, + }) + .with_txn(txn)) + } + RackPowerControlState::Prepared { targets } => { + let machine_ids: Vec<_> = targets.iter().map(|target| target.machine_id).collect(); + let dispatch_claimed = + claim_compute_tray_power_dispatch(id, state, &ctx.services.db_pool).await?; + let result = if !dispatch_claimed { + failed_compute_tray_power_result_for_machine_ids( + &machine_ids, + "a previous compute-tray power dispatch was started without a durable result; refusing to replay a potentially non-idempotent operation" + .into(), + ) + } else if let Some(component_manager) = ctx.services.component_manager.clone() { + match resolve_compute_tray_power_endpoints( + id, + &machine_ids, + &ctx.services.db_pool, + ctx.services.credential_manager.as_ref(), + ) + .await + { + Ok(endpoints) => { + match pair_prepared_compute_tray_endpoints(targets, endpoints) { + Ok(dispatch_targets) => { + tracing::info!( + rack_id = %id, + action = ?action, + backend = component_manager.compute_tray.name(), + machine_count = dispatch_targets.len(), + "Dispatching scoped compute-tray power control", + ); + dispatch_compute_tray_power( + component_manager.compute_tray.as_ref(), + &dispatch_targets, + *action, + ) + .await + } + Err(cause) => 
failed_compute_tray_power_result_for_machine_ids( + &machine_ids, + cause, + ), + } + } + Err(cause) => { + failed_compute_tray_power_result_for_machine_ids(&machine_ids, cause) + } + } + } else { + failed_compute_tray_power_result_for_machine_ids( + &machine_ids, + "component manager is not configured for compute-tray power control".into(), + ) + }; + + Ok(StateHandlerOutcome::transition(RackState::Maintenance { + maintenance_state: RackMaintenanceState::PowerControl { + action: *action, + power_control_state: RackPowerControlState::Finalizing { + targets: targets.clone(), + result, + }, + }, })) } - RackPowerState::PoweringOff => { - tracing::info!("Rack {} power sequence (off) - stubbed", id); - Ok(StateHandlerOutcome::wait( - "power sequence (off) in progress".into(), - )) - } - RackPowerState::PowerReset => { - tracing::info!("Rack {} power sequence (reset) - stubbed", id); - Ok(StateHandlerOutcome::wait( - "power sequence (reset) in progress".into(), - )) + RackPowerControlState::Finalizing { targets, result } => { + finalize_compute_tray_power_control(id, state, scope, *action, targets, result, ctx) + .await } }, RackMaintenanceState::Completed => { @@ -2268,8 +3246,11 @@ pub async fn handle_maintenance( validating_state: RackValidationState::Pending, }); - if state.config.maintenance_requested.is_some() { + if state.config.maintenance_requested.is_some() + || state.config.power_control_dispatch_started_at.is_some() + { state.config.maintenance_requested = None; + state.config.power_control_dispatch_started_at = None; let mut txn = ctx.services.db_pool.begin().await?; db_rack::update(txn.as_mut(), id, &state.config).await?; outcome = outcome.with_txn(txn); @@ -2282,23 +3263,171 @@ pub async fn handle_maintenance( #[cfg(test)] mod tests { + use std::net::{IpAddr, Ipv4Addr}; + use std::sync::Mutex; + use carbide_rack::firmware_update::RackFirmwareInventory; + use carbide_secrets::credentials::Credentials; use carbide_test_support::{Check, check_values}; use 
carbide_uuid::machine::{MachineId, MachineIdSource, MachineType}; use carbide_uuid::switch::{SwitchId, SwitchIdSource, SwitchType}; + use component_manager::compute_tray_manager::{ + Backend, ComputeTrayAuthentication, ComputeTrayEndpoint, ComputeTrayFirmwareUpdateStatus, + ComputeTrayManager, ComputeTrayResult, ComputeTrayVendor, + }; + use component_manager::error::ComponentManagerError; + use component_manager::types::FirmwareUpdateOptions; + use config_version::ConfigVersion; + use model::component_manager::{ComputeTrayComponent, PowerAction}; + use model::power_manager::PowerState; use model::rack::{ ConfigureNmxClusterState, FirmwareUpgradeDeviceInfo, FirmwareUpgradeState, MaintenanceActivity, MaintenanceScope, NvosUpdateState, RackMaintenanceState, + RackPowerControlPreparedTarget, RackPowerControlState, RackPowerControlTargetResult, RackPowerState, }; use model::rack_type::{RackHardwareType, RackProfile}; use super::{ - filter_inventory_by_scope, firmware_device_status, first_maintenance_state, + ComputeTrayPowerTarget, MACHINE_POWER_OVERRIDE_MESSAGE, MACHINE_POWER_OVERRIDE_SOURCE, + can_restore_desired_power_state, desired_power_state, desired_power_state_after_dispatch, + dispatch_compute_tray_power, filter_inventory_by_scope, firmware_device_status, + first_maintenance_state, is_current_power_control_override, map_compute_tray_vendor, next_state_after_configure, next_state_after_firmware, next_state_after_nvos, - profile_hardware_type_or_any, + next_state_after_power_sequence, pair_prepared_compute_tray_endpoints, + power_control_health_report, power_control_is_only_activity, profile_hardware_type_or_any, + reconcile_compute_tray_power_results, }; + #[derive(Debug, Clone, PartialEq, Eq)] + struct PowerCall { + bmc_ips: Vec, + bmc_ports: Vec>, + vendors: Vec, + credentials: Vec<(String, String)>, + action: PowerAction, + } + + #[derive(Debug, Clone)] + enum TestPowerResponse { + Results(Vec), + TransportError(String), + } + + #[derive(Debug)] + struct 
RecordingComputeTrayManager { + response: TestPowerResponse, + calls: Mutex>, + } + + impl RecordingComputeTrayManager { + fn new(response: TestPowerResponse) -> Self { + Self { + response, + calls: Mutex::new(Vec::new()), + } + } + + fn calls(&self) -> Vec { + self.calls.lock().unwrap().clone() + } + } + + #[async_trait::async_trait] + impl ComputeTrayManager for RecordingComputeTrayManager { + fn name(&self) -> &str { + "recording-compute" + } + + fn backend(&self) -> Backend { + Backend::Mock + } + + async fn power_control( + &self, + endpoints: &[ComputeTrayEndpoint], + action: PowerAction, + ) -> Result, ComponentManagerError> { + self.calls.lock().unwrap().push(PowerCall { + bmc_ips: endpoints.iter().map(|endpoint| endpoint.bmc_ip).collect(), + bmc_ports: endpoints.iter().map(|endpoint| endpoint.bmc_port).collect(), + vendors: endpoints.iter().map(|endpoint| endpoint.vendor).collect(), + credentials: endpoints + .iter() + .map(|endpoint| match &endpoint.authentication { + ComputeTrayAuthentication::Credentials(Credentials::UsernamePassword { + username, + password, + }) => (username.clone(), password.clone()), + ComputeTrayAuthentication::CredentialKey(_) => { + panic!("rack controller must resolve credentials before dispatch") + } + }) + .collect(), + action, + }); + + match &self.response { + TestPowerResponse::Results(results) => Ok(results.clone()), + TestPowerResponse::TransportError(error) => { + Err(ComponentManagerError::Internal(error.clone())) + } + } + } + + async fn update_firmware( + &self, + _endpoints: &[ComputeTrayEndpoint], + _target_version: &str, + _components: &[ComputeTrayComponent], + _options: &FirmwareUpdateOptions, + ) -> Result, ComponentManagerError> { + unreachable!("firmware update is not exercised by power-control tests") + } + + async fn get_firmware_status( + &self, + _endpoints: &[ComputeTrayEndpoint], + ) -> Result, ComponentManagerError> { + unreachable!("firmware status is not exercised by power-control tests") + } + + 
async fn list_firmware_bundles(&self) -> Result, ComponentManagerError> { + unreachable!("firmware bundles are not exercised by power-control tests") + } + } + + fn compute_endpoint( + octet: u8, + port: Option, + vendor: ComputeTrayVendor, + ) -> ComputeTrayEndpoint { + ComputeTrayEndpoint { + vendor, + bmc_ip: IpAddr::V4(Ipv4Addr::new(192, 0, 2, octet)), + bmc_port: port, + authentication: ComputeTrayAuthentication::Credentials(Credentials::UsernamePassword { + username: format!("admin-{octet}"), + password: format!("password-{octet}"), + }), + } + } + + fn compute_power_target(seed: u8, endpoint: ComputeTrayEndpoint) -> ComputeTrayPowerTarget { + ComputeTrayPowerTarget { + machine_id: test_machine_id(seed), + endpoint, + } + } + + fn power_result(endpoint: &ComputeTrayEndpoint, success: bool) -> ComputeTrayResult { + ComputeTrayResult { + bmc_ip: endpoint.bmc_ip, + success, + error: (!success).then(|| "backend rejected request".into()), + } + } + fn test_machine_id(seed: u8) -> MachineId { let mut hash = [0u8; 32]; hash[0] = seed; @@ -2339,6 +3468,425 @@ mod tests { } } + #[test] + fn desired_power_state_covers_every_action() { + check_values( + [ + Check { + scenario: "on", + input: PowerAction::On, + expect: PowerState::On, + }, + Check { + scenario: "graceful restart", + input: PowerAction::GracefulRestart, + expect: PowerState::On, + }, + Check { + scenario: "force restart", + input: PowerAction::ForceRestart, + expect: PowerState::On, + }, + Check { + scenario: "AC power cycle", + input: PowerAction::AcPowercycle, + expect: PowerState::On, + }, + Check { + scenario: "graceful shutdown", + input: PowerAction::GracefulShutdown, + expect: PowerState::Off, + }, + Check { + scenario: "force off", + input: PowerAction::ForceOff, + expect: PowerState::Off, + }, + ], + desired_power_state, + ); + } + + #[test] + fn power_control_only_activity_is_explicit_and_singular() { + check_values( + [ + Check { + scenario: "empty all-activities scope", + input: 
MaintenanceScope::default(), + expect: false, + }, + Check { + scenario: "one explicit power operation", + input: scope_of(vec![power_control(PowerAction::On)]), + expect: true, + }, + Check { + scenario: "power operation combined with standard maintenance", + input: scope_of(vec![ + MaintenanceActivity::PowerSequence, + power_control(PowerAction::On), + ]), + expect: false, + }, + Check { + scenario: "duplicate power operations", + input: scope_of(vec![ + power_control(PowerAction::On), + power_control(PowerAction::ForceOff), + ]), + expect: false, + }, + ], + |scope| power_control_is_only_activity(&scope), + ); + } + + #[test] + fn power_control_health_report_matches_direct_dispatch_semantics() { + let source = format!("{MACHINE_POWER_OVERRIDE_SOURCE}/test-operation/test-machine"); + let report = power_control_health_report(source.clone()); + + assert_eq!(report.source, source); + assert_eq!(report.alerts.len(), 1); + let alert = &report.alerts[0]; + assert_eq!(alert.id.as_str(), "Maintenance"); + assert_eq!(alert.message, MACHINE_POWER_OVERRIDE_MESSAGE); + assert_eq!(alert.classifications.len(), 1); + assert_eq!( + alert.classifications[0].as_str(), + "SuppressExternalAlerting" + ); + } + + #[test] + fn health_override_cleanup_requires_exact_operation_report() { + let expected = power_control_health_report("component_power_control/operation-a/machine-a"); + let same_source_other_report = health_report::HealthReport::empty(expected.source.clone()); + + assert!(is_current_power_control_override( + Some(&expected), + &expected + )); + assert!(!is_current_power_control_override( + Some(&same_source_other_report), + &expected + )); + assert!(!is_current_power_control_override(None, &expected)); + } + + #[test] + fn prepared_bmc_ip_drift_blocks_dispatch_pairing() { + let prepared = RackPowerControlPreparedTarget { + machine_id: test_machine_id(41), + bmc_ip: IpAddr::V4(Ipv4Addr::new(192, 0, 2, 41)), + previous_desired_power_state: PowerState::On, + 
prepared_desired_power_state_version: ConfigVersion::new(8), + power_control_health_report: power_control_health_report("test/ip-drift"), + previous_replacement_health_report: None, + }; + let resolved = compute_endpoint(42, None, ComputeTrayVendor::Dell); + + let error = pair_prepared_compute_tray_endpoints(&[prepared], vec![resolved]).unwrap_err(); + + assert!(error.contains("changed from prepared 192.0.2.41 to resolved 192.0.2.42")); + } + + #[test] + fn mixed_results_finalize_success_and_roll_back_failure_independently() { + let prepared_version = ConfigVersion::new(8); + let target = RackPowerControlPreparedTarget { + machine_id: test_machine_id(42), + bmc_ip: IpAddr::V4(Ipv4Addr::new(192, 0, 2, 42)), + previous_desired_power_state: PowerState::Off, + prepared_desired_power_state_version: prepared_version, + power_control_health_report: power_control_health_report("test/on"), + previous_replacement_health_report: None, + }; + + assert_eq!( + desired_power_state_after_dispatch(&target, PowerAction::On, false), + PowerState::On + ); + assert_eq!( + desired_power_state_after_dispatch(&target, PowerAction::On, true), + PowerState::Off + ); + } + + #[test] + fn desired_power_state_rollback_requires_prepared_state_and_version() { + let prepared_version = ConfigVersion::new(8); + let stale_version = ConfigVersion::new(9); + let target = RackPowerControlPreparedTarget { + machine_id: test_machine_id(43), + bmc_ip: IpAddr::V4(Ipv4Addr::new(192, 0, 2, 43)), + previous_desired_power_state: PowerState::On, + prepared_desired_power_state_version: prepared_version, + power_control_health_report: power_control_health_report("test/rollback"), + previous_replacement_health_report: None, + }; + + check_values( + [ + Check { + scenario: "matching passive state and version", + input: (PowerState::Off, target.prepared_desired_power_state_version), + expect: true, + }, + Check { + scenario: "desired state changed concurrently", + input: (PowerState::On, 
target.prepared_desired_power_state_version), + expect: false, + }, + Check { + scenario: "version changed concurrently", + input: (PowerState::Off, stale_version), + expect: false, + }, + ], + |(state, version)| { + can_restore_desired_power_state(state, &version, &target, PowerState::Off) + }, + ); + } + + #[test] + fn compute_tray_vendor_mapping_covers_all_bmc_vendors() { + check_values( + [ + Check { + scenario: "Dell", + input: bmc_vendor::BMCVendor::Dell, + expect: ComputeTrayVendor::Dell, + }, + Check { + scenario: "HPE", + input: bmc_vendor::BMCVendor::Hpe, + expect: ComputeTrayVendor::Hpe, + }, + Check { + scenario: "Lenovo", + input: bmc_vendor::BMCVendor::Lenovo, + expect: ComputeTrayVendor::Lenovo, + }, + Check { + scenario: "Lenovo AMI", + input: bmc_vendor::BMCVendor::LenovoAMI, + expect: ComputeTrayVendor::LenovoAmi, + }, + Check { + scenario: "Supermicro", + input: bmc_vendor::BMCVendor::Supermicro, + expect: ComputeTrayVendor::Supermicro, + }, + Check { + scenario: "NVIDIA", + input: bmc_vendor::BMCVendor::Nvidia, + expect: ComputeTrayVendor::Nvidia, + }, + Check { + scenario: "power shelf vendor", + input: bmc_vendor::BMCVendor::Liteon, + expect: ComputeTrayVendor::Unknown, + }, + Check { + scenario: "alternate power shelf vendor", + input: bmc_vendor::BMCVendor::Delta, + expect: ComputeTrayVendor::Unknown, + }, + Check { + scenario: "unknown", + input: bmc_vendor::BMCVendor::Unknown, + expect: ComputeTrayVendor::Unknown, + }, + ], + map_compute_tray_vendor, + ); + } + + #[tokio::test] + async fn dispatch_compute_tray_power_sends_only_scoped_endpoints() { + let scoped = vec![ + compute_power_target( + 10, + compute_endpoint(10, Some(8443), ComputeTrayVendor::Dell), + ), + compute_power_target(12, compute_endpoint(12, None, ComputeTrayVendor::Nvidia)), + ]; + let manager = RecordingComputeTrayManager::new(TestPowerResponse::Results(vec![ + power_result(&scoped[1].endpoint, true), + power_result(&scoped[0].endpoint, true), + ])); + + let result = + 
dispatch_compute_tray_power(&manager, &scoped, PowerAction::ForceRestart).await; + + assert!(result.error.is_none()); + assert_eq!( + result.dispatched_bmc_ips, + vec![scoped[0].endpoint.bmc_ip, scoped[1].endpoint.bmc_ip] + ); + assert!( + result + .target_outcomes + .iter() + .all(|outcome| matches!(&outcome.outcome, RackPowerControlTargetResult::Succeeded)) + ); + assert_eq!( + manager.calls(), + vec![PowerCall { + bmc_ips: vec![scoped[0].endpoint.bmc_ip, scoped[1].endpoint.bmc_ip], + bmc_ports: vec![Some(8443), None], + vendors: vec![ComputeTrayVendor::Dell, ComputeTrayVendor::Nvidia], + credentials: vec![ + ("admin-10".into(), "password-10".into()), + ("admin-12".into(), "password-12".into()), + ], + action: PowerAction::ForceRestart, + }] + ); + } + + #[tokio::test] + async fn dispatch_compute_tray_power_surfaces_backend_transport_failure() { + let targets = vec![compute_power_target( + 10, + compute_endpoint(10, None, ComputeTrayVendor::Dell), + )]; + let manager = RecordingComputeTrayManager::new(TestPowerResponse::TransportError( + "RMS unavailable".into(), + )); + + let result = dispatch_compute_tray_power(&manager, &targets, PowerAction::On).await; + let error = result.error.unwrap(); + + assert!(error.contains("recording-compute")); + assert!(error.contains("RMS unavailable")); + assert_eq!(result.dispatched_bmc_ips, vec![targets[0].endpoint.bmc_ip]); + assert!(matches!( + &result.target_outcomes[0].outcome, + RackPowerControlTargetResult::Failed { .. 
} + )); + assert_eq!(manager.calls().len(), 1); + } + + #[tokio::test] + async fn dispatch_compute_tray_power_rejects_empty_backend_result() { + let targets = vec![compute_power_target( + 10, + compute_endpoint(10, None, ComputeTrayVendor::Dell), + )]; + let manager = RecordingComputeTrayManager::new(TestPowerResponse::Results(vec![])); + + let result = dispatch_compute_tray_power(&manager, &targets, PowerAction::ForceOff).await; + + assert!(result.error.unwrap().contains("backend returned no result")); + assert!(matches!( + &result.target_outcomes[0].outcome, + RackPowerControlTargetResult::Failed { .. } + )); + } + + #[test] + fn reconcile_compute_tray_power_results_covers_per_device_contract() { + struct ValidationCase { + targets: Vec, + results: Vec, + } + + let endpoint_a = compute_endpoint(10, None, ComputeTrayVendor::Dell); + let endpoint_b = compute_endpoint(11, None, ComputeTrayVendor::Hpe); + let unexpected = compute_endpoint(99, None, ComputeTrayVendor::Unknown); + let target_a = compute_power_target(10, endpoint_a.clone()); + let target_b = compute_power_target(11, endpoint_b.clone()); + check_values( + [ + Check { + scenario: "complete results may be out of order", + input: ValidationCase { + targets: vec![target_a.clone(), target_b.clone()], + results: vec![ + power_result(&endpoint_b, true), + power_result(&endpoint_a, true), + ], + }, + expect: (vec![true, true], false), + }, + Check { + scenario: "per-device failure", + input: ValidationCase { + targets: vec![target_a.clone()], + results: vec![power_result(&endpoint_a, false)], + }, + expect: (vec![false], true), + }, + Check { + scenario: "missing device result", + input: ValidationCase { + targets: vec![target_a.clone(), target_b.clone()], + results: vec![power_result(&endpoint_a, true)], + }, + expect: (vec![true, false], true), + }, + Check { + scenario: "unexpected device result", + input: ValidationCase { + targets: vec![target_a.clone()], + results: vec![power_result(&unexpected, true)], + 
}, + expect: (vec![false], true), + }, + Check { + scenario: "duplicate device result", + input: ValidationCase { + targets: vec![target_a.clone()], + results: vec![ + power_result(&endpoint_a, true), + power_result(&endpoint_a, true), + ], + }, + expect: (vec![false], true), + }, + Check { + scenario: "duplicate endpoint", + input: ValidationCase { + targets: vec![ + target_a.clone(), + compute_power_target(12, endpoint_a.clone()), + ], + results: vec![power_result(&endpoint_a, true)], + }, + expect: (vec![false, false], true), + }, + Check { + scenario: "mixed target success and failure", + input: ValidationCase { + targets: vec![target_a, target_b], + results: vec![ + power_result(&endpoint_a, true), + power_result(&endpoint_b, false), + ], + }, + expect: (vec![true, false], true), + }, + ], + |case| { + let result = reconcile_compute_tray_power_results(&case.targets, &case.results); + ( + result + .target_outcomes + .into_iter() + .map(|outcome| { + matches!(outcome.outcome, RackPowerControlTargetResult::Succeeded) + }) + .collect(), + result.error.is_some(), + ) + }, + ); + } + #[test] fn profile_hardware_type_or_any_defaults_missing_values_to_any() { assert_eq!(profile_hardware_type_or_any(None), "any"); @@ -2472,6 +4020,10 @@ mod tests { } } + fn power_control(action: PowerAction) -> MaintenanceActivity { + MaintenanceActivity::PowerControl { action } + } + fn scope_of(activities: Vec) -> MaintenanceScope { MaintenanceScope { activities, @@ -2503,6 +4055,13 @@ mod tests { } } + fn power_control_state(action: PowerAction) -> RackMaintenanceState { + RackMaintenanceState::PowerControl { + action, + power_control_state: RackPowerControlState::Preparing, + } + } + // ── first_maintenance_state ───────────────────────────────────────── #[test] @@ -2534,6 +4093,11 @@ mod tests { input: scope_of(vec![MaintenanceActivity::PowerSequence]), expect: powering_on(), }, + Check { + scenario: "only scoped power control -> power control", + input: 
scope_of(vec![power_control(PowerAction::ForceOff)]), + expect: power_control_state(PowerAction::ForceOff), + }, Check { scenario: "configure and power -> configure first", input: scope_of(vec![ @@ -2542,6 +4106,14 @@ mod tests { ]), expect: configure_start(), }, + Check { + scenario: "power sequence precedes scoped power control", + input: scope_of(vec![ + MaintenanceActivity::PowerSequence, + power_control(PowerAction::On), + ]), + expect: powering_on(), + }, ], |scope| first_maintenance_state(&scope), ); @@ -2575,6 +4147,14 @@ mod tests { input: scope_of(vec![firmware_upgrade(), nvos_update()]), expect: nvos_start(), }, + Check { + scenario: "firmware then scoped power control", + input: scope_of(vec![ + firmware_upgrade(), + power_control(PowerAction::ForceRestart), + ]), + expect: power_control_state(PowerAction::ForceRestart), + }, ], |scope| next_state_after_firmware(&scope), ); @@ -2596,6 +4176,14 @@ mod tests { input: scope_of(vec![nvos_update(), MaintenanceActivity::PowerSequence]), expect: powering_on(), }, + Check { + scenario: "nvos then scoped power control", + input: scope_of(vec![ + nvos_update(), + power_control(PowerAction::GracefulShutdown), + ]), + expect: power_control_state(PowerAction::GracefulShutdown), + }, ], |scope| next_state_after_nvos(&scope), ); @@ -2620,8 +4208,43 @@ mod tests { ]), expect: RackMaintenanceState::Completed, }, + Check { + scenario: "configure then scoped power control", + input: scope_of(vec![ + MaintenanceActivity::ConfigureNmxCluster, + power_control(PowerAction::On), + ]), + expect: power_control_state(PowerAction::On), + }, ], |scope| next_state_after_configure(&scope), ); } + + #[test] + fn test_next_state_after_power_sequence() { + check_values( + [ + Check { + scenario: "empty all-activities scope does not imply power control", + input: MaintenanceScope::default(), + expect: RackMaintenanceState::Completed, + }, + Check { + scenario: "power sequence alone completes", + input: 
scope_of(vec![MaintenanceActivity::PowerSequence]), + expect: RackMaintenanceState::Completed, + }, + Check { + scenario: "explicit scoped power control follows power sequence", + input: scope_of(vec![ + MaintenanceActivity::PowerSequence, + power_control(PowerAction::AcPowercycle), + ]), + expect: power_control_state(PowerAction::AcPowercycle), + }, + ], + |scope| next_state_after_power_sequence(&scope), + ); + } } diff --git a/crates/rack-controller/src/ready.rs b/crates/rack-controller/src/ready.rs index bb99015daa..0a263fca4b 100644 --- a/crates/rack-controller/src/ready.rs +++ b/crates/rack-controller/src/ready.rs @@ -62,6 +62,7 @@ pub async fn handle_ready( ); state.config.reprovision_requested = false; state.config.maintenance_requested = None; + state.config.power_control_dispatch_started_at = None; let mut txn = ctx.services.db_pool.begin().await?; db_rack::update(txn.as_mut(), id, &state.config).await?; return Ok(StateHandlerOutcome::transition(RackState::Maintenance { diff --git a/crates/rack/src/firmware_object.rs b/crates/rack/src/firmware_object.rs index f24c0db611..f81079ef7f 100644 --- a/crates/rack/src/firmware_object.rs +++ b/crates/rack/src/firmware_object.rs @@ -37,9 +37,13 @@ pub fn rms_access_token_or_noauth(access_token: Option<&str>) -> String { .to_string() } -pub fn rack_maintenance_access_token_key(rack_id: &RackId) -> CredentialKey { +pub fn rack_maintenance_access_token_key( + rack_id: &RackId, + maintenance_request_id: Option<&str>, +) -> CredentialKey { CredentialKey::RackMaintenanceAccessToken { rack_id: rack_id.clone(), + maintenance_request_id: maintenance_request_id.map(str::to_owned), } } @@ -65,4 +69,13 @@ mod tests { ); assert_eq!(rms_access_token_or_noauth(Some("token")), "token"); } + + #[test] + fn maintenance_access_token_key_carries_request_id() { + let key = rack_maintenance_access_token_key(&RackId::new("rack-01"), Some("request-123")); + assert_eq!( + key.to_key_str(), + 
"racks/rack-01/maintenance/request-123/access-token" + ); + } } diff --git a/crates/secrets/src/credentials.rs b/crates/secrets/src/credentials.rs index 4d9efacffb..034342e85a 100644 --- a/crates/secrets/src/credentials.rs +++ b/crates/secrets/src/credentials.rs @@ -356,6 +356,11 @@ pub enum CredentialKey { }, RackMaintenanceAccessToken { rack_id: RackId, + /// Identifies the maintenance request that owns this token. `None` + /// retains the legacy rack-scoped key for persisted requests created + /// before request-scoped tokens were introduced. + #[serde(default, skip_serializing_if = "Option::is_none")] + maintenance_request_id: Option, }, } @@ -582,9 +587,16 @@ impl CredentialKey { CredentialKey::Bgp { credential_type } => match credential_type { BgpCredentialType::SiteWideLeafPassword => Cow::from("bgp/leaf/site/auth"), }, - CredentialKey::RackMaintenanceAccessToken { rack_id } => { - Cow::from(format!("racks/{rack_id}/maintenance/access-token")) - } + CredentialKey::RackMaintenanceAccessToken { + rack_id, + maintenance_request_id: Some(maintenance_request_id), + } => Cow::from(format!( + "racks/{rack_id}/maintenance/{maintenance_request_id}/access-token" + )), + CredentialKey::RackMaintenanceAccessToken { + rack_id, + maintenance_request_id: None, + } => Cow::from(format!("racks/{rack_id}/maintenance/access-token")), } } } @@ -681,6 +693,28 @@ mod tests { assert_eq!(nvos.prefix(), CredentialPrefix::SwitchNvosAdmin); } + #[test] + fn rack_maintenance_access_token_path_is_request_scoped_with_legacy_fallback() { + let rack_id = RackId::new("rack-01"); + let request_scoped = CredentialKey::RackMaintenanceAccessToken { + rack_id: rack_id.clone(), + maintenance_request_id: Some("request-123".to_string()), + }; + assert_eq!( + request_scoped.to_key_str(), + "racks/rack-01/maintenance/request-123/access-token" + ); + + let legacy = CredentialKey::RackMaintenanceAccessToken { + rack_id, + maintenance_request_id: None, + }; + assert_eq!( + legacy.to_key_str(), + 
"racks/rack-01/maintenance/access-token" + ); + } + #[tokio::test] async fn composite_manager_delegates_reads_and_writes() { let reader = TestCredentialManager::new(Credentials::UsernamePassword { @@ -1020,7 +1054,10 @@ mod tests { Check { scenario: "rack maintenance access token", input: Row { - key: CredentialKey::RackMaintenanceAccessToken { rack_id }, + key: CredentialKey::RackMaintenanceAccessToken { + rack_id, + maintenance_request_id: None, + }, expected_prefix: "racks/", }, expect: PathChecks::all_hold(), @@ -1099,7 +1136,10 @@ mod tests { CredentialKey::MachineIdentityEncryptionKey { key_id: "k".to_string(), }, - CredentialKey::RackMaintenanceAccessToken { rack_id }, + CredentialKey::RackMaintenanceAccessToken { + rack_id, + maintenance_request_id: None, + }, ]; for key in &keys { diff --git a/crates/secrets/src/test_support/credentials.rs b/crates/secrets/src/test_support/credentials.rs index 441781a5d3..056c5528a6 100644 --- a/crates/secrets/src/test_support/credentials.rs +++ b/crates/secrets/src/test_support/credentials.rs @@ -42,6 +42,15 @@ impl TestCredentialManager { set_credentials_sleep_time_ms: Default::default(), } } + + pub async fn count_credentials_with_prefix(&self, prefix: &str) -> usize { + self.credentials + .lock() + .await + .keys() + .filter(|key| key.starts_with(prefix)) + .count() + } } #[async_trait]