Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions crates/admin-cli/src/machine/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ pub mod nvlink_info;
pub mod positions;
pub mod reboot;
pub mod show;
pub mod vendor_override;

#[cfg(test)]
mod tests;
Expand Down Expand Up @@ -89,4 +90,9 @@ pub enum Cmd {
Positions(positions::Args),
#[clap(subcommand, about = "Update/show NVLink info for an MNNVL machine")]
NvlinkInfo(nvlink_info::Args),
#[clap(
subcommand,
about = "Pin or clear the Redfish BMC vendor override for a machine"
)]
VendorOverride(vendor_override::Args),
}
74 changes: 74 additions & 0 deletions crates/admin-cli/src/machine/vendor_override/args.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

use carbide_uuid::machine::MachineId;
use clap::Parser;

// Subcommands of `machine vendor-override`: pin, clear, or show the Redfish
// BMC vendor override of a single machine. Each variant carries its own
// argument struct. Plain `//` comments are used here on purpose: `///` doc
// comments on clap derive items are picked up as help text.
#[derive(Parser, Debug, Clone)]
pub enum Args {
// Pin a vendor for the machine; arguments are in `VendorOverrideSet`.
#[clap(about = "Pin the Redfish BMC vendor for a machine")]
Set(VendorOverrideSet),
// Remove the pinned vendor; arguments are in `VendorOverrideClear`.
#[clap(about = "Clear the Redfish BMC vendor override for a machine")]
Clear(VendorOverrideClear),
// Print the currently pinned vendor; arguments are in `VendorOverrideShow`.
#[clap(about = "Show the Redfish BMC vendor override for a machine")]
Show(VendorOverrideShow),
}

// Arguments for `machine vendor-override set`.
//
// Plain `//` comments are used instead of `///` so that clap does not pick
// them up as help text; the user-facing text lives in the attributes below.
#[derive(Parser, Debug, Clone)]
#[command(after_long_help = "\
EXAMPLES:

Force a machine's BMC vendor to Dell:
$ nico-admin-cli machine vendor-override set 12345678-1234-5678-90ab-cdef01234567 \
--vendor Dell

")]
pub struct VendorOverrideSet {
    // Positional argument: the machine to pin a vendor on.
    #[clap(help = "The machine whose BMC vendor should be pinned")]
    pub machine: MachineId,
    // Vendor name to store. It is sent to the API as a plain string; the CLI
    // keeps no vendor list of its own. Empty values are rejected at parse
    // time because the API treats an empty override as "clear", which would
    // make `set --vendor ""` silently remove the pin instead of setting one.
    #[clap(
        long,
        value_parser = clap::builder::NonEmptyStringValueParser::new(),
        help = "RedfishVendor to force (e.g. Dell, Supermicro, NvidiaDpu, Hpe, Lenovo)"
    )]
    pub vendor: String,
}

// Arguments for `machine vendor-override clear`. The only input is the
// machine whose override is removed. `//` is used rather than `///` so the
// comment does not leak into the clap-generated help.
#[derive(Parser, Debug, Clone)]
#[command(after_long_help = "\
EXAMPLES:

Clear a machine's BMC vendor override (return to automatic detection):
$ nico-admin-cli machine vendor-override clear 12345678-1234-5678-90ab-cdef01234567

")]
pub struct VendorOverrideClear {
// Positional argument: the machine to clear the override on.
#[clap(help = "The machine whose BMC vendor override should be cleared")]
pub machine: MachineId,
}

// Arguments for `machine vendor-override show`. The only input is the
// machine whose override is printed. `//` is used rather than `///` so the
// comment does not leak into the clap-generated help.
#[derive(Parser, Debug, Clone)]
#[command(after_long_help = "\
EXAMPLES:

Show a machine's pinned BMC vendor (or that none is set):
$ nico-admin-cli machine vendor-override show 12345678-1234-5678-90ab-cdef01234567

")]
pub struct VendorOverrideShow {
// Positional argument: the machine to inspect.
#[clap(help = "The machine whose BMC vendor override should be shown")]
pub machine: MachineId,
}
62 changes: 62 additions & 0 deletions crates/admin-cli/src/machine/vendor_override/cmd.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

use carbide_uuid::machine::MachineId;
use rpc::Machine;

use super::args::{Args, VendorOverrideClear, VendorOverrideSet, VendorOverrideShow};
use crate::errors::{CarbideCliError, CarbideCliResult};
use crate::rpc::ApiClient;

/// Entry point for `machine vendor-override`: routes the parsed subcommand
/// to the matching handler and returns that handler's result unchanged.
pub async fn vendor_override(api_client: &ApiClient, cmd: Args) -> CarbideCliResult<()> {
    let outcome = match cmd {
        Args::Set(set_args) => set(api_client, set_args).await,
        Args::Clear(clear_args) => clear(api_client, clear_args).await,
        Args::Show(show_args) => show(api_client, show_args).await,
    };
    outcome
}

/// Looks up a single machine by ID through the API.
///
/// # Errors
/// Propagates any error from the API call, and returns
/// `CarbideCliError::GenericError` when the response contains no machine.
async fn fetch_machine(api_client: &ApiClient, machine_id: MachineId) -> CarbideCliResult<Machine> {
    let response = api_client.get_machines_by_ids(&[machine_id]).await?;
    let mut found = response.machines;

    // Exactly one ID was requested, so the last entry (if any) is the match.
    match found.pop() {
        Some(machine) => Ok(machine),
        None => Err(CarbideCliError::GenericError(format!(
            "Machine with ID {machine_id} was not found"
        ))),
    }
}

async fn set(api_client: &ApiClient, cmd: VendorOverrideSet) -> CarbideCliResult<()> {
api_client
.update_machine_bmc_vendor_override(cmd.machine, Some(cmd.vendor))
.await
}

async fn clear(api_client: &ApiClient, cmd: VendorOverrideClear) -> CarbideCliResult<()> {
api_client
.update_machine_bmc_vendor_override(cmd.machine, None)
.await
}

/// Prints the machine's pinned BMC vendor to stdout, or a fixed placeholder
/// line when no override is stored.
///
/// # Errors
/// Fails when the machine cannot be fetched.
async fn show(api_client: &ApiClient, cmd: VendorOverrideShow) -> CarbideCliResult<()> {
    let machine = fetch_machine(api_client, cmd.machine).await?;
    let rendered = machine
        .bmc_vendor_override
        .as_deref()
        .unwrap_or("not set (automatic detection)");
    println!("{rendered}");
    Ok(())
}
32 changes: 32 additions & 0 deletions crates/admin-cli/src/machine/vendor_override/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

pub mod args;
pub mod cmd;

pub use args::Args;

use crate::cfg::run::Run;
use crate::cfg::runtime::RuntimeContext;
use crate::errors::CarbideCliResult;

/// Hooks the `vendor-override` subcommand tree into the CLI runtime by
/// forwarding to `cmd::vendor_override` with the context's API client.
impl Run for Args {
    async fn run(self, ctx: &mut RuntimeContext) -> CarbideCliResult<()> {
        let api_client = &ctx.api_client;
        cmd::vendor_override(api_client, self).await
    }
}
12 changes: 12 additions & 0 deletions crates/admin-cli/src/rpc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2045,6 +2045,18 @@ impl ApiClient {
Ok(self.0.update_machine_metadata(request).await?)
}

/// Sends a `MachineBmcVendorOverrideUpdateRequest` for `machine_id`.
///
/// `bmc_vendor_override` is forwarded as given: `Some(name)` asks the API to
/// pin that vendor, `None` asks it to clear the override.
///
/// # Errors
/// Propagates any error returned by the underlying RPC call.
pub async fn update_machine_bmc_vendor_override(
    &self,
    machine_id: MachineId,
    bmc_vendor_override: Option<String>,
) -> CarbideCliResult<()> {
    self.0
        .update_machine_bmc_vendor_override(::rpc::forge::MachineBmcVendorOverrideUpdateRequest {
            machine_id: Some(machine_id),
            bmc_vendor_override,
        })
        .await?;
    Ok(())
}

pub async fn update_rack_metadata(
&self,
rack_id: RackId,
Expand Down
7 changes: 7 additions & 0 deletions crates/api-core/src/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1170,6 +1170,13 @@ impl Forge for Api {
crate::handlers::machine::update_machine_metadata(self, request).await
}

/// gRPC entry point for updating a machine's BMC vendor override. Contains no
/// logic of its own; it delegates to
/// `crate::handlers::machine::update_machine_bmc_vendor_override`.
async fn update_machine_bmc_vendor_override(
&self,
request: Request<rpc::MachineBmcVendorOverrideUpdateRequest>,
) -> std::result::Result<Response<()>, Status> {
crate::handlers::machine::update_machine_bmc_vendor_override(self, request).await
}

async fn update_rack_metadata(
&self,
request: Request<rpc::RackMetadataUpdateRequest>,
Expand Down
6 changes: 5 additions & 1 deletion crates/api-core/src/handlers/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1041,6 +1041,10 @@ pub(crate) async fn invoke_power(
// but instead queue it for the state handler. That will avoid racing
// with other internal reboot requests from the state handler.
let bmc_ip = bmc_ip.to_string();
let vendor_override = carbide_redfish::libredfish::conv::redfish_vendor_override(
&bmc_ip,
snapshot.host_snapshot.bmc_vendor_override.as_deref(),
);
let client = api
.redfish_pool
.create_client(
Expand All @@ -1049,7 +1053,7 @@ pub(crate) async fn invoke_power(
RedfishAuth::Key(CredentialKey::BmcCredentials {
credential_type: BmcCredentialType::BmcRoot { bmc_mac_address },
}),
None,
vendor_override,
)
.await
.map_err(|e| CarbideError::internal(e.to_string()))?;
Expand Down
42 changes: 41 additions & 1 deletion crates/api-core/src/handlers/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,41 @@ pub(crate) async fn update_machine_metadata(
Ok(tonic::Response::new(()))
}

/// Sets or clears the operator-pinned Redfish BMC vendor for one machine.
///
/// The machine lookup and the update run in the same transaction, which is
/// committed only after the update succeeds.
///
/// # Errors
/// Returns a not-found error when no machine with the requested ID exists,
/// and propagates machine-ID conversion, database, and commit failures.
pub(crate) async fn update_machine_bmc_vendor_override(
    api: &Api,
    request: Request<rpc::MachineBmcVendorOverrideUpdateRequest>,
) -> std::result::Result<tonic::Response<()>, tonic::Status> {
    log_request_data(&request);
    let payload = request.into_inner();
    let machine_id = convert_and_log_machine_id(payload.machine_id.as_ref())?;

    let mut txn = api.txn_begin().await?;

    // Refuse to touch the override of a machine that does not exist.
    let machine_exists =
        db::machine::find_one(&mut txn, &machine_id, MachineSearchConfig::default())
            .await?
            .is_some();
    if !machine_exists {
        let not_found = CarbideError::NotFoundError {
            kind: "machine",
            id: machine_id.to_string(),
        };
        return Err(not_found.into());
    }

    // The override is stored as a plain string; an empty or absent value
    // clears it. libredfish does the vendor matching when the client is
    // built, so the API keeps no vendor list of its own.
    let bmc_vendor_override = payload
        .bmc_vendor_override
        .filter(|name| !name.is_empty());

    db::machine::update_bmc_vendor_override(&mut txn, &machine_id, bmc_vendor_override).await?;
    txn.commit().await?;

    Ok(tonic::Response::new(()))
}

pub(crate) async fn admin_force_delete_machine(
api: &Api,
request: Request<rpc::AdminForceDeleteMachineRequest>,
Expand Down Expand Up @@ -472,6 +507,11 @@ pub(crate) async fn admin_force_delete_machine(
"BMC IP and MAC address for machine was found. Trying to perform Bios unlock",
);

let vendor_override = carbide_redfish::libredfish::conv::redfish_vendor_override(
&ip_address,
machine.bmc_vendor_override.as_deref(),
);

match api
.redfish_pool
.create_client(
Expand All @@ -480,7 +520,7 @@ pub(crate) async fn admin_force_delete_machine(
RedfishAuth::Key(CredentialKey::BmcCredentials {
credential_type: BmcCredentialType::BmcRoot { bmc_mac_address },
}),
None,
vendor_override,
)
.await
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Add bmc_vendor_override to machines so an operator can pin the Redfish BMC
-- vendor for a machine. NULL means automatic detection. The value is a
-- RedfishVendor variant name passed down into libredfish as the forced vendor.
-- The column is nullable and has no default, so existing rows start out as
-- NULL (automatic detection).
ALTER TABLE machines ADD COLUMN bmc_vendor_override text;
17 changes: 17 additions & 0 deletions crates/api-db/src/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,23 @@ pub async fn update_metadata(
}
}

/// Writes the operator-pinned Redfish BMC vendor override for `machine_id`.
///
/// `Some(name)` stores that name in `machines.bmc_vendor_override`; `None`
/// binds NULL, which clears the override.
///
/// # Errors
/// Returns `DatabaseError` (built via `DatabaseError::query`) when the
/// UPDATE statement fails.
pub async fn update_bmc_vendor_override(
    txn: &mut PgConnection,
    machine_id: &MachineId,
    bmc_vendor_override: Option<String>,
) -> Result<(), DatabaseError> {
    let query = "UPDATE machines SET bmc_vendor_override = $1 WHERE id = $2";
    let outcome = sqlx::query(query)
        .bind(bmc_vendor_override)
        .bind(machine_id)
        .execute(txn)
        .await;

    match outcome {
        Ok(_) => Ok(()),
        Err(e) => Err(DatabaseError::query(query, e)),
    }
}

/// Only does the update if the passed observation is newer than any existing one
pub async fn update_network_status_observation(
txn: &mut PgConnection,
Expand Down
25 changes: 18 additions & 7 deletions crates/api-db/src/machine_interface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -373,17 +373,28 @@ pub async fn lookup_bmc_access_info(
ip: IpAddr,
port: Option<u16>,
) -> DatabaseResult<BmcAccessInfo> {
let mac_address = find_by_ip(db, ip)
.await?
.ok_or_else(|| DatabaseError::NotFoundError {
kind: "Machine Interface",
id: ip.to_string(),
})?
.mac_address;
// Resolve the BMC interface MAC and the owning machine's vendor override in
// one query so every client_by_info caller can act on the override.
let query = r"SELECT mi.mac_address, m.bmc_vendor_override
FROM machine_interface_addresses mia
INNER JOIN machine_interfaces mi ON mi.id = mia.interface_id
LEFT JOIN machines m ON m.id = mi.machine_id
WHERE mia.address = $1::inet
LIMIT 1";
let row: Option<(MacAddress, Option<String>)> = sqlx::query_as(query)
.bind(ip)
.fetch_optional(db)
.await
.map_err(|e| DatabaseError::query(query, e))?;
let (mac_address, bmc_vendor_override) = row.ok_or_else(|| DatabaseError::NotFoundError {
kind: "Machine Interface",
id: ip.to_string(),
})?;
Ok(BmcAccessInfo {
host: ip.to_string(),
port,
mac_address,
bmc_vendor_override,
})
}

Expand Down
3 changes: 3 additions & 0 deletions crates/api-model/src/machine/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ pub struct MachineSnapshotPgJson {
pub interfaces: Vec<MachineInterfaceSnapshot>,
pub topology: Vec<MachineTopology>,
pub bmc_info: BmcInfo,
#[serde(default)]
pub bmc_vendor_override: Option<String>,
pub labels: HashMap<String, String>,
pub name: String,
pub description: String,
Expand Down Expand Up @@ -181,6 +183,7 @@ impl TryFrom<MachineSnapshotPgJson> for Machine {
interfaces: value.interfaces,
hardware_info,
bmc_info: value.bmc_info,
bmc_vendor_override: value.bmc_vendor_override,
last_reboot_time: value.last_reboot_time,
last_cleanup_time: value.last_cleanup_time,
last_discovery_time: value.last_discovery_time,
Expand Down
6 changes: 6 additions & 0 deletions crates/api-model/src/machine/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,12 @@ pub struct Machine {
/// The BMC info for this machine
pub bmc_info: BmcInfo,

/// Operator pinned Redfish BMC vendor for this machine. When set, it is
/// passed to libredfish as the forced vendor instead of detection from the
/// service root. None means automatic detection. Holds a RedfishVendor
/// variant name.
pub bmc_vendor_override: Option<String>,

/// Last time when machine came up.
pub last_reboot_time: Option<DateTime<Utc>>,

Expand Down
Loading