Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 33 additions & 9 deletions crates/machine-a-tron/src/mock_ssh_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ pub struct Credentials {
pub enum PromptBehavior {
Dell,
Dpu,
LenovoSr650,
}

pub async fn spawn(
Expand Down Expand Up @@ -202,9 +203,10 @@ impl MockSshHandler {
format!("\r\nroot@{} # ", self.prompt_hostname.get_hostname()),
)?;
}
ConsoleState::Bmc => {
session.data(channel, "\nracadm>>")?;
}
ConsoleState::Bmc => match self.prompt_behavior {
PromptBehavior::LenovoSr650 => session.data(channel, "\nsystem>")?,
_ => session.data(channel, "\nracadm>>")?,
},
ConsoleState::NoShell => {
// Do nothing
}
Expand Down Expand Up @@ -256,7 +258,7 @@ impl server::Handler for MockSshHandler {
) -> StdResult<(), Self::Error> {
tracing::debug!("shell_request");
match self.prompt_behavior {
PromptBehavior::Dell => {
PromptBehavior::Dell | PromptBehavior::LenovoSr650 => {
self.console_state = ConsoleState::Bmc;
}
PromptBehavior::Dpu => {
Expand Down Expand Up @@ -314,11 +316,33 @@ impl server::Handler for MockSshHandler {
ConsoleState::Bmc => {
if data == b"\n" || data == b"\r\n" || data == b"\r" {
let command = std::mem::take(&mut self.buffer);
if command.starts_with(b"connect com2") {
tracing::info!(
"Got `connect com2` in bmc propmt, simulating system console"
);
self.console_state = ConsoleState::SystemConsole;
match self.prompt_behavior {
PromptBehavior::Dell if command.starts_with(b"connect com2") => {
tracing::info!(
"Got `connect com2` in bmc prompt, simulating system console"
);
self.console_state = ConsoleState::SystemConsole;
}
PromptBehavior::LenovoSr650 if command.starts_with(b"console kill 1") => {
tracing::info!(
"Got unsupported Lenovo `console kill 1`, simulating BMC error"
);
session.data(
channel,
"\r\nThe command line contains extraneous arguments\r\n",
)?;
}
PromptBehavior::LenovoSr650 if command.starts_with(b"console kill") => {
tracing::info!(
"Got Lenovo `console kill`, simulating terminated SOL session"
);
session.data(channel, "\r\nSession on channel 1 is terminated\r\n")?;
}
PromptBehavior::LenovoSr650 if command.starts_with(b"console start") => {
tracing::info!("Got Lenovo `console start`, simulating system console");
self.console_state = ConsoleState::SystemConsole;
}
Comment thread
williampnvidia marked this conversation as resolved.
_ => {}
}
self.print_prompt(session, channel)?;
} else {
Expand Down
118 changes: 105 additions & 13 deletions crates/ssh-console/src/bmc/connection_impl/ssh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -392,8 +392,8 @@ async fn make_authenticated_client(
}

// Interact with the serial-on-lan console within the BMC ssh session, calling the vendor's serial
// activation command (`connect com1`, etc) and ensuring we're in the serial console before
// continuing.
// activation command (`connect com1`, etc), falling back when needed, and ensuring we're in the
// serial console before continuing.
async fn trigger_and_await_sol_console(
machine_id: MachineId,
ssh_client_channel: &mut Channel<russh::client::Msg>,
Expand Down Expand Up @@ -439,12 +439,15 @@ async fn trigger_and_await_sol_console(
})?;

let mut prompt_buf: Vec<u8> = Vec::with_capacity(1024);
let timeout = tokio::time::Instant::now() + std::time::Duration::from_secs(30);
let mut timeout = tokio::time::Instant::now() + std::time::Duration::from_secs(30);
// After sending the activate command, wait for this much data to be read back (the command
// itself echoing back, plus the prompt length) before continuing. (If we let the client use the
// console before this, we get false positives about seeing a bmc prompt while we're supposed to
// be in the console.)
let skip_data_read_len = bmc_prompt.len() + activate_command.len();
let mut skip_data_read_len = bmc_prompt.len() + activate_command.len();
let mut fallback_activate_sent = false;
let mut fallback_activate_commands: Option<&'static [&'static [u8]]> = None;
let mut next_fallback_command_index = 0;

let mut activation_step = SerialConsoleActivationStep::WaitingForBmcPrompt;
loop {
Expand All @@ -467,14 +470,12 @@ async fn trigger_and_await_sol_console(
// We saw the prompt, send the serial activate command (`connect com1`,
// etc) one byte at a time: This seems to work better with some
// consoles.
for byte in activate_command {
ssh_client_channel
.data([*byte].as_slice())
.await
.map_err(|error| ConsoleActivateError::Request { phase: "sending serial activate command to BMC", error })?;
}
ssh_client_channel.data(b"\n".as_slice()).await
.map_err(|error| ConsoleActivateError::Request { phase: "sending data to BMC", error })?;
send_command_bytewise(
ssh_client_channel,
activate_command,
"sending serial activate command to BMC",
)
.await?;
activation_step = SerialConsoleActivationStep::ActivateSent;
// Clear the prompt
prompt_buf.clear();
Expand All @@ -486,7 +487,77 @@ async fn trigger_and_await_sol_console(
// get false positives about seeing a bmc prompt while we're supposed to be
// in the console.)
if matches!(activation_step, SerialConsoleActivationStep::ActivateSent)
&& prompt_buf.len() > skip_data_read_len {
&& let Some(fallback_commands) = bmc_vendor
.fallback_serial_activate_commands_if_needed(
&prompt_buf,
fallback_activate_sent,
)
{
tracing::info!(
%machine_id,
"Primary SOL activation failed, trying fallback"
);
fallback_activate_sent = true;
fallback_activate_commands = Some(fallback_commands);
next_fallback_command_index = 0;
let fallback_command = fallback_commands[next_fallback_command_index];
next_fallback_command_index += 1;
skip_data_read_len = bmc_prompt.len() + fallback_command.len();
timeout = tokio::time::Instant::now() + std::time::Duration::from_secs(30);
send_command_bytewise(
ssh_client_channel,
fallback_command,
"sending fallback serial activate command to BMC",
)
.await?;
prompt_buf.clear();
}

if matches!(activation_step, SerialConsoleActivationStep::ActivateSent)
&& let Some(fallback_commands) = fallback_activate_commands
&& next_fallback_command_index < fallback_commands.len()
&& prompt_buf.len() > skip_data_read_len
&& prompt_buf.windows(bmc_prompt.len()).any(|window| window == bmc_prompt)
{
let fallback_command = fallback_commands[next_fallback_command_index];
next_fallback_command_index += 1;
skip_data_read_len = bmc_prompt.len() + fallback_command.len();
timeout = tokio::time::Instant::now() + std::time::Duration::from_secs(30);
send_command_bytewise(
ssh_client_channel,
fallback_command,
"sending fallback serial activate command to BMC",
)
.await?;
prompt_buf.clear();
}

let waiting_for_fallback_prompt = fallback_activate_commands
.is_some_and(|commands| next_fallback_command_index < commands.len());
let fallback_sequence_complete = fallback_activate_commands
.is_some_and(|commands| next_fallback_command_index == commands.len());
let activation_output = if fallback_sequence_complete
&& let Some(fallback_commands) = fallback_activate_commands
{
let final_fallback_command = fallback_commands[fallback_commands.len() - 1];
prompt_buf
.windows(final_fallback_command.len())
.rposition(|window| window == final_fallback_command)
.map(|command_offset| &prompt_buf[command_offset..])
} else {
Some(prompt_buf.as_slice())
};
if matches!(activation_step, SerialConsoleActivationStep::ActivateSent)
&& !waiting_for_fallback_prompt
&& let Some(activation_output) = activation_output
&& !(fallback_sequence_complete
&& activation_output
.windows(bmc_prompt.len())
.any(|window| window == bmc_prompt))
&& bmc_vendor.should_accept_sol_activation_output(
activation_output,
skip_data_read_len,
) {
tracing::debug!(%machine_id, "confirmed serial activate command sent, letting client use console");
break;
}
Expand Down Expand Up @@ -537,6 +608,27 @@ enum SerialConsoleActivationStep {
ActivateSent,
}

async fn send_command_bytewise(
ssh_client_channel: &mut Channel<russh::client::Msg>,
command: &[u8],
phase: &'static str,
) -> Result<(), ConsoleActivateError> {
for byte in command {
ssh_client_channel
.data([*byte].as_slice())
.await
.map_err(|error| ConsoleActivateError::Request { phase, error })?;
}
ssh_client_channel
.data(b"\n".as_slice())
.await
.map_err(|error| ConsoleActivateError::Request {
phase: "sending data to BMC",
error,
})?;
Ok(())
}

/// Returns `true` if `buf` contains the byte sequence `pat` anywhere
/// (contiguously), running in O(n*m) time (n = buf.len(), m = pat.len())
/// and doing no heap allocations.
Expand Down
116 changes: 116 additions & 0 deletions crates/ssh-console/src/bmc/vendor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ use serde::{Deserialize, Deserializer, Serialize};
/// The escape sequence for IPMI is vendor-independent since it's specific to ipmitool.
pub static IPMITOOL_ESCAPE_SEQUENCE: EscapeSequence =
EscapeSequence::Pair((b'~', &[b'.', b'B', b'?', 0x1a, 0x18]));
/// Output text that `fallback_serial_activate_commands_if_needed` and
/// `should_accept_sol_activation_output` search for to recognise that the primary Lenovo
/// serial activation command was rejected.
const LENOVO_SOL_PRIMARY_FAILURE: &[u8] = b"The command line contains extraneous arguments";
/// Commands returned as the Lenovo fallback activation sequence, in the order listed.
const LENOVO_SOL_FALLBACK_ACTIVATE_COMMANDS: &[&[u8]] = &[b"console kill", b"console start"];

#[derive(Copy, Clone, Debug, PartialEq)]
pub enum BmcVendor {
Expand Down Expand Up @@ -171,6 +173,35 @@ impl SshBmcVendor {
}
}

/// Decides whether the fallback serial activation sequence should be started.
///
/// Returns the fallback command list only when all of the following hold:
/// - the vendor is `SshBmcVendor::Lenovo`,
/// - `fallback_sent` is `false`,
/// - `prompt_buf` contains `LENOVO_SOL_PRIMARY_FAILURE`, and
/// - the vendor has a BMC prompt and `prompt_buf` contains it.
///
/// In every other case the result is `None`.
pub fn fallback_serial_activate_commands_if_needed(
    &self,
    prompt_buf: &[u8],
    fallback_sent: bool,
) -> Option<&'static [&'static [u8]]> {
    // Only Lenovo has a fallback, and it is offered at most once.
    if fallback_sent || !matches!(self, SshBmcVendor::Lenovo) {
        return None;
    }
    if !bytes_contains(prompt_buf, LENOVO_SOL_PRIMARY_FAILURE) {
        return None;
    }

    // The prompt must also be present in the buffer before the fallback is offered.
    let prompt = self.bmc_prompt()?;
    bytes_contains(prompt_buf, prompt).then_some(LENOVO_SOL_FALLBACK_ACTIVATE_COMMANDS)
}

/// Reports whether the output collected after an activation command is enough to treat the
/// serial console as active.
///
/// The output is accepted when `prompt_buf` is strictly longer than `skip_data_read_len`
/// and, for `SshBmcVendor::Lenovo`, it does not contain `LENOVO_SOL_PRIMARY_FAILURE`.
pub fn should_accept_sol_activation_output(
    &self,
    prompt_buf: &[u8],
    skip_data_read_len: usize,
) -> bool {
    // Not enough data read back yet: never accept, regardless of vendor.
    if prompt_buf.len() <= skip_data_read_len {
        return false;
    }

    match self {
        // A visible Lenovo failure message means activation has not succeeded.
        SshBmcVendor::Lenovo => !bytes_contains(prompt_buf, LENOVO_SOL_PRIMARY_FAILURE),
        _ => true,
    }
}

pub fn filter_escape_sequences<'a>(
&self,
input: &'a [u8],
Expand Down Expand Up @@ -202,6 +233,10 @@ impl SshBmcVendor {
}
}

/// Returns `true` when `pat` occurs as a contiguous run of bytes somewhere in `buf`.
///
/// An empty `pat` never matches; the explicit check also keeps `windows` from being
/// called with a size of zero, which would panic.
fn bytes_contains(buf: &[u8], pat: &[u8]) -> bool {
    match pat {
        [] => false,
        _ => buf.windows(pat.len()).any(|candidate| candidate == pat),
    }
}

#[derive(Clone, Copy, PartialEq)]
pub enum EscapeSequence {
// A single one-byte escape (ie. ctrl+\)
Expand Down Expand Up @@ -332,6 +367,18 @@ mod tests {
prev_pending: bool,
}

/// One input row for the `fallback_serial_activate_commands_if_needed` scenarios.
struct FallbackCase {
/// Vendor whose fallback decision is being exercised.
vendor: SshBmcVendor,
/// Bytes passed as the `prompt_buf` argument.
output: &'static [u8],
/// Value passed as the `fallback_sent` argument.
fallback_sent: bool,
}

/// One input row for the `should_accept_sol_activation_output` scenarios.
struct AcceptActivationCase {
/// Vendor whose acceptance decision is being exercised.
vendor: SshBmcVendor,
/// Bytes passed as the `prompt_buf` argument.
output: &'static [u8],
/// Value passed as the `skip_data_read_len` argument.
skip_data_read_len: usize,
}

/// The Lenovo/HPE two-byte escape (`ESC (`), used by most filtering rows.
const ESC_PAREN: EscapeSequence = EscapeSequence::Pair((0x1b, &[0x28]));

Expand Down Expand Up @@ -495,6 +542,75 @@ mod tests {
}
}

// Checks when `fallback_serial_activate_commands_if_needed` offers the fallback sequence,
// and that the sequence offered for Lenovo is `console kill` followed by `console start`.
#[test]
fn fallback_serial_activate_commands_if_needed_detects_lenovo_failure() {
// Echoed primary command, the failure message, and then the `system>` text at the end.
let lenovo_primary_failure =
b"console kill 1\r\nThe command line contains extraneous arguments\r\nsystem>";

value_scenarios!(
run = |case: FallbackCase| case.vendor
.fallback_serial_activate_commands_if_needed(case.output, case.fallback_sent)
.is_some();

// Rows, in order:
// 1. Lenovo, full failure output, fallback not yet sent -> fallback offered.
// 2. Lenovo, failure message without the trailing `system>` text -> not offered.
// 3. Lenovo, full failure output, fallback already sent -> not offered again.
// 4. Dell, same output -> no fallback.
"Lenovo SR650 v4 fallback" {
FallbackCase { vendor: SshBmcVendor::Lenovo, output: lenovo_primary_failure, fallback_sent: false } => true,
FallbackCase { vendor: SshBmcVendor::Lenovo, output: b"console kill 1\r\nThe command line contains extraneous arguments\r\n", fallback_sent: false } => false,
FallbackCase { vendor: SshBmcVendor::Lenovo, output: lenovo_primary_failure, fallback_sent: true } => false,
FallbackCase { vendor: SshBmcVendor::Dell, output: lenovo_primary_failure, fallback_sent: false } => false,
}
);

// Beyond "is some": pin the exact commands and their order.
let commands = SshBmcVendor::Lenovo
.fallback_serial_activate_commands_if_needed(lenovo_primary_failure, false)
.expect("Lenovo failure should provide fallback commands");
assert_eq!(
commands,
&[b"console kill".as_slice(), b"console start".as_slice()]
);
}

// Checks `should_accept_sol_activation_output` for a Lenovo primary failure, a Lenovo
// fallback `console start`, and a non-Lenovo vendor.
#[test]
fn should_accept_sol_activation_output_handles_fallback_cases() {
let bmc_prompt = SshBmcVendor::Lenovo.bmc_prompt().unwrap();
// Skip lengths are computed as prompt length plus the length of the command sent.
let lenovo_primary_skip_len = bmc_prompt.len()
+ SshBmcVendor::Lenovo
.serial_activate_command()
.unwrap()
.len();
let lenovo_start_skip_len = bmc_prompt.len() + b"console start".len();

value_scenarios!(
run = |case: AcceptActivationCase| case.vendor.should_accept_sol_activation_output(
case.output,
case.skip_data_read_len,
);

// Output containing the Lenovo failure message must not be accepted.
"Lenovo primary failure waits for fallback" {
AcceptActivationCase {
vendor: SshBmcVendor::Lenovo,
output: b"console kill 1\r\nThe command line contains extraneous arguments\r\n",
skip_data_read_len: lenovo_primary_skip_len,
} => false,
}

// No failure message, and the output is expected to exceed the skip length.
"Lenovo fallback start succeeds by byte count" {
AcceptActivationCase {
vendor: SshBmcVendor::Lenovo,
output: b"console start\r\nroot@host # ",
skip_data_read_len: lenovo_start_skip_len,
} => true,
}

// For other vendors only the length comparison applies.
"non-Lenovo activation still succeeds by byte count" {
AcceptActivationCase {
vendor: SshBmcVendor::Dell,
output: b"connect com2\r\nready",
skip_data_read_len: b"connect com2".len(),
} => true,
}
);
}

Comment thread
williampnvidia marked this conversation as resolved.
#[test]
fn bmc_vendor_deserialize_rejects_an_unknown_string() {
scenarios!(
Expand Down
Loading
Loading