From 30e23d762757f95b9d1560b2794e8b27482aeff5 Mon Sep 17 00:00:00 2001 From: Ravikanth Nalla Date: Fri, 14 Nov 2025 10:38:19 +0000 Subject: [PATCH 01/94] CASM-5740: [FM on baremetal] Documentation for FM migration to baremetal - initial place holder for FM on baremetal docs --- operations/fm_on_baremetal/README.md | 50 ++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 operations/fm_on_baremetal/README.md diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md new file mode 100644 index 0000000000000..2d5effa64a851 --- /dev/null +++ b/operations/fm_on_baremetal/README.md @@ -0,0 +1,50 @@ +# FM (Fabric Manager) on baremetal + +- [Introduction](#introduction) +- [Terminology and components](#terminology-and-components) + - [1](#1) + - [2](#2) + - [2.1](#2.1) + - [2.2](#2.2) + - [3](#3) +- [Architecture](#architecture) +- [Enable and configure](#enable-and-configure) +- [Troubleshooting](#troubleshooting) + +## Introduction + +HPE Cray Supercomputing EX systems are designed to maintain high availability (HA) for... + +To address these issues, CSM 1.7.1 includes FM on baremetal, which provides ... +to maintain HA of ... + +**NOTE**: + +- FM on baremetal is disabled by default. + +## Terminology and components + +### 1 +### 2 +### 3 + +## Architecture + +![FM On Baremetal Solution Overview](../../img/fm_on_baremetal.png) + +The FM HA solution is implemented in following stages. These stages are: + +1. [Enable and configure](#enable-and-configure) +1. [Setup of FM HA](Setup_of_FM_HA.md) + +## Enable and configure + +How to enable and configure FM on baremetal depends on the context. 
+See the following links: + +- [Enabling FM On Baremetal Post CSM Install](Enabling_FM_On_Baremetal_Post_CSM_Install.md) +- [Enabling FM On Baremetal Post CSM Upgrade](Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md) + +## Troubleshooting + +For information on how to troubleshoot FM on baremetal, see [Troubleshooting](Troubleshooting.md). From cfd4f9b15cbbff9e0cfaf0fcce9acb255b77efc3 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 18 Nov 2025 17:44:39 +0530 Subject: [PATCH 02/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 2d5effa64a851..1232222eb49b1 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -13,10 +13,9 @@ ## Introduction -HPE Cray Supercomputing EX systems are designed to maintain high availability (HA) for... +The Fabric Manager (FM) bare-metal enablement within the Cray System Management (CSM) framework introduces dedicated Fabric Manager Nodes (FMNs) that manage and monitor Slingshot fabric operations outside of a Kubernetes environment. While the overall bare-metal Fabric Manager solution is described in the Slingshot Fabric Manager HA documentation, this CSM detail design document focuses specifically on the CSM-level enhancements required to integrate and support FMNs. -To address these issues, CSM 1.7.1 includes FM on baremetal, which provides ... -to maintain HA of ... +CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS image, networking, and storage configurations for running the Slingshot Fabric Manager natively within the CSM environment.
**NOTE**: From 74eca40230b61806111d45e622ae2d26d08241c2 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 18 Nov 2025 22:34:52 +0530 Subject: [PATCH 03/94] Create Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- ...abling_FM_On_Baremetal_Post_CSM_Upgrade.md | 176 ++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md diff --git a/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md b/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md new file mode 100644 index 0000000000000..c649356e3f884 --- /dev/null +++ b/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md @@ -0,0 +1,176 @@ +# Enabling Fabric Manager (FM) On baremetal post CSM Upgrade + +## Overview +* The new Fabric Manager Nodes (FMNs) would be added only after the CSM upgrade is complete. +* By default, FM on baremetal is disabled. +* This page documents the procedures for enabling and configuring FM on baremetal post CSM upgrade. +* FM on baremetal cannot be disabled after it has been enabled. + +## Post upgrade of CSM from 1.7.0 to 1.7.1 + +Post CSM Upgrade from 1.7.0 to CSM 1.7.1, if an administrator wishes to enable Fabric Manager on baremetal, they must follow below procedure. + +## FMN Node Image Customization, Creation, and Deployment Procedure + +### Update SHCD with FMN (Fabric Manager Node) Information + +The administrator must update the SHCD to include the placement and cabling details of the new FMNs. + +### Configure FMN BMC + +Verify that the BMC of each FMN is configured with the correct root user credentials. 
+ +### Perform CANU validation + +* Validate SHCD with respect to FMNs +* Map FMNs in the SHCD to the node type:Management_FabricManager when building the CCJ file +* Generate switch configuration for the node based on the new Role: Management , SubRole: FabricManager pairing + +Validate the SHCD. + +**For example:** + +``` bash +canu validate shcd -a TDS --shcd "System5 Surtur Shasta River RevA27.xlsx" --tabs edge,25G_10G,NMN,HMN --corners J1,T3,I14,Q55,I16,S21,J20,U41 --edge Arista +``` + +If the output looks good (Warnings about the CAN switch and SITE connections can be discounted) then generate the CCJ file. + +```bash +canu validate shcd -a TDS --shcd "System5 Surtur Shasta River RevA27.xlsx" --tabs edge,25G_10G,NMN,HMN --corners J1,T3,I14,Q55,I16,S21,J20,U41 --edge Arista --json --out surtur-ccj.json +``` + +Verify that the Fabric Manager nodes are present in the output CCJ file. + +```bash +jq -c '.topology[] | select(.common_name|contains("fmn"))' surtur-ccj.json +``` + +### Create the base image (only base OS; no Fabric Manager) for FMN + +#### The FMN node customization ansible roles + +Perform image customization on the existing NCN Kubernetes images for using it for FMN nodes +[../../operations/configuration_management/Management_Node_Image_Customization.md] + +### FabricManager Boot Preparation + +Create sat bootprep configuration file for FMN as below + +Update below sat bootprep configuration file with official/released versions. 
+set bootprep file path: BOOTPREP_FILE_PATH=/tmp/fmn_bootpre.yaml + +fmn_bootprep.yaml + +```yaml +schema_version: 1.0.2 +configurations: +- name: fmn-bm-default-configuration + layers: + - name: fmn-nodes-bm + playbook: ncn_nodes.yml + git: + commit: + url: https://api-gw-service-nmn.local/vcs/cray/csm-config-management.git + - name: fmn-initrd-bm + playbook: ncn-initrd.yml + git: + commit: + url: https://api-gw-service-nmn.local/vcs/cray/csm-config-management.git +images: +- name: fabricmanager-bm-node-image-1.0.0 + base: + product: + name: csm + version: 1.7.0 + type: image + filter: + prefix: secure-kubernetes + configuration: fmn-bm-default-configuration + configuration_group_names: + - Management_Fabric +``` + +Ansible role : csm.fm.baremetal + +**Note:** + +### New FMN image creation and uploade to S3 +BOOTPREP_FILE_PATH=./ + +Use below command on any of the master node to create the new FMN image and uploading the image to S3 + +```bash +sat bootprep +sat bootprep run \ + --limit images --limit configurations \ + --overwrite-images --overwrite-configs \ + --format json \ + --cfs-version v3 \ + --bos-version v2 \ + $BOOTPREP_FILE_PATH +``` + +**Note:** +The overwrite option "--overwrite-images" overwrites the previous/old image in S3 + +## FMN add procedure + +Follow NCN add procedure to add FMN nodes to CSM: + +https://github.com/Cray-HPE/docs-csm/blob/release/1.7/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md +Scripts source (HSM/ SLS/ BSS): Cray-HPE/docs-csm at CASM-5647-5739 +Note: Interface level differences to be considered while following NCN add procedure for FMNs + +https://github.com/Cray-HPE/docs-csm/blob/release/1.7/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#prerequisites +As part of the above prerequisites, there is a new prompt added to confirm if the node getting added is an FMN or not.
+ +https://github.com/Cray-HPE/docs-csm/blob/release/1.7/operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md#add-the-ncn-to-bss-hsm-and-sls + As part of the above step, include the new parameter --fmn-image-id only for the FM node. The value for this parameter should be the image ID generated in Step 6.4. + +By the end of this procedure, SLS, HSM, BSS would be FMN data. Following validations could be performed if needed for confirmations - + +SLS hardware should list the new nodes +Eg - cray sls hardware describe x3000c0s28b0n0 + +IPs should be allocated and made available for FMNs in all of SLS networks +Note - NMN and HMN should be having additional FMN VIPs also allocated + +Eg - cray sls search networks list --name NMN --format json + +HSM ethernet interfaces should be updated with the same allocated IPs. +Eg - cray hsm inventory ethernetInterfaces list --component-id x3000c0s28b0n0 --format json + +BSS should be updated with new hosts entries for FMN with proper configurations +Note - BSS global parameters also needs to be updated with FMN IPs(VIP not included) + +Eg - cray bss bootparameters list --format json --name x3000c0s28b0n0 + +Eg - cray bss bootparameters list --hosts Global --format json + + +## Boot FMN Nodes with iPXE + +Example: + +iPXE boot commands +NODE=fmn001 (or) fmn002 + +fmn001 - x3000c0s28b0n0 + +fmn002 - x3000c0s29b0n0 + + BMC="${NODE}-mgmt"; echo $BMC +read -r -s -p "${BMC} root password: " IPMI_PASSWORD +export IPMI_PASSWORD +cray console interact (In a different screen to check the progress of the boot) +echo ${BMC} + +```bash +ipmitool -I lanplus -U root -E -H "${BMC}" chassis power status +ipmitool -I lanplus -U root -E -H "${BMC}" chassis bootdev pxe options=efiboot +ipmitool -I lanplus -U root -E -H "${BMC}" chassis power off +ipmitool -I lanplus -U root -E -H "${BMC}" chassis power status +ipmitool -I lanplus -U root -E -H "${BMC}" chassis power on +ipmitool -I lanplus -U root -E -H "${BMC}" chassis power status 
+``` From f9e80c66c9f477787dbc395f4c070f8769b4bf74 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Fri, 21 Nov 2025 00:42:58 +0530 Subject: [PATCH 04/94] Update Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- ...abling_FM_On_Baremetal_Post_CSM_Upgrade.md | 49 ++++++++++--------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md b/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md index c649356e3f884..ab5bbcbfad3aa 100644 --- a/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md +++ b/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md @@ -1,16 +1,18 @@ # Enabling Fabric Manager (FM) On baremetal post CSM Upgrade ## Overview -* The new Fabric Manager Nodes (FMNs) would be added only after the CSM upgrade is complete. -* By default, FM on baremetal is disabled. -* This page documents the procedures for enabling and configuring FM on baremetal post CSM upgrade. -* FM on baremetal cannot be disabled after it has been enabled. + +* Fabric Manager Nodes (FMNs) can be added only after the CSM upgrade has been completed. +* By default, Fabric Manager on baremetal is disabled. +* This document describes the procedures for providing the base OS image, provisioning storage LUNs, and configuring the necessary networking to support + Fabric Manager on baremetal following the CSM upgrade. +* Once enabled, Fabric Manager on baremetal cannot be disabled. ## Post upgrade of CSM from 1.7.0 to 1.7.1 Post CSM Upgrade from 1.7.0 to CSM 1.7.1, if an administrator wishes to enable Fabric Manager on baremetal, they must follow below procedure. 
-## FMN Node Image Customization, Creation, and Deployment Procedure +## Prerequisites ### Update SHCD with FMN (Fabric Manager Node) Information @@ -46,21 +48,21 @@ Verify that the Fabric Manager nodes are present in the output CCJ file. jq -c '.topology[] | select(.common_name|contains("fmn"))' surtur-ccj.json ``` -### Create the base image (only base OS; no Fabric Manager) for FMN +## FMN Node Image Customization and Deployment Procedure -#### The FMN node customization ansible roles +The FabricManager subrole has been introduced to facilitate FMN node discovery and configuration. Corresponding updates have been made to `ncn_nodes.yaml` and `ncn_initrd.yaml` to support customization of the FMN base image— a non-Kubernetes image containing only essential artifacts. This customization is performed using the `csm.fm.baremetal` Ansible role, executed under the `Management_FabricManager` host. The following steps detail the process for generating the FMN base image with the required components and deploying it to FMN nodes. + +### Create the base image (only base OS; no Fabric Manager) for FMN -Perform image customization on the existing NCN Kubernetes images for using it for FMN nodes -[../../operations/configuration_management/Management_Node_Image_Customization.md] +Adapt and customize the current NCN Kubernetes image for compatibility with FMN node requirements [../../operations/configuration_management/Management_Node_Image_Customization.md] -### FabricManager Boot Preparation +#### Fabric Manager Boot Preparation -Create sat bootprep configuration file for FMN as below +Create `sat bootprep` configuration file (`fmn_bootprep.yaml`) for FMN as below. -Update below sat bootprep configuration file with official/released versions. -set bootprep file path: BOOTPREP_FILE_PATH=/tmp/fmn_bootpre.yaml +**Note:** Ensure that the `fmn_bootprep.yaml` configuration file is updated with the official released versions before proceeding. 
-fmn_bootprep.yaml +For Example: ```yaml schema_version: 1.0.2 @@ -82,7 +84,7 @@ images: base: product: name: csm - version: 1.7.0 + version: 1.7.1 type: image filter: prefix: secure-kubernetes @@ -91,17 +93,19 @@ images: - Management_Fabric ``` -Ansible role : csm.fm.baremetal +#### New FMN image creation and uploade to S3 -**Note:** +Execute the commands below on any master node to generate the new FMN image and upload it to the S3 storage. -### New FMN image creation and uploade to S3 -BOOTPREP_FILE_PATH=./ +First set `bootprep` file path: + +```bash +# BOOTPREP_FILE_PATH=./fmn_bootpre.yaml +``` -Use below command on any of the master node to create the new FMN image and uploading the image to S3 +Now execute the `sat bootprep run` command below to generate the new base image and upload it to S3. ```bash -sat bootprep sat bootprep run \ --limit images --limit configurations \ --overwrite-images --overwrite-configs \ @@ -111,8 +115,7 @@ sat bootprep run \ $BOOTPREP_FILE_PATH ``` -**Note:** -The overwrite option "--overwrite-images" overwrites the previous/old image in S3 +**Note:** Using the `--overwrite-images` option in the command above will overwrite any previously uploaded images in S3. 
## FMN add procedure From 07291aac3fdaadcd4e1b565227b57cdacc4a402b Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Fri, 21 Nov 2025 01:33:40 +0530 Subject: [PATCH 05/94] Update Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- ...abling_FM_On_Baremetal_Post_CSM_Upgrade.md | 138 +++++++++++++----- 1 file changed, 100 insertions(+), 38 deletions(-) diff --git a/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md b/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md index ab5bbcbfad3aa..1b20ffa7b3b0b 100644 --- a/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md +++ b/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md @@ -2,9 +2,9 @@ ## Overview -* Fabric Manager Nodes (FMNs) can be added only after the CSM upgrade has been completed. +* Fabric Manager Nodes (`FMNs`) can be added only after the CSM upgrade has been completed. * By default, Fabric Manager on baremetal is disabled. -* This document describes the procedures for providing the base OS image, provisioning storage LUNs, and configuring the necessary networking to support +* This document describes the procedures for providing the base OS image, provisioning storage LUNs, and configuring the necessary networking to support Fabric Manager on baremetal following the CSM upgrade. * Once enabled, Fabric Manager on baremetal cannot be disabled.
@@ -25,8 +25,8 @@ Verify that the BMC of each FMN is configured with the correct root user credent ### Perform CANU validation * Validate SHCD with respect to FMNs -* Map FMNs in the SHCD to the node type:Management_FabricManager when building the CCJ file -* Generate switch configuration for the node based on the new Role: Management , SubRole: FabricManager pairing +* Map FMNs in the SHCD to the node type: `Management_FabricManager` when building the CCJ file +* Generate switch configuration for the node based on the new Role: `Management` , SubRole: `FabricManager` pairing Validate the SHCD. @@ -52,11 +52,11 @@ jq -c '.topology[] | select(.common_name|contains("fmn"))' surtur-ccj.json The FabricManager subrole has been introduced to facilitate FMN node discovery and configuration. Corresponding updates have been made to `ncn_nodes.yaml` and `ncn_initrd.yaml` to support customization of the FMN base image— a non-Kubernetes image containing only essential artifacts. This customization is performed using the `csm.fm.baremetal` Ansible role, executed under the `Management_FabricManager` host. The following steps detail the process for generating the FMN base image with the required components and deploying it to FMN nodes. -### Create the base image (only base OS; no Fabric Manager) for FMN +### Create FMN base image (only base OS; no Fabric Manager) -Adapt and customize the current NCN Kubernetes image for compatibility with FMN node requirements [../../operations/configuration_management/Management_Node_Image_Customization.md] +Adapt and customize the current NCN Kubernetes image for compatibility with FMN node requirements. See (../../operations/configuration_management/Management_Node_Image_Customization.md) -#### Fabric Manager Boot Preparation +#### FMN Boot Preparation Create `sat bootprep` configuration file (`fmn_bootprep.yaml`) for FMN as below. 
@@ -93,7 +93,7 @@ images: - Management_Fabric ``` -#### New FMN image creation and uploade to S3 +#### New FMN base image creation and upload to S3 Execute the commands below on any master node to generate the new FMN image and upload it to the S3 storage. @@ -119,61 +119,123 @@ sat bootprep run \ ## FMN add procedure -Follow NCN add procedure to add FMN nodes to CSM: +After creating the FMN base image, add FMN nodes to CSM by following the [NCN add procedure](../../operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md) -https://github.com/Cray-HPE/docs-csm/blob/release/1.7/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md -Scripts source (HSM/ SLS/ BSS): Cray-HPE/docs-csm at CASM-5647-5739 -Note: Interface level differences to be considered while following NCN add procedure for FMNs +**Note:** -https://github.com/Cray-HPE/docs-csm/blob/release/1.7/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#prerequisites -As part of the above prerequisites, there is a new prompt added to confirm if the node getting added is an FMN or not. +* Below are the Interface level differences to be considered while following NCN add procedure for FMNs: + * As part of the [prerequisites](../../operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#prerequisites), there is a new prompt added to + confirm if the node getting added is an FMN or not. + * As part of the [step](../../operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md#add-the-ncn-to-bss-hsm-and-sls), include the new parameter + `--fmn-image-id` only for the FM node. The value for this parameter should be the image ID generated in the FMN base image creation stage above.
-https://github.com/Cray-HPE/docs-csm/blob/release/1.7/operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md#add-the-ncn-to-bss-hsm-and-sls - As part of the above step, include the new parameter --fmn-image-id only for the FM node. The value for this parameter should be the image ID generated in Step 6.4. +After completion of the NCN add procedure, SLS, HSM, and BSS will contain the corresponding FMN data. -By the end of this procedure, SLS, HSM, BSS would be FMN data. Following validations could be performed if needed for confirmations - +The following checks can be used to verify that the updates have been correctly applied: -SLS hardware should list the new nodes -Eg - cray sls hardware describe x3000c0s28b0n0 +### SLS hardware should list the new nodes -IPs should be allocated and made available for FMNs in all of SLS networks -Note - NMN and HMN should be having additional FMN VIPs also allocated +For Example: + +```bash +cray sls hardware describe x3000c0s28b0n0 +``` -Eg - cray sls search networks list --name NMN --format json +### IPs should be allocated and made available for FMNs in all of SLS networks -HSM ethernet interfaces should be updated with the same allocated IPs. -Eg - cray hsm inventory ethernetInterfaces list --component-id x3000c0s28b0n0 --format json +**Note:** NMN and HMN should be having additional FMN VIPs also allocated. 
-BSS should be updated with new hosts entries for FMN with proper configurations -Note - BSS global parameters also needs to be updated with FMN IPs(VIP not included) +For Example: -Eg - cray bss bootparameters list --format json --name x3000c0s28b0n0 +```bash +cray sls search networks list --name NMN --format json +``` -Eg - cray bss bootparameters list --hosts Global --format json +### HSM ethernet interfaces should be updated with the same allocated IPs +For Example: + +```bash +cray hsm inventory ethernetInterfaces list --component-id x3000c0s28b0n0 --format json +``` + +### BSS should be updated with new hosts entries for FMN with proper configurations + +**Note:** BSS global parameters also needs to be updated with FMN IPs(VIP not included). + +For Example: + +```bash +cray bss bootparameters list --format json --name x3000c0s28b0n0 +``` + +```bash +cray bss bootparameters list --hosts Global --format json +``` ## Boot FMN Nodes with iPXE -Example: +Once the FMNs have been added to the CSM, proceed to boot the FMN nodes (using iPXE boot commands) with the FMN bare-metal base image. -iPXE boot commands -NODE=fmn001 (or) fmn002 - -fmn001 - x3000c0s28b0n0 - -fmn002 - x3000c0s29b0n0 - - BMC="${NODE}-mgmt"; echo $BMC +### Set BMC with node name + +```bash +BMC="${NODE}-mgmt"; echo $BMC +``` + +**Note: ** Here the NODE can be `fmn001` (or) `fmn002`. For example, consider `fmn001` with xname `x3000c0s28b0n0` and `fmn002` with xname `x3000c0s29b0n0`. + +### Get and export IPMI credentials + +```bash read -r -s -p "${BMC} root password: " IPMI_PASSWORD export IPMI_PASSWORD -cray console interact (In a different screen to check the progress of the boot) +``` + +### Open console to check the progress of the upcoming boot + +Run below command in a different screen to check the progress of the boot which we are going to initiate in the next step. + +**Note: ** Here `xname` can be `fmn001 or `fmn002` based on which FMN is getting booted with. 
+ +```bash +cray console interact echo ${BMC} +``` + +### Check the current chassis power status ```bash ipmitool -I lanplus -U root -E -H "${BMC}" chassis power status +``` +### Set the boot option + +```bash ipmitool -I lanplus -U root -E -H "${BMC}" chassis bootdev pxe options=efiboot +``` + +### Poer off the chassis + +Power off the chassis: + +```bash ipmitool -I lanplus -U root -E -H "${BMC}" chassis power off +``` +Check the chassis power status: + +```bash ipmitool -I lanplus -U root -E -H "${BMC}" chassis power status +``` +### Power on the chassis + +Power on the chassis: + +```bash ipmitool -I lanplus -U root -E -H "${BMC}" chassis power on +``` + +Check the chassis power status: + +```bash ipmitool -I lanplus -U root -E -H "${BMC}" chassis power status ``` From 7acddabd46a20a9ecae98e6eea1e554580591bc5 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Fri, 21 Nov 2025 01:46:30 +0530 Subject: [PATCH 06/94] Update Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- ...abling_FM_On_Baremetal_Post_CSM_Upgrade.md | 43 ++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md b/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md index 1b20ffa7b3b0b..ea091ee605894 100644 --- a/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md +++ b/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md @@ -173,6 +173,43 @@ cray bss bootparameters list --format json --name x3000c0s28b0n0 cray bss bootparameters list --hosts Global --format json ``` +## Update switch configuration using CANU + +**Note: ** This step cannot be performed until the Fabric Manager nodes have been added to SLS. 
+To generate the new switch configuration, the following are required: + +* A CCJ file +* Any custom config file specific to the system +* An SLS file that contains the FMNs (`cray sls dumpstate list --format json` may be used to obtain this once SLS has been updated on the running system) +* Knowledge of whether the system has the NMN Isolation feature enabled or not + +### Generate the switch configuration + +For Example: + +```bash +canu generate network config -a TDS --csm 1.7 --custom-config custom_switch_config.yaml --edge Arista --sls-file sls_input_file.json --ccj surtur-ccj.json --folder output (--enable-nmn-isolation --nmn-pvlan ) +``` + +### Validate the generated switch configuration against the network switches + +* TDS style systems have the management nodes plugged directly into the spine switches; most will only have a single leaf-bmc switch. +* Systems that use the "Full" architecture will have the management nodes plugged into the leaf switches. + +The configuration generated here will contain updates for the leaf-bmc switch(es) for the Fabric Manager node BMCs and updates to either the spine switches or the leaf switches for the bonded connection. + +For Example: + +```bash +canu validate switch config --ip 10.254.0.4 --generated output/sw-leaf-bmc-001.cfg +``` + +**Note:** CANU will likely suggest the removal of the snmpv3 user. This is because the SNMP configuration is not held in the `custom_config.yaml` file, since it is not permitted to store secrets in GitHub. Do NOT remove this configuration from the switch. + +Take extreme care when manipulating ACLs. If CANU suggests moving a "permit any ..." rule, be sure to create the new rule before removing the old one. It is possible to lose access to the switch if the ACLs are not applied in the correct order. + + ## Boot FMN Nodes with iPXE Once the FMNs have been added to the CSM, proceed to boot the FMN nodes (using iPXE boot commands) with the FMN bare-metal base image.
@@ -214,7 +251,7 @@ ipmitool -I lanplus -U root -E -H "${BMC}" chassis power status ipmitool -I lanplus -U root -E -H "${BMC}" chassis bootdev pxe options=efiboot ``` -### Poer off the chassis +### Power off the chassis Power off the chassis: @@ -239,3 +276,7 @@ Check the chassis power status: ```bash ipmitool -I lanplus -U root -E -H "${BMC}" chassis power status ``` + +## Cleanup + +Uninstall existing FM helm chart From d24ad075a2c16be85d5f585ec7b7dae0994257ee Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Fri, 21 Nov 2025 01:49:32 +0530 Subject: [PATCH 07/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 9 --------- 1 file changed, 9 deletions(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 1232222eb49b1..319540de7b9d3 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -2,11 +2,6 @@ - [Introduction](#introduction) - [Terminology and components](#terminology-and-components) - - [1](#1) - - [2](#2) - - [2.1](#2.1) - - [2.2](#2.2) - - [3](#3) - [Architecture](#architecture) - [Enable and configure](#enable-and-configure) - [Troubleshooting](#troubleshooting) @@ -23,10 +18,6 @@ CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS i ## Terminology and components -### 1 -### 2 -### 3 - ## Architecture ![FM On Baremetal Solution Overview](../../img/fm_on_baremetal.png) From 65bb9c8f974b718694befeabb4a3a802c5620ad5 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Fri, 21 Nov 2025 01:51:16 +0530 Subject: [PATCH 08/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 319540de7b9d3..3660bf1bcf555 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -14,7 +14,7 @@ CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS i **NOTE**: -- FM on baremetal is disabled by default. +* FM on baremetal is disabled by default. ## Terminology and components @@ -22,7 +22,7 @@ CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS i ![FM On Baremetal Solution Overview](../../img/fm_on_baremetal.png) -The FM HA solution is implemented in following stages. These stages are: +The FM on baremetal solution is implemented in following stages. These stages are: 1. [Enable and configure](#enable-and-configure) 1. [Setup of FM HA](Setup_of_FM_HA.md) From 8d8a67ae2de8a2796320ea9128192b666ff4b8cc Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Fri, 21 Nov 2025 02:17:12 +0530 Subject: [PATCH 09/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 3660bf1bcf555..7240e53cb5dab 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -15,8 +15,33 @@ CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS i **NOTE**: * FM on baremetal is disabled by default. +* FM cannot be disabled after it has been enabled. -## Terminology and components +## Terminology and dependency components + +### SHS +[Slingshot Host Software](../../glossary.md#slingshot-host-software-shs) + +### FM +[Fabric Manager](...) + +### FMN +[Fabric Manager Node](...) 
+ +### SLS +[System Layout Service](../../glossary.md#system-layout-service-sls) + +### HSM +[Hardware State Manager](../../glossary.md#hardware-state-manager-hsm) + +### BSS +[Boot Script Service](../../glossary.md#boot-script-service-bss) + +### SAT +[System Admin Toolkit](../../glossary.md#system-admin-toolkit-sat) + +### SMA +[System Monitoring Application](../../glossary.md#system-monitoring-application-sma) ## Architecture From 352c0935ac543c832c158b8acb28c3ea22b61764 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Fri, 21 Nov 2025 02:51:45 +0530 Subject: [PATCH 10/94] Update Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md b/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md index ea091ee605894..ed7e26d4fb0e1 100644 --- a/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md +++ b/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md @@ -12,6 +12,13 @@ Post CSM Upgrade from 1.7.0 to CSM 1.7.1, if an administrator wishes to enable Fabric Manager on baremetal, they must follow below procedure. 
+* Step 1: [Prerequisites and Planning](#prerequisites) +* step 2: [FMN Base Image Creation](#fmn-node-image-customization-and-deployment-procedure) +* Step 3: [Add FMN Nodes to CSM](#fmn-add-procedure) +* step 4: [Network Configuration](#update-switch-configuration-using-canu) +* Step 5: [Boot FMN Nodes](#boot-fmn-nodes-with-ipxe) +* step 6: [Cleanup](#cleanup) + ## Prerequisites ### Update SHCD with FMN (Fabric Manager Node) Information From 066cba0d7850e8226b1b94ab4d49a470f3b2b884 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Fri, 21 Nov 2025 17:55:33 +0530 Subject: [PATCH 11/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 7240e53cb5dab..c7ab34cb908ea 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -17,6 +17,13 @@ CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS i * FM on baremetal is disabled by default. * FM cannot be disabled after it has been enabled. + +## Assumptions + +* FMNs are considered Management nodes. +* The two FMNs are part of different racks to support Rack Resiliency. +* This feature will not be supported on systems with Dell/Mellanox based management networks. 
+ ## Terminology and dependency components ### SHS @@ -37,6 +44,9 @@ CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS i ### BSS [Boot Script Service](../../glossary.md#boot-script-service-bss) +### CANU +[CSM Automatic Network Utility](../../glossary.md#csm-automatic-network-utility-canu) + ### SAT [System Admin Toolkit](../../glossary.md#system-admin-toolkit-sat) From e36d4ae720dae731d38fe2c4da08c6501c5b152f Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Fri, 21 Nov 2025 18:29:42 +0530 Subject: [PATCH 12/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index c7ab34cb908ea..fee24eedace5b 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -16,13 +16,9 @@ CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS i * FM on baremetal is disabled by default. * FM cannot be disabled after it has been enabled. - - -## Assumptions - -* FMNs are considered Management nodes. -* The two FMNs are part of different racks to support Rack Resiliency. -* This feature will not be supported on systems with Dell/Mellanox based management networks. +* `FMNs` are considered Management nodes. +* The two `FMNs` must be part of two different management racks to support Rack Resiliency. +* This feature will not be supported on systems with Dell/ Mellanox based management networks. ## Terminology and dependency components @@ -36,6 +32,7 @@ CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS i [Fabric Manager Node](...) 
### SLS + [System Layout Service](../../glossary.md#system-layout-service-sls) ### HSM From 95b23744653d2b42cb727e962762a0fa293324c1 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Fri, 21 Nov 2025 18:46:04 +0530 Subject: [PATCH 13/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index fee24eedace5b..3629dfbb57c9c 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -52,6 +52,14 @@ CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS i ## Architecture +In CSM versions < 1.7.0, the deployment of the Fabric Manager within CSM uses native Kubernetes capabilities—both during upgrades and in failure/HA scenarios. Kubernetes itself provides health checks and a scheduler that can rebalance workloads across nodes based on load, administrative policies, and other criteria. The Fabric Manager is deployed as a single pod in Kubernetes. A traditional HA model for Fabric Manager doesn’t map cleanly into Kubernetes, so instead, Kubernetes’ built-in mechanisms detect failures and spin up a replacement pod, minimizing downtime. + +In theory, this model should satisfy HA requirements: if the pod fails (or needs to be moved during an upgrade), Kubernetes can detect the fault and recreate the Fabric Manager on another node, providing continuity. + +In practice, however, this approach does not meet the contractual HA obligations. Because of Kubernetes’ “best‑effort” scheduling and the resource demands of the Fabric Manager, real service outages can exceed 5 minutes. 
+ +To address these issues, CSM 1.7.1 includes FM on baremetal support, which provides the necessary base OS image, networking, and storage configurations for running the Slingshot Fabric Manager natively within the CSM environment to achieve HA. + ![FM On Baremetal Solution Overview](../../img/fm_on_baremetal.png) The FM on baremetal solution is implemented in following stages. These stages are: From 5b757f6243a0f3091b7a57c747fd66143db34109 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Fri, 21 Nov 2025 20:42:39 +0530 Subject: [PATCH 14/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 3629dfbb57c9c..faf8e0050bac0 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -62,18 +62,9 @@ To address these issues, CSM 1.7.1 includes FM on baremetal support, which provi ![FM On Baremetal Solution Overview](../../img/fm_on_baremetal.png) -The FM on baremetal solution is implemented in following stages. These stages are: - -1. [Enable and configure](#enable-and-configure) -1. [Setup of FM HA](Setup_of_FM_HA.md) - ## Enable and configure -How to enable and configure FM on baremetal depends on the context. 
-See the following links: - -- [Enabling FM On BaremetalPost CSM Install](Enabling_FM_On_Baremetal_Post_CSM_Install.md) -- [Enabling FM On BaremetalPost CSM Upgrade](Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md) +[Enabling FM On BaremetalPost CSM Upgrade](Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md) ## Troubleshooting From 675cc0fa5cd516800d9f1e637780117e8a8e2369 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Fri, 21 Nov 2025 20:59:33 +0530 Subject: [PATCH 15/94] Update Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md b/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md index ed7e26d4fb0e1..d7b587b8128ea 100644 --- a/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md +++ b/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md @@ -133,7 +133,7 @@ After creating the FMN base image, add FMN nodes to CSM by following the [NCN ad * Below are the Interface level differences to be considered while following NCN add procedure for FMNs: * As part of the [prerequisites](../../operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#prerequisites), there is a new prompt added to confirm if the node getting added is an FMN or not. - * As part of the [step](../../operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md#add-the-ncn-to-bss-hsm-and-sls, include the new parameter + * As part of the [step](../../operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md#add-the-ncn-to-bss-hsm-and-sls), include the new parameter `--fmn-image-id` only for the FM node. 
The value for this parameter should be the image ID generated in the FMN base image creation stage above. After completion of the NCN add procedure, SLS, HSM, and BSS will contain the corresponding FMN data. @@ -238,7 +238,7 @@ export IPMI_PASSWORD ### Open console to check the progress of the upcoming boot -Run below command in a different screen to check the progress of the boot which we are going to initiate in the next step. +Run below command in a different terminal to check the progress of the boot which we are going to initiate in the next step. **Note: ** Here `xname` can be `fmn001 or `fmn002` based on which FMN is getting booted with. @@ -284,6 +284,10 @@ Check the chassis power status: ipmitool -I lanplus -U root -E -H "${BMC}" chassis power status ``` -## Cleanup +## Install/ Upgrade Fabric Manager on FM baremetal nodes -Uninstall existing FM helm chart +[Refer FabricManager Upgrade(...) + +## Uninstall FM helm chart (FM k8s pod) on the management nodes + +After FMNs have comeup healthy and Running, uninstall existing FM helm chart `slingshot-fabric-manager` From fe154a7ee90a9faa50407bf3cd6a09f04e6a5d48 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Sun, 23 Nov 2025 23:09:59 +0530 Subject: [PATCH 16/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- upgrade/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/upgrade/README.md b/upgrade/README.md index 5e601519de589..ce22b7fc553ff 100644 --- a/upgrade/README.md +++ b/upgrade/README.md @@ -9,6 +9,7 @@ software. Choose the appropriate procedure from the sections below. 
* [Option 2: Upgrade only additional HPE Cray EX software products](#option-2-upgrade-only-additional-hpe-cray-ex-software-products) * [Option 3: Upgrade only CSM](#option-3-upgrade-only-csm) * [CSM patch version upgrade](#csm-patch-version-upgrade) +* [FM On Baremetal](#fm-on-baremetal) ## Release Notes @@ -60,3 +61,7 @@ CSM 1.7 patch upgrades. Instead, consider upgrading to the latest CSM 1.7 patch There are no CSM 1.7 patch versions currently available. When any become available, they will be listed here. + +## FM On Baremetal + +After completing the CSM upgrade, if user wish to enable FM on bare he can follow the [procedure](../operations/fm_on_baremetal/README.md#fm-fabric-manager-on-baremetal). From 7f58def497a1576960a0018c76cd173e9eb6b8d6 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Mon, 24 Nov 2025 00:18:13 +0530 Subject: [PATCH 17/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- upgrade/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/upgrade/README.md b/upgrade/README.md index ce22b7fc553ff..f6ebd44fdd679 100644 --- a/upgrade/README.md +++ b/upgrade/README.md @@ -64,4 +64,5 @@ be listed here. ## FM On Baremetal -After completing the CSM upgrade, if user wish to enable FM on bare he can follow the [procedure](../operations/fm_on_baremetal/README.md#fm-fabric-manager-on-baremetal). +Post CSM Upgrade from 1.7.0 to CSM 1.7.1, if an administrator wishes to enable Fabric Manager on baremetal, they must follow the +[procedure](../operations/fm_on_baremetal/README.md#fm-fabric-manager-on-baremetal). 
From 4e0072fc840e64dd903a2eb32097207628962fa5 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Mon, 24 Nov 2025 00:41:26 +0530 Subject: [PATCH 18/94] Update Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- ...abling_FM_On_Baremetal_Post_CSM_Upgrade.md | 71 +++++++++++-------- 1 file changed, 43 insertions(+), 28 deletions(-) diff --git a/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md b/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md index d7b587b8128ea..5b8695bc3b369 100644 --- a/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md +++ b/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md @@ -1,25 +1,35 @@ -# Enabling Fabric Manager (FM) On baremetal post CSM Upgrade +# Configure FM (Fabric Manager) On Baremetal -## Overview +This document describes the procedure for customizing and deploying the base FMN OS image along with provisioning storage LUNs, +and configuring the necessary networking to support Fabric Manager on baremetal following the CSM upgrade. + +## Requirements + +* Hardware requirements - 2 bare-metal nodes with dedicated boot and data disks +* Software requirements - OS (SLES SP7), CSM services like CANU, HSM, SLS, BSS, CSI, CFS, ansible playbooks for FMN + +## Note: * Fabric Manager Nodes (`FMNs`) can be added only after the CSM upgrade has been completed. * By default, Fabric Manager on baremetal is disabled. -* This document describes the procedures for providing the base OS image, provisioning storage LUNs, and configuring the necessary networking to support. - Fabric Manager on baremetal following the CSM upgrade. * Once enabled, Fabric Manager on baremetal cannot be disabled. 
## Post upgrade of CSM from 1.7.0 to 1.7.1 Post CSM Upgrade from 1.7.0 to CSM 1.7.1, if an administrator wishes to enable Fabric Manager on baremetal, they must follow below procedure. -* Step 1: [Prerequisites and Planning](#prerequisites) -* step 2: [FMN Base Image Creation](#fmn-node-image-customization-and-deployment-procedure) -* Step 3: [Add FMN Nodes to CSM](#fmn-add-procedure) -* step 4: [Network Configuration](#update-switch-configuration-using-canu) -* Step 5: [Boot FMN Nodes](#boot-fmn-nodes-with-ipxe) -* step 6: [Cleanup](#cleanup) +* Step 1: [FMN Prerequisites](#fmn-prerequisites) +* step 2: [FMN Pre Boot](#fmn-pre-boot) + * [FMN Base Image Creation](#fmn-base-image-creation) + * [Add FMN Nodes to CSM](#add-fmn-to-csm) + * [Update Switch Configuration With CANU](#update-switch-configuration-with-canu) +* Step 3: [FMN Booting](#fmn-booting) +* Step 4: [FMN Post Boot](#fmn-post-boot) + * [Validation](#validation) + * [Install Fabric Manager on FM baremetal nodes](#install-fabric-manager-on-fm-baremetal-nodes) +* Step 5: [Uninstall FMN Helm Chart](#uninstall-fmn-helm-chart) -## Prerequisites +## FMN Prerequisites ### Update SHCD with FMN (Fabric Manager Node) Information @@ -55,15 +65,17 @@ Verify that the Fabric Manager nodes are present in the output CCJ file. jq -c '.topology[] | select(.common_name|contains("fmn"))' surtur-ccj.json ``` -## FMN Node Image Customization and Deployment Procedure +## FMN Pre Boot + +### FMN Base Image Creation The FabricManager subrole has been introduced to facilitate FMN node discovery and configuration. Corresponding updates have been made to `ncn_nodes.yaml` and `ncn_initrd.yaml` to support customization of the FMN base image— a non-Kubernetes image containing only essential artifacts. This customization is performed using the `csm.fm.baremetal` Ansible role, executed under the `Management_FabricManager` host. 
The following steps detail the process for generating the FMN base image with the required components and deploying it to FMN nodes. -### Create FMN base image (only base OS; no Fabric Manager) +#### Create FMN base image (only base OS; no Fabric Manager) Adapt and customize the current NCN Kubernetes image for compatibility with FMN node requirements. See (../../operations/configuration_management/Management_Node_Image_Customization.md) -#### FMN Boot Preparation +##### FMN Boot Preparation Create `sat bootprep` configuration file (`fmn_bootprep.yaml`) for FMN as below. @@ -100,7 +112,7 @@ images: - Management_Fabric ``` -#### New FMN base image creation and uploade to S3 +##### New FMN base image creation and upload to S3 Execute the commands below on any master node to generate the new FMN image and upload it to the S3 storage. @@ -124,7 +136,7 @@ sat bootprep run \ **Note:** Using the `--overwrite-images` option in the command above will overwrite any previously uploaded images in S3. -## FMN add procedure +### Add FMN Nodes to CSM After creating the FMN base image, add FMN nodes to CSM by following the [NCN add procedure](../../operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md) @@ -140,7 +152,7 @@ After completion of the NCN add procedure, SLS, HSM, and BSS will contain the co The following checks can be used to verify that the updates have been correctly applied: -### SLS hardware should list the new nodes +#### SLS hardware should list the new nodes For Example: @@ -148,7 +160,7 @@ For Example: cray sls hardware describe x3000c0s28b0n0 ``` -### IPs should be allocated and made available for FMNs in all of SLS networks +#### IPs should be allocated and made available for FMNs in all of SLS networks **Note:** NMN and HMN should be having additional FMN VIPs also allocated. 
@@ -158,7 +170,7 @@ For Example: cray sls search networks list --name NMN --format json ``` -### HSM ethernet interfaces should be updated with the same allocated IPs +#### HSM ethernet interfaces should be updated with the same allocated IPs For Example: @@ -166,7 +178,7 @@ For Example: cray hsm inventory ethernetInterfaces list --component-id x3000c0s28b0n0 --format json ``` -### BSS should be updated with new hosts entries for FMN with proper configurations +#### BSS should be updated with new hosts entries for FMN with proper configurations **Note:** BSS global parameters also needs to be updated with FMN IPs(VIP not included). @@ -180,7 +192,7 @@ cray bss bootparameters list --format json --name x3000c0s28b0n0 cray bss bootparameters list --hosts Global --format json ``` -## Update switch configuration using CANU +### Update Switch Configuration With CANU **Note: ** This step cannot be performed until the Fabric Manager nodes have been added to SLS. @@ -191,7 +203,7 @@ In order to generate new configuration the following is required: * A SLS file that contains the FMNs (`cray sls dumpstate list --format json` may be used to obtain this once SLS has been updated on the running system) * Knowledge of whether the system has the NMN Isolation feature enabled or not -### Generate the switch configuration +#### Generate the switch configuration For Example: @@ -199,7 +211,7 @@ For Example: canu generate network config -a TDS --csm 1.7 --custom-config custom_switch_config.yaml --edge Arista --sls-file sls_input_file.json --ccj surtur-ccj.json --folder output (--enable-nmn-isolation --nmn-pvlan ) ``` -### Validate the generated switch configuration against the network switches +#### Validate the generated switch configuration against the network switches * TDS style systems have the management nodes plugged directly into the spine switches, most will only have a single leaf-bmc switch. 
* Systems that use the "Full" architecture will have the management nodes plugged into the leaf switches. @@ -217,7 +229,7 @@ canu validate switch config --ip 10.254.0.4 --generated output/sw-leaf-bmc-001.c Take extreme care when manipulating ACLs, if CANU suggests moving a "permit any ..." rule be sure to create the new rule before removing the old one. It is possible to lose access to the switch if the ACLs are not applied in the correct order. -## Boot FMN Nodes with iPXE +## FMN Booting Once the FMNs have been added to the CSM, proceed to boot the FMN nodes (using iPXE boot commands) with the FMN bare-metal base image. @@ -283,11 +295,14 @@ Check the chassis power status: ```bash ipmitool -I lanplus -U root -E -H "${BMC}" chassis power status ``` +## FMN Post Boot + +### Validaiton -## Install/ Upgrade Fabric Manager on FM baremetal nodes +### Install Fabric Manager on FM baremetal nodes -[Refer FabricManager Upgrade(...) +To install or upgrade Fabric Manager on the FMNs, refer to [FabricManager Upgrade](...) -## Uninstall FM helm chart (FM k8s pod) on the management nodes +## Uninstall FMN Helm Chart -After FMNs have comeup healthy and Running, uninstall existing FM helm chart (FM K8s pod) `slingshot-fabric-manager`. 
From 40bef73db7aa8ff51d1bc4b0636aa0fa5ac91ae7 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Mon, 24 Nov 2025 00:41:41 +0530 Subject: [PATCH 19/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index faf8e0050bac0..34475fdbb5ebf 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -3,7 +3,7 @@ - [Introduction](#introduction) - [Terminology and components](#terminology-and-components) - [Architecture](#architecture) -- [Enable and configure](#enable-and-configure) +- [Configure FM on baremetal(#configure-fm-on-baremetal) - [Troubleshooting](#troubleshooting) ## Introduction @@ -62,9 +62,9 @@ To address these issues, CSM 1.7.1 includes FM on baremetal support, which provi ![FM On Baremetal Solution Overview](../../img/fm_on_baremetal.png) -## Enable and configure +## Configure FM on baremetal -[Enabling FM On BaremetalPost CSM Upgrade](Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md) +To configure FM on baremetal please follow the [procedure](Configure_FM_On_Baremetal.md). 
## Troubleshooting From 7194d288bdb862b8fe5e8df93d1d515b9dde0116 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Mon, 24 Nov 2025 00:45:13 +0530 Subject: [PATCH 20/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 34475fdbb5ebf..6e936de8e8d21 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -1,9 +1,9 @@ # FM (Fabric Manager) on baremetal - [Introduction](#introduction) -- [Terminology and components](#terminology-and-components) +- [Terminology and Components](#terminology-and-components) - [Architecture](#architecture) -- [Configure FM on baremetal(#configure-fm-on-baremetal) +- [Configure FM on baremetal](#configure-fm-on-baremetal) - [Troubleshooting](#troubleshooting) ## Introduction @@ -20,7 +20,7 @@ CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS i * The two `FMNs` must be part of two different management racks to support Rack Resiliency. * This feature will not be supported on systems with Dell/ Mellanox based management networks. 
-## Terminology and dependency components +## Terminology and Components ### SHS [Slingshot Host Software](../../glossary.md#slingshot-host-software-shs) From a55d4fc8de02f8fb1bce3793ec53504ddb431fa5 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Mon, 24 Nov 2025 00:46:13 +0530 Subject: [PATCH 21/94] Rename Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md to Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- ...Baremetal_Post_CSM_Upgrade.md => Configure_FM_On_Baremetal.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename operations/fm_on_baremetal/{Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md => Configure_FM_On_Baremetal.md} (100%) diff --git a/operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md similarity index 100% rename from operations/fm_on_baremetal/Enabling_FM_On_Baremetal_Post_CSM_Upgrade.md rename to operations/fm_on_baremetal/Configure_FM_On_Baremetal.md From f76a7111e5534d60a5214476281a19f10c3cf629 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Mon, 24 Nov 2025 00:54:09 +0530 Subject: [PATCH 22/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/Configure_FM_On_Baremetal.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 5b8695bc3b369..9066b7f8f3ba2 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -21,7 +21,7 @@ Post CSM Upgrade from 1.7.0 to CSM 1.7.1, if an administrator wishes to enable F * Step 1: [FMN 
Prerequisites](#fmn-prerequisites) * step 2: [FMN Pre Boot](#fmn-pre-boot) * [FMN Base Image Creation](#fmn-base-image-creation) - * [Add FMN Nodes to CSM](#add-fmn-to-csm) + * [Add FMN Nodes to CSM](#add-fmn-nodes-to-csm) * [Update Switch Configuration With CANU](#update-switch-configuration-with-canu) * Step 3: [FMN Booting](#fmn-booting) * Step 4: [FMN Post Boot](#fmn-post-boot) @@ -297,7 +297,12 @@ ipmitool -I lanplus -U root -E -H "${BMC}" chassis power status ``` ## FMN Post Boot -### Validaiton +### Validation + +#### Validate successful bring-up of the FMN base OS +#### Validate FMN required networking configuration +#### Validate FMN required storage configuration (LVM partitions) +#### Validate addition of FM required repositories ### Install Fabric Manager on FM baremetal nodes From 41d762c5b1e92debb3af658cdf065441ff0720b3 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Mon, 24 Nov 2025 01:01:31 +0530 Subject: [PATCH 23/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 6e936de8e8d21..ad6e89cff28e1 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -4,6 +4,7 @@ - [Terminology and Components](#terminology-and-components) - [Architecture](#architecture) - [Configure FM on baremetal](#configure-fm-on-baremetal) +- [Slingshot Switch Firmware Update](#slngshot-switch-firmware-update) - [Troubleshooting](#troubleshooting) ## Introduction @@ -66,6 +67,11 @@ To address these issues, CSM 1.7.1 includes FM on baremetal support, which provi To configure FM on baremetal please follow the [procedure](Configure_FM_On_Baremetal.md). 
+## Slngshot Switch Firmware Update + +* For clusters using the FM pod: CSM will continue to handle switch firmware uploads and updates as before. No changes are required in this workflow. +* For clusters with bare-metal FM: FMN will host the switch firmware, and FM will be responsible for managing switch updates. [See](...) + ## Troubleshooting For information on how to troubleshoot FM on baremetal, see [Troubleshooting](Troubleshooting.md). From 42a50d762824112540e656fdeb720e946face053 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Mon, 24 Nov 2025 01:14:53 +0530 Subject: [PATCH 24/94] Add files via upload Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/FM-HA-1.png | Bin 0 -> 26972 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 operations/fm_on_baremetal/FM-HA-1.png diff --git a/operations/fm_on_baremetal/FM-HA-1.png b/operations/fm_on_baremetal/FM-HA-1.png new file mode 100644 index 0000000000000000000000000000000000000000..996d45bc821ed74aa811ba4d393cdfcd251f26ac GIT binary patch literal 26972 zcmc$GcRben|9)U=TbdDau~I<%+eGj5loY_^7qka_R1bdZ_TQA_eN=y%nj*r5hlHkTgW}{@buh}%m4f1Kd<1%3a~V0DDB*e`}-GoDg_x< zz--!Tg5K|;+>xt$#B>?6PO9M=TT5t%`7<`SjO{es%U=0ThxtD@Pt$i7tQt6dm1At@ zOD3qoslvhWJon{VzX7&dqQy`uN7VJ$3t^b8HHXT^d<-$8yuj@~K@Rbi+LPmW9@D34 zI7Zk;*w~dg_El>wdM23DIMF&k(wDa`t-?Q6UDqBSZS|MmbQLVU#lLtDRi^syTHRvq zUJbJB>vdQ9dsQ@eP&zkJ&CF@59QI>+{Qp}mO*B~wO<%HL=0&U{W@MG&1}iCy_t~!6 zlcSZ}sU~;b=QFfzifkuH15*#DSbIbgjgJ;|tF0p>_pPc}9M90Yn&8*&%_Op}aqr`u zP41FBSlaDy@aAuR?>#3&7cw1Z>>aXpe6-@jdU5P{yF81c+{ji@sP3Jl_+ho-Y%A6o>h8sTd}egB455T(uo|JioN+MQ94J<~ofpZ($oICkp>58h>%?TkE$gt>F4xK0G?xlPARc8aYenc2CF zy3GoY`;$(@=o%$(*jE>{F)Y&my|&M#YXdA(R`}XENDr3Fjq|C*Tx@B3Kh};vqxzPu zeTkBoiI_1xEP5hB?3ZxWOR?<%bMuTC9envU9esAjgge?~<8aT{}7 z(?lqD%0xJ=glLM__TVP=N%D#(tDI?5gl1Oxn73G8KvkPx6`jxF8d=upOw~rGa()7R 
zN0-m>hT`k*54sGWjKnN_w&GoWb0qew!n_KWAnHw*`kY^BzmZreX19FQ-3I^8lM!dX zuM7P~9-PUHHE*RDKLpaa=v|pyZ6qYz_tzbN-X-|MHfo(-O7D4rj>H;kt;~55rtkWe z$O@#%o{ryWTt+bzkHxdex-K?a!XxjC&;_E7g0SEK7&tb+Qd< z$|lv(E0U_(cPJnSGmMRzXf{E=i=+?`sb|*p$n=jsd|<k7m zZvG{bDdDm^t?N&W@WR{lr*vKFo)e^N0o)RGKn>An1CCHSF6kq>Dmh~NHymUS@kp)ZM&}S{wcvD_ZRx9hlvKu%YKf%id z3OtPj{BP#_l)0;;dlae7rbCSn%?v!2OAA%H&S9f)>KjXri`uyog{RF@wqI~FaL<19 ziIjdr+Z*F)UwgD2O}Iy|wAi;1E;OpJHEfycWmYzLUpa@na_+Us{=v!d%8C2phwd9^ zGv+6ZB2%#UXU*vj^d?k|>t+JTjd1067Xs*C5l`_j7~5O97b&0E7^?fttM4BMF-9nC z8Ds`>_YAw$KGv4qv&kuxhr=tj7VhB5_^V~H>{QMV#^ZcgL5gb+ZXK@poP^G% z`bhG6fQ@J;6Js{IdUu5V@g z;@oc-SCycpdd68$3N~dj>2YUpl)9uy^Nk7y>Rrtm@Mh-49fD`Q^@yX{8xE*sandEh zIhvE?295WqiE(mbn|$7TpPlC}HSYW_W;;Pg?~y62kLWJzYja;CRr3m8Z!z2uxag~mK(6@R^nQd9Ec zJ2~9Q+6d*Y5ni42-`14umy!5|t2=b&<~ND`x%B!^m)*W)fj4qf;qMSYZ;2kx)lSFW zoSLtl_>4B&J=?I35wf zt#4UO)0DaOk<5Vx&}BfOL3 znY2k489@kWm^P4$#E)q5>yr<#$|9j^xV zctd1S6*`qvP?h$=T?=Q8lIFe?Ggs4~d8EX*IhOQj+*`3CQ>c$ddPa1yM9&F(HkTa3 z4b+<&uMMU42J8k2WqzTwn;q)N7ZZNG%=1c3JnvmqgfiC$CL`V=%Uj8dF`A#Jz81Ij zOFpLAPEuQ!T6r?WuPB`w<$lzzz@A>bd5-arX~a4!`RR);chjbxzZVtf1}tjSyDD%* zS04#2m-MJ|KupG1Gh$dj>T`4_sz zURzBu^VUt4)deE6Nl{bQCGpt)bRGtb{n|bUQCs<8`|G{pLTm;b2P?Hc5T(S)C~?K| zXcjs?u`U{Jh!&9rD1`mucUL@_4Wq!YguQdwj^UcspVxK!@-6z%v;ERhyOmISXIL-a zP)NmB<-Ky|Gxj{vRvVJEQfu^MpRTjb!1( zqTlBo8w5mqvU9kns@4Oqo&42#vy3P)h$E9XVZtJu-5O`&J1kN(LA~$g`&VMvotwzh z_0W77?NJ9gg_G1inIa zar(<@`ibt{*Uw~}NfW+lP+)0C??H>c@w3qW`G&k9<5o@#`Rmx@>jT9)B@R^_?cZb+ zgS`By7?<6WII?M5WK^2|GBi-nVV$PdU2%Us7SuC#eD4m`^)H{8(u=0*-|`5V_mYpd zzq0aY7rJh6)vcEQhFYyd`H0KA6}N6S@|>_VK9Y{NOl~@rhp+NCCxsob)k|lWpJAJN z9mFN3K)xrSm${x!cf=-d zJ3WoGIoNVmJQ80THE>DR-_OM3%e~>72wLqttr%2HM{dY~JFK)ah%=ATb;_c9Y%4Ddv(0M5WZI>0pj)#ZBW>G|Aj+`hN)fV1iKVy~ zq>?$He>Q9`bIkP9C5`W|7?P|RQSr{j2I^A*?M1=E!g(_kiWYV(c9!TE;nIC_`Y-!a zo_MDnAM0{+MD~nH^zTO7H$9koSN^;Hal%Ei@yy!^Aya|xE@3xNBr}9b%;IVoIX3YU z1`+AZ$Imbl@HSP7dZ)#&w-<4!>hifaI$hSr9t3iEU)kq zw(p3i-Jn$oo@_1QoOK(az)Vorhv=E2V@tz&v3Ns2U8d2_QE}A1sD7G}La#Q^cf#6~ z_Bf3z2!B1D=FQ?_%}(`72J1?2Cr3NdXE> 
z0J)iP>b^Rb-$22=r!@g*)8C3$$Rt!wCFp@btRg48+#Su`_YLN!lV1X zbT-Px9B1v_t*9oV9zngl zjd~xI^gl@5xJK^t!I;sbi6lL%bV5r$_O^q1Zsk_T)%GHRf_mQQM+O{PEOK^X;pVl~X-G zW5$CtuUvRam=u3&d4hN3*g7K(QeUWc&+0d2h#F)~aAlc02t2enR zi7oeVg~T|b1}x$XpC;0N$C=HiTddvV4fr!ND$KJ0)_hSj8htwB>u(X`aR{hyoD2;jvQbIglue@go2(l7Az%5m_b)m-oE zU)0ZR=cBxrKv1NDqtD`p06cA%+wVQMNN!BKPNQ3Cl|h=4!Dt)6+?|WI{o_0p(g-o% z@VoFxS^WP1J_zW8{FCf-Zu6_PpD3r<1pJ+$p+xQ9)s#k+;<1U_6!Xf$`U|536@IV5 zkfQYXpxS;c=idYKMOOyZ6>vSsR*m@|oMpv`byNHY?$~Q&&1Lh$WWm--j{rppor5)yKhn8S{w9bvF6Q#GqxW>Iynf$GjmG65Chfeczly zQkN0Pj)x4n_TzrER01{R+sOv+lGZR=Op`L2WDek-!Nb7D@1=aJr(JrYIPlsK| zRxWIO;a5NWc}8mP9IIWd>Y>0`BcW8%H}TDGnd;OHW^uR5KTb{&mmqQKOOz;@;H58G z6jNuDUwtuk?~CQc>rk$cb3(&B-T;=gKzT(8wS;9Dos{IYH9&p5@#z3?IfOJZ5~=RI z4=3qbp{R^+c#Qen2DjC5{f_6B+LWCi)P6523yFrS+SE@SSZESdP&#;0yPCR0FQQ@a#5} zP})oI^g;lCnjm{lKHTU47=c{Z(D`=N!LN_jU2e29e{x3_IPpnIiV2=M;0(IeQQ{o3 zBxvS?2lnCDz6T*NSRs^|SwGKmRa}-l+3RYhv`l)@xp8hS#cd`*fZ=nE)|VN_;oo*v zcLp|*u3#0;*BlB3rL@C?X>E0hm~Gop*U50{0Ypxq^qkZIFXCqgddK>F_`mqV5}qj=QmP4`w!DvRBA}c=B$`> zxZhtJk?*~g&Fj87C+*J#k+X2>rE&`_IK^4c5{;$8R$AFmBn&QZoSTR?{~lIk3<%}m z{2yEx2hQ<19(TedUv_4Vdnn_Bp?pdQCM)d8yENUrPccKqKhm!!|MIabX^SEn+KVCiVEX;|VdgbFv4d(5%u+Nxf z$PxmV`l8*zRd{H+c7H|NCZXRG#RGhiU6aUv<=6{Of8@KYl*)!yncqH{f(@W<>9&MF zCm$yH439z+alSXMv;M~oOd*yXF@R#y2=zVwe9ha5<|5C1zaT?eAl7jGo^BqGY~3C7=5H5| z@rG!zO_hN6pX+dLq$M;wzC4}yXYKwRD`g~hy-Byr1amrvwoOMKVg~nK{OFUD{Lgzy z<|t4!+~Q)HS++s{{>CN(<7*S^UO>zVf^jyoj%jhQF643^gukGk(=!dYz<{s=ue{HN zQSiR5Ca=g)PCJjaoa!Rg{wxBso!?1kO!XAPsV}Y9=cc&`0BwH(swGLEqLe0rMzFw{ zd-u3VtoX2&jiWHHM(&|Ye4^;BvjZE7b9WgZrMG)SITBfWwCuoZ+JH_bz;QnJkj7D> zbZ@=P6|a}scVsd%QKJ89zg_lH>a^3Z6@q@^e(jOauDUW!-?>|Y9IO>(Q4&`Q!h1#)an*XmJPdKTv)BVVqAVj{BEx$JO98!`BATT&GpTKnO_}SyyBp$%$K~E;AS~^hM*{iqSfH0fx3A7 z%w6K$#cvm0_-^M@Y%{crX44MUG8~r*>97V^nkV60Sd6`EPkGCoR5^_nOU_b4^jT>l zTsk7r{bG7YQ0bsQ^tHel{H}{GF~*qSa{LCaNy9%_b&WH@vsSE+4NJu(KJsAWB!k2E5P<$ zM+tmD|JSq?!GEw$k0<ioihYyKV*WK9X;ui7E@t^dEO5a8VUyCr!Dj9J z9lrkK6%^7EaS1~0ALQEEiD0~TMuc1jOarlEJSM}1T5d}1f5pnU_5!gk7t-$I(vN32 
zYJ+@-4X(cRQjWD4vjkwcY{Hk7kWFUfu3UASyA$N8-{ zHNCkGCe;NsjuLUwS8?^^3-L^D7O)OP)LhFY&pcXGZhwp?%O+lpzd+&&A$rmm`+ShP z_j;7$XZEb7TiU99RJC^~p2dFjl)nP#|U2Z@zw& z*dAaU_1x&VrM{qC{qBS@QvBsa@n7dk|As1krLR#1O6A?5*j6|=Lz9etK+5)ZHOJ6Y z=8nuM&5Ez`RL$ZWUq~aRj$IMd-4Qa679`I=o97m+O}4hSv8GZ(qGD_NVLH~a<68e4 z{91_Jg}^bj08B*S7eU5=#Y*S{Jt(w9v?s_LID|oLmU7KB?S(SvYIWcj~YSn=an9eQ{X3*Wr*k@05w4 z`WsWa9}*sSiumn*7(*z7I!*G-kv77ehO^vFu=Aw7QTu%7)Q_ysEYo{e7l`}pz+iSr zko_&78c4rLp!g3KK)8gvjsBs@t#Z&`-5eT*Zzt_* zxg$Ootaz<>{7!Heg%c+3(^$k>%>Tudq9X-#iup6j`wBMF2vOFgrOX*qlnCjc8ITML zH`Wp6YCU_nz$lviFyP`HHq{RPzGKJ-MO=VXtQ*iizh$yd>!>X8j@vtC?;${V50QKV zql6kDUcxI&|CT!WnP-_!)`{e;y7ahys`|K0s7^gnLlMQpNqXd-`<1hN_+`qCtQ5#= zjl_YvUfb+Z&pLWivq4kkF_IBybP0+49$o!<=HX=Ik&x>1cEVhTd6}4WpK4EEm9f&4@NYn81KN*$DTZk!v zuO14`I3)fPlb%aofVW)H*o1-(mY|bq^_wzxM*eH(`-_rRF*ixruE0|?Ju*|b+~@uZ z{8rfpPV>>r=$q7zvYxQ?Nlp1tYP)2!m@~8#+%>yq?TMw}I9kq&Ep@LyLQ>E#+nzd-7^kr13jWk30d}-l{Q<-?%TbM5f-d!&>`Sfx}dSh5kM@Z)T>i))v??x72dQ5oGQ!YYLj;f)<4G z_WnxlEZb}PqKR6jue-}Fc`(@3QeLx_>w37>+C$#u8bg(DCgXW~NI~*RTU_TWL+zzz z$lZVB+Xgn&ApBE(mW0$=u(~Ycp0>l4t4UH#$)RQ6JiNH06At-n$reuy8 zZ$>N~sj%crPB4D~m+wOKOp}#xV%^Nm=#skogxA53v54C``pjOasHzV?yn*`Fu2C0X zLx24yKxw@f@=2&>EqtmR*z3%=;m5i);Honp>D;l-sMz{`SKD>^zS44T-}!jH&x#)@ zIX@sFwAk*=@aQL6z>GMV>khowA3}6e&cA#2Vg2dfssKr!U@v-(C;i4gUi-K$(M#ym zg=TM4LRTMFmr(ItHk@tV_M)Fh4OJ}XeW*=8cuhz&ac`*{Or8LX$tfR3@ztjMc(+w* zg2rq4?z*RsjFDISIB7%l9@NSidY)eMi^eDJqW2pQBKKU+xGX{SpUB}Od3e3Lpa+UQ z>nX;DHxj_Je_O$>rSS}*sPz^jRY(g2&voVo>(R-)y3Zw0MtA^_;9Yc|29zs~0Dole zn697iC^T=yS?QRvPwmoNKUPJv_FNAmShYy2NibR3y+f3KCeF}BpTvcNiGYmpbo$N6 z^8}p6Ax}_Mgpbp3lzz)&5ap2gCrk2-YH`j(6);HOUuTKG6rt z8r6EtYqI+5WcWQ2Frd?U41*^Odp9I?OLWI?KaU`x5*-`2ss-lA$RA-2|E zdwi{Czb`+N-OcvW3Q3F0^RDBA6}m7M2A0CWpqn-CykVA zOUq6QX^V@ICXK@ZJ0SD;!} zGrqe2HPV~RChx<2YVaX#yVE_#L3M=^H^|pSKopUcf7xqZ*%-g(>+!7AiAc3=3LoAN zkCkd|+NRo{PtUIA&F{V>#vpui&xi<)bAI)p?YLM4ZPpzl9IZg?EKvX=l)kY@^k*=LakfxH9FKT z6H!WxKu(M;)&*pW1bAF%lAu5>i`^iLep7(Rg z3+(NU4k5aaQ}>lrved-?h%vvi# zZCeV1!voQVwQO65YM09<8!j&*=QNOk+$UHC 
z;=ILLRX3aZ{+syl4(fphoE+4in3I3F ztjd}(s<)>D5J1^0*R!92(&Blq62N~=)R@>g+~_(pLDm{A-a>j(lXu5su@rkv#bd)$kFeR%r=2+*P#Y^6EXnfr z4p*ZI0{$;Vh^FXr=9QDojn&Uy*TgX0_Xf3S^S`l>oUFHV6E0yhrsQPZ+aq;!D+u(; zXcXjUd!8RN%aXJ^koeivE7B&Wu|rIIKPTCe=TvLZmD)3@V6_MLdptT7cvyP_BqoxW z@jOG~Hy${*{?AYqCm7ih7(O>&4sGU*WleE4D>x=e52iO$$}ozeNN9RKE^Oqr)rZa7 zD)g23X&8lxlRib4pE&NV@6U?Pzx=ps;QIY)Iv;^w8}6wd*__0QU&(ZM6B;baIC?g1 zBIhUDk6+xRRZM@Y!f47tsc1qh(GcVOuF1No&*lsxg-YL=-;Klsd`5Lz9tztJQ`%&f z5=`CJ=mO&btk_Qlhk;+!W0`p0$4im*rV4I6KR@yOt4b{Xw9xKU{Vh>5Qq6WPhQoyq z7j*RKPxI^5-9h+#{`hyMEm_}c=oPe^c7#elU;kvbSQz!?^Us3YyBiua2W>rhd_J*P zOXm$8iKpA7WIIxj2u1g?tWQuDob0b`_n( za*?fBg1(?*s`%Jgugw|tJT6GFt&iBwZo0V(v=?n^NHb2?82gKXZ8f7^wa+Zl7AlK> zjleZe7A3s#D9Wfq66o@S=RkSK+&=Ejl8pI&kMDKcEx|qW$3cExve9PQ@poo9X$O9A zw7(y^zR)oAi1rOq8)18V)}9>rrF75|?se-h`}TQFSLrB;?T~Fg+&b*%ySFPpA5y-l zi!gnh@T)b#OlsprCr4tEjD-=cP>l3V%goiK=f9d=`AWm|+7tc9&F@8IHLl$SsoFPe z;o;@SH`}$RDGVlFT*Q{EIuti{jWEvm0mb3BeOh@n{d=kT7_$!Iy_Zr9Jmj8IyY+-m z#b(7L5nYks;Jqj(Q7CsZtk;IE7#Z5gueUp0corWdZrEmZ>Nw0_%Qy@~C-t9L(sP2$ zywMb#)4>|sZy3EY+JER~SC4}17BCE+irqj`1f`@sq1 zk_#BuMVm8)>0$OSaWr=!_dP9mHm)|Dv7M#Vp{c_#_YrQ^xn3{O_ss~rWz3J9Kua}H z9DONL+-Qx^B+UF#*aLF6mV4AzXXo97Ll2!u<`6NnPq4GJr@T0^rk^$by#C6d5aD54 zZN86BCvR8`N>sfa*XEi}!yXBqhXE8cZ3i4wyEN|AE(Nyq#t+rqKC~Cr$0g2A*AQZ< zGj}tgaOFpjdE=M#8XB*TyKYh2$hrC9+$iExUt(kojPc%zphW*PeZ!GBY=3*GyvA*_ zJgH`0B>zjQI}d(ntr|+^I&g<%w>ln6qg|z_^EEDMn2{p%n-wt@E!$K$O;~hMOJY`w z{!q{B2~K&G9esDsjSDFq4AzoU08i}vi~Ad(kMe1+2Dl{TSg!x z{!kQHcx102_3IunpZ*`fHA^(-IX)=jWug4P-`+tTqdelJV}uGP{;A9u-YgJ{ex+VV zjTAKqOfG`ALo#;*wNkaA)|fk^?dQo=GLDvy{sUHT(gS9W{pMO$ECXhnbQhG*WKfK$ zRvqszYUg~JjFh4!H*z(l29fC6fO(`UZw>L^FYW{HlCAkw9Yv$^6GSXQPzmwv4jZR; zAP|C|gC-pUG_`YmY(feOE3HYzw0Lw*45S|yp~5ONDc8U&iL9e}GE8il;&vZW+M01C z$XWz>C&Tz#09~UoZ;c3lm-*`bc*#FQ{dX=TsH?dpTEkFlBblDyuo;^5#swh&V1$x7 zgZ`{3W@1+|`X4Mn3Err5iB>u79y;@6vutgt-C-yDJHn~l)Bm0?0C0mP0iC3HtdaXX zWE*FbUodwQqU2+wXCbz_c+>r8IM3`mmt?wTXI|_`xOLalg^eWf4`O*5SS}9NGtM4c zwmv{LOF9`Lo#r#*S~PFc#B?J0tVu6_a>d^`tpGxO;}Lb-KrOTIXsiwXa=%-H)WR4d9#uCu 
zY3)Crd^bQv5ovg;J#8fYP1&$r1}n!BYrdkGc7kTfe9IzPJb!<2$3u*DW(GJ*gcj*G zfIXO57ZAcLNtQNV6WMrh6KA4$`9ksUKZ_>#+Svrbd+Al=UrK+#y%o~+&ho_RvgE-* zK>DN7fuY|tyfh^ox%O`5@0uv=%g;mAjrbyWZ5zoWa5QY9e#;1?S&)uUi+|jE6(o}j6aW2qYCG$cxXTCcLkIxbDi=Gacak5gQoUXtNVY3cnxgDu%}P3 zBoXhj}O z3k_`=kz1ml`ZA_u%&}Z$$IK9GcnU^Z^gQBYL{`fX`alMmY3%r!H58@S^c5S;f3Km{ z1#c^j3i1Os{}Ve=8Wnk*Eq);lt_W9k-!=al_b{<8&- zLK_I7f7zk51bA=98*8IKGeZa>?iWSLca8A`5_~6$+oU@nyMxXAo6Gy-T~ZP663>Ta z4zCV}!^U3vn1MjA?=B8{_kG@Yl^q_}Ql_?2jgaj_6_22KGf-#k6Vk=R0W= z#W@m|)rhYwL1tWi$Z3emV1@;?>(_V0Q}~IEqAiVwKQz~Is-co)VM!IMt?wdBN+1DD zWjB8r8ZSCjTFNGScZroJ9pTvK%wYUnzdRGUF!zzo4}G(ToX?=SdpP)AYQGGBuwX8d zXn2+pmB}H&hm9S~M^UWvfpQu#3eoX1etmV(a<0j;=e?CYOgoOqs`O})B*)J_v*B?! zZih6gnaTGn4S@ddharhh-Oe3gle~LZfMe_FYZ{p2MY{^*ews;O4O#Ra#MkH+fSYdVMmKIC`EOnpMhOY>qI$hhYoq*bRpHyOMl1}uf$(kbJx&+o%!ww7Kp4AN%G?%eP(Cf}f8AVTmcCFl@H0=Vr=g#gLG&g&tj{AEM8GM2_1< zZ*JK_!iL5$h%5B6;Wk^QzX=0wQ{YCL%69&XZ6_J(0863GBD*Ck)bQFyulvUoqZMw0 z=06EZ;$;jygNERNkSv8#Om0JQz(|aR{=Eb)L@48D`P{hDf+X23`9i8l@UKD3!Jqki z9Q=`pl*XdkN7$mor#fck8+L%@Sy6A`HkR&&%#v8)V-Tq7GvBwt^7@dj8d=c|gM)t8 z+PuSRb)394vJsl%FWdQv9%|!*Pe*G3sxkx0T$EDa=u!pLd_UG7?cP5cWRpykE2`Kd z>p#BG#WGtxw<2fmrR{PMxAUZY`9jLb_sKF{)M-XIoqBEy&{82--$ChrzNPh%t z`5`y@==W)kh5awB;)FEh?R9W8*kPk1vUO!jX_Mf@#cD$B($Jq};P;f8qD;gZaghEr zq6eAQG+4?DqKN%2KB59Sl@l?XPIBim)fj1gczpXe?eE1@xC8Qd0#WdS1c44^u)9lO zzdahSe$!?RkOp1MyqcrnhO5`31Lwaiq+~ie;MB_QlDZ+tnfqM#rsEExnkq;rw7^d2 zn{8%Dp4XY+u}Pj$?%qt6{t4tk2-1Rr;1j!3tWuPKa*dKDZUJF~=wv8b>3lrJp(#l_ z%=kU7PFV@KmyBi6t6Z30h<8b;Da+`?qlT6hM*tFRI+U?RGd*9O-NZLt@Lgb{olYN8;yi~3CR;g>Ef z=GS^74Ry|1vj1nwV(*E~+>H-&)6pm+00+npepR*JiiMf6PT&AaI#NVm_I}jkgv9ckgB&ZIG>13r>rq3w9rF0-9 zT7K0!e&DqcL}3I}RU{zoqGT!8cqzLP2Xqm;To})Z6!X`EMI-^RTq%|2_*kccBquHR zCuu`0jZ=0f#cx@>%0~~$nPev1SCZTCcNd)-yex`fzMIs0#;9pTd#3B7e-|8~7o3Tm z>lKUms4%@_pg4ZnEn*XjKmTsO?e2ORvw+5*&0gg&h|{Bo8Ee9|@`{xCOK3qTayIWU zefJmz-qLZow7&(~8Dd^J0~~Y4J82jsJC4xx^7w6(G(DWlvxA&EN^qW{rE9^ODXG*^ zrx{j}zA%KRj0YOV46>DTDUXZ+uhl-+8cs!?mGnqSwB|nvkcKSGonbq~86f<9IdIq9 
z2J&3=7(}Il&^WF@ZuyZnMpE);mS2FjBx`EtUd?&We#0l{_qKP_^2r)dd2R>vRnHuH(?6n&jTp zl+XTr7HciiC&&9}>XTEF1ERAaW0iWsjo~9=%UR{X7Dq&XWKj^(;*Y-BmEO2l12N-v zqN<)n9q6Ep+%8!rJL`z%tN0*2c1X{_;oxQmwp21YL=DEfNn7L%=H?5ge8xYQ??o#o zLjh!+tTD;Eaur4*&(7P!t2LA zsdV2%flB@^qKJyBsFAY;@)TGx-6jY&@lRcqya3@kCmEU`K01aNN+E0$6#juHOd49K zMx{F|+qZsbAp$IL60v#hqVF33&%DF)G~puxViaw!Np>Q=I>G8F=$QL5a4$+#IH}1( z{~|&wBc*+{BWv)_V6>7n%(0_@Uz|75{ zk8l_?WU>l(WevJrKSU_n2Bh5#>Gtb!tN}wZ*D~7s;DHy7R0;&uw`xT1nB$Q>t3#B4 zP=y#A0y6YgpH=o0q01DpD{63uZARd?l+TJ5W7^UFIq7#k8hL7Q=Dg@!+9nxKe%U)6 znH08diIBcebGk+-s4tp98_RLOR_`h}lVG$9^q9tjv;t8{ynS^w|LvMY$nCFlgEvsw zOCst&@94deL$bMNrB$+LqoosZ^=x-YOge_4p>ncYhEO$?AxHkHrh4WNuSm0NqWixK z{`0WekA+`_!Wun9A2wa`M<^+r0`SfI z+lUhLnXN{YK0(aCbp{30iliDoV|Oxtog zC`0-M`Q3S@IvH!E;q?o2eHMc9vz6O>E%dx?hU&|@pWr~xbahiRb(^AXBcSFoMj@@y z;?M@?h&1Vfky62-b2tPrc*Q}m1rQ6QUh*XWAq}d(0(6Sk-m;#@iciy0;%LHl5}}}a z&y$N7kcbNcfmsj_6QATy-BW=cQa;DVb3#r!jCQJvxK0lh@w?9dGJr-+Jb}Qv|0~qs zH(Nd)a8cqyG^wli(L;@IX8|#w0IUcn3&GN#!Z|`c3j{Z6idRx?2Gz-v>+Dxnk$4XX z77=I`+;Xb7hcez4$3#nCg<0NJ{80ppp+yRQqw?Tc)jD*HDy)|IGshY5AzqZ8y9>qC zeFxRSt~x$Q9bwtC@AMBAFaydxCp-wLaZXNBiEY};jPhYY2%FFet}-5M^B}?RD|0su z?CQwZt}-nMMhqAO&&rwNU=8JzzJWq>bMVlFH4$3?haj!qw>GZoGdsNRi>ph6O6^QQ zR};*m9MXFin=bWH`^*9Ek4r_HS2Cdam*XO|*-e3Kr&FcXr^9m+!LXd|;>xn#03VnK zHQdtsUwMNAfXB0XoeRysc)Kz^LiGLpOV~^x)P~^M6K17NYVeYI{I>J(MT zBXa_@eoNs=VO3clJ^Wp-{gbZ1JOY9GDtl+ni71Z^Af|UN-@Oa_Ob`KT$%lt3NI{i$ z1s3x*hunGz7u-OFRI8^(7+^-fF2jr>yvt(X=SdiUc=5tvd>N47R-#C@_~O-HV0%cS z^3-V*G*nbqa|&+U)6a~$-t+r21Y}c6E1eJ6Bo}u+Zacgl2_)faXGyV40C-Rwdznu6 zwqsI5@hLCN?>cDMZ8rJz+wj|SvM7YPWfjc6WvAt-v%2G$MdkHA(F#HL~@wxV(6?#D)w4r zgiA8pFl*H5NmIticllc%^M>xwpUp>N%Z2uCcb$XD?z!`v8!WR%pO&if@CP}sUG2-A z=vge8VK%M1jO%K64rLS?1rkGoVX26V*efq5s`5S$ejQ;71%m?t1nzFVX zh{*jEvlOHcgyducLcwpE#_c)>E7o$+N7k=>;x?2Q2gz9S^|YQQ*6N9|gP+Hv(?aZ0 zdF)Bsg!o&AUjgcsTO?jYWY%ec(ySp}Dfb}rz1DudvHQx}VAZ9>C?r4x!y8CNDrgu`h^q(D)_LsDM=ewiVMt1etz^T1=Y*Kwnup$j z?xAGSvs}a>kZ=47Km2~zp&9Im9IB?_dqfKXYo*}`7*Qk;?uxVmI0EV~Na9>13=%A@ 
z@*2!m!+y(_0ZNVqe2ePJ-`Ymz?`M0Cw=$+bpc%+gj7~e_{SH2LSZA78LAEA59j$Hl z{$1umxG^VzOuL~ZV@x}Dt>>^~D^2wpGy(XufYgqPZofOw48AuJa@V2yS^)BW2sIEv zIemHNu!1D;Yw+=gR{B8dr@D_2*$qe0Cx$i}gh`KFKQDE-ihQEv+uR&{*Mk_a)%)*R zw=zb1PY}v(ZLs^<9aH~BCJh#pA*iLKo6c|WC-vZxKiPEe2|DtYWqlC?T zEJh%PsqQ|$7MJ}&@e~SQoh|yuImKm=X6`cmq@Was&w!YL{(6No%ZQ@E0j3d(be+1u zx>HDcx+*<^;-?vE;5h$Dpm*s&ZSX`Kx+>N{G$?_TyE(JaR>L|86+$FJ@V9X@d-@`oo>nca& zq!G#j2}nljeZ`Jj4F$XdjLXIw`wdB=k&p*$?3b`}?zgFXzm%tVGGv{l2cqVQF#b;5 z?|d)Zs(JjGw3nJ1p%>x7(o<{p^Ew8O%>vAl{RWSlK{Ga1fuJgYs4Jtkko@`ei?14) zox!OeVM;=H?T6HVec1e_+zM$N1r+}-pQGlz&~{wn@3?=DkhrJuycmatCO3!NLq-6a z0JzKQSs1KXFm=dyTIz%6M+HiCXyXda^1vo`%nA2|s^@nnN6%nR+`U^xMidNuskDKk z@X;Cv7hYJvED6R}IerZit997Or$3lZ9K1IYtwodHLG3#xzIWSx1`7Af-oJJ}jvaA5 z>r5fd&DwN~5;V&~=X>V{VSFv1;^ZfOAE5QyTwh9sj3q6ms33>D~jg zPabRadq7W!S}yxxzXn7(E3mWVjnB?4cI4j~K&tNVh77`#8TW>g&|zwwP(!SNoLC%b zqN#xIY?wi#M+EY%6Lc%mt1IB;4TapB()hMwGk&wCAQRc>GtoqGAe}V){lN-u&+)VM z(D=P8eyapdLB_o2AMtHgo}0^7!@BS)O9 z#)lK+g}qwJ8B0)xL=2etoqPHGaG+H(_V9{O4!H%s&tCx-m9#fhNcD0Y)f6c^lOwJZ3<3HY3k+QEwFteERgCDf7v-jGgg zr{ov5Tw>>hTnWq^rt zt5GFBxzl#m(?|hiS$m=8yNZme*m6l!AAft%?x&lQB>J(c7rzIc$v3~@za1V*XR*DN zIyFp+=R^;lU{Q?b?wcbUi6_b&XQ03l*>AgpP$v{TSk1G(UlR)2&|lQRKjcG%113T` z&2S>BT=i$=sX|q#7B3;B(J8I0o8vdfp?Q31^ZvyWFCAr6E@SukwO}vnk^s+JPxF(m zT^e(Rc(;d;G5waR*}P^^J67wbFOL?tX-^`RPtQ7?mAofy6gTBLfg|ajHTJF=?mRjx zqs%C2O}r8%Pf5Vj<`*pn$ZgL$6uxbqf{n!cE?0D%c*Y6g#p@)AQwgP3_XW__0Jf%n zWzM1e1t^}HO{d%KyFTl%n6ncg)7R`{s*>btFN$`E^(wfjr>ojM*|zwzh$B>f0pm3_ zznOCH+|db$uU;(NsNQG!+0(2ai=^wlLT^1f?W~oNDk9{gaDZtqIFAljpu;Bi7xN=^ zjog@BP1oTQ5m6~54wkCCGg}0k8hy)1g8DwELl+gm(2c+5_7ig^lRI`T!cg)y(3=N1 z4#{(8pFY`Ne-?_gw#`Rz(SZHC^wMY4gh?JQY) zCLZweU0BkK{!E@*f&C2U)#Oy0DMGZgKHKGPIy0^BY7w5bdW+7QcBj-TuY|}Ri=uR?EkCl%j2Q?{Q}X(3|JS1L=0EE!5hh3q0_iHtC!GQ?EKmMHr!&*x61 ze$VT9UjFgAbI-l!-gD09ob&#?-|u@>acL4Ud#W4oo0kk<$_QiMIJGqQv^p*5G$tIu zR59EKKfxS$;IgsVp<{)8vxlDVI}7-n@rC}r~o@7AJAUBLh`+ca58j$`IC1YTYT)`teYPU4ND#lf}%S%1b|1?8U3Q14W}O3;JR- zflYao--8U4;}FYpUG)fp`(f+o%t&So(}gbQRL{4a?+apO9MahaZ&_yT!-A0MTOp)l 
ziA4{Y%WkyyVHaVg!TliXdzb49Xg?^YeSz*ao}o~Z;CHnTkyZ>G?z(qv`wdI;rwE7! z`_H!m4d1?50Fv^_m)!&mxNch}I?;DhzjqcFzK{PLA>VxTBO;k7I>atuq*cNy@S;gJ zM+TRvgbt11!pcZ_ zM@^a3f+{aqO&R^LZe4xE7__*Elo44&ESeBzuB1;3S*x$qL;lqLFOv9YN@r@M4EXlg z@kXgSIxc$R85xBzJG9YlE>}D?l@AAQPmcBW5-C@W_hkDYXZMV+gV>g`8&2cI1#%1I$3_MRZ?`Mk7j}sl@OujM=jzHRXoy|Cn>j#J8 zO9`bvTjk!xJ9n5{8arICzVMRM;kZ|Y(~<^1!xlN8>BDhHP+>Purj!R7&aJoOzyzZWb57^h*A#YZFwk)k#Pamw5W82 z%m*vy)^Z>Gkwd*iN0{|AiE_)GoRKHJ9?F?H&J1UTQ#ELxmWR^IE6Zcx2AGJsuMnwc$LWI}w9z*7S06aIiY|QE$v!o7 z;O4@fdc^=i`=~^t6){bN6oEWpak+$dF;PVK{n%!-215inaG=;mru3&)&a1qcfPvTp zah%pbXAO&OY${kunjI1p=jI!9I30CF+1YkMKr%Pw%h^Cg8lo$h9Y$aicWY^z5hSiv za5^mUDw3ik9l~Rqn`FFJ3@3uu=7Y!jA@#NqC8?+AKjYzN_}FDz!q+|dL!3DgWa^bK z0-p`rtxRf`EKzEU_L;JaPs;mm$ZSt9dGOuexAib#{&{DBa9ZFhI}l@24VQls2Nggzl+^RriCD$P*3+NKlbCW)!=Atoz-*$bl)VRw3Zn;93W1|li@)`vKx5A{qI%;5aN4-z7Ja>2b%{F zrBZ?9H5GF-J8yGwYlNn05Ne7GN4$t>L*GYKTeT$zgO3M*E)iuAsgi>8bTnM_shx1Z z`$sWrqx`GTfq_Oitp{^xy$}DY+>}7_^zYG=hpL9 z>W?EPzMPF=zFSa{`|3yPfkE{P&jDnA_imJ$G(9RaYQvX3h5l4vpOOZesJF#e@z}{> zHTvZ@K0EKkSk(!sV#E~Y1X{bSmcnK;6EZ(JJuNIbqK1LBT8NL_71?^Ba5lyz#9g7> z-Jn-=cSXS9-C1LtabfiJ4MX)V_bw$G2X!9jVBruf^omzW$n_LcxbzV>83-;CCL9Wb zhc&pvbsdiNB6@xiJYoxH=SrvTfOa6@rEIf+|Ggj@p|v(z%GmchW#@Lk8aoR~&k}0W z?)@6A!lr2LpuoBghZh1@$Q)Yt?{%}7)+Jvss%JkdWk$i4kW^~ThMVP!aqlmCXK`=n zkKUmevDbFB7$gTw(O-v~`~)306=>#iojxh5%ms#}mqg}R-V}KhRgcT(c!v3*S!(NU zD~YGPPtMY#UY$CoqE_9dZ7}}`nD{Mn&63|U`-K-xcX_={9&HFOWa2(vjO^f&x)cqh z-W_BVsMid>6S*Kd@o?8TIbKQDaJBrk^SFenMzdu8@$JILU!U2{aSnFOfv%6!oHqLV z4Qe$P)llrsxY827t$+lKwyVBQ|8p}Z#^E}7kjw}hBnn6Sbj)WCN|I6%e($g(LCJ4q z#w>m580JuGcfe9u75RXd`-d+1)Gi7z>3+z(ToW>Y)t0?bclwZ>U=qWZCux=5L?=1n zg4!g`25FQ+u$a)kFwv%Fz328PrhE+ytp-}voeGkK9qUt$Qmp%8jyN{medO1qGjQ&b zFTy zRW8!)n^9lraR$2nA~#D|_K&tGMr$>`sjxmh!|gS2Q)oY$XV;~Y|DG4RFC1aYd>|;U z#QLT5FpDLz!TQ?aQ{K~u(PDh5Hspqvo0g;4vP`fqS_w{Uk-TQw-PeT~wrlRIc%KEU zUwnvCsjlfLuG6FML-K8n=jLb`ijujzq&+$f7{MQ15}iUO^Aokj<8HUBMYeC)Z1X*4 zvOn$8qJQxG?ISrtxqEXG=Ixrt&+bj#ny%_(y|xrtWoHvJTg|`fJge_AP-kOAJj$iF 
zXgp|IvYD%XtkG`D85PH+O^GfFkBv1m;F!kp`FsiAqkZ;t%$_`F@nU(Sd{sAzn5I3} ziCNafL)A@=-8#t$L1r4^0&{MMCiz)Ow)0!xnH{>u_MfK{*5Ho6>E@r*69w$I2eAo0 z50zczRRM z#ji@@&)5lxxvyF!))`(7Rt7J~BKdD;i*#QN6Yq2?7h8 zNCc*G$<1C8ZOJG^!j8Pbw!A={V~Em9{t26f;e)hXagrD$ zwzhDw9YGZ%l!`Wbl&ZkIskD}6hPY=a$_6O4AH>>tX;3{6D|NOl=ZMARhwH2MsS>8= z;jQ1Fzcm|QBf<&<->K{y%+^lvcdpWjr!^~J%%ru9-r8H3F>C<1LtaN}OZG+z!qbHu z)zIAwDSLort2g;!kFw*Tj2-|9DpxlatRnC`6OgFXS^PTUMx{d75zu^sz099`#F-)R zNel-cGE2nUH5P%>@+@V5G`s{rMAeV%0lB>ZJ0OP7{jqNl?^~a$3c!g^HncJBoBY67 zy9W2&2QU+xz0y$uR;v;e6RsDahMUOtdG1rdkJnQoxo@Wd%go0{A(&B`WkV%c(S+_= zcz`Fc9 z*fpeS#0k)AT9k&uF4P)O*sYV0N+WvP7F7e5h$#H;woa45U6}{usOKj7S0_%HGhBH! z+>#>%8L`I?A-QhJjiw~zm9=xDB|`2?qQd}lpqFku2HI}i>zH0d`6>Jgr5($?_ z_w?c!A8#OqZc;#lgx;Hj+1`RUO+}ih+Wfb9vs6-Rux&6LW@kq^1;26TB?AUi8UgoYOYfo04-dEEW5!+f% zRy`4C_c}DGu|rUyg9_q1$FVM2j=*A5bgcLRCJqk8l-Ij89ZKEmx6{}oG!zrEE&;8Q ztDj{AQ#;t7yg10}4>{KJ$}rN@=Bqq^$TO5V-$1bgXDx+;X=h2BUhU|QspT-0esb0v5TnDiK zS{*{A3!M&3N!wO`7Lab4G3fBovz@23g@zFsy5wL&dgU*(n3C zhtP#@LDC%Gi-Wd>OOA`!CBsFMA;dIO11KlgD|;K^!Rx1)=RE#f7^q+1jd$-(F5>zF z1!!4*fQUUfZ?_|depiy|0FXHM1QDO?*ZJ#k*M=yFIXDmhen)9KHL8gBz!t*( zv8~@E*2`)@{fZeiJ0v^)@u(X|?jd!h8E7lWZO-%1_jJ!f(UZZfEPq-CnA71%ge7gk|i*9 z3>EE8)!+wD0vBV|-sd>P!!HAeDhQySF9zOk{_wMXe!MZ2Dsey>s_C-e{|K}KDNNNq ziwr)5T`tW6CQG^+_f1r`5H9pW@GRnL8%fL~JI#$>6(zz4GDs+?u847R!B6vcue z9)2+i*XLl}ZBS#CB?Z5o+9r!Y{uC>ae@y7!2SXx+Ov44$S=2b*XI-Tw(8Qm5iIvrx z^v3L))(hL0t;J11eaZmd1=D}|JFjmg?5rpJFUk5w>)or%VmysSwWp@7DQx~-4f3f< zVJkMBKL;h6iY@)hGV&4sVBXsfTptbX63i`F4`@^t4mZDZ9zW&3^w>iMNC9~yNs48k zdTiY-ul&mI&ycTHc;Ppnwp{K;#I#q`S8n~BO#A;@djOZe zR+&Z9mf%rMdNr31Ww=7$Hs0q7wEJHj9y-S?bh2;linxFJHhe$<#_e)c3+m5#C8_wq zJIMO%?y7U%-(d6=${KY#Sy=s37<`ZZb17UVfm7nIL2t^-K=Hi+H#XBhjau*VA|cK& z=vBk+`$HZ68CFOX+~4>2n;#>)8D6=?{ML5-i3B<%SPES9=f~5n3bx`6UEPCNCf4w8z|Spb@#HvkmHUQCF%*f4zRDD2DyK zg#EpaLjS~{6#a~nmyoDxR66H)@ACDk3p3^^EA0UwBoJW`9)j>|-QQX~ z`uC}D@BmJuc{Ue1fW7g%A^+EYDx(X9n5MP;|Lv#z|4cEo2=c%7QyI!ET^oTY49)%j zwx1&Y4P}P^m;Dr&)Y-XLBJ)E}K3&i6SP6ApU(v2R!@HOvyMBYmlbR;HO`; z0mAei^zU 
zpOyLh_iTV)XH7--?3{CZ!Ni#H@7@Y-d`SGdrK1g_Vwt}A;h&m82WOOO3+z*$c$W0H zzJ?iLFvv8^eLYj4rP5Fyh!bweedyS{r7pR5{>~s6hLd7g&D>6e^Z0uJNQ=UoW7Q%&hEeSnum0{V<$(vv=hl z{HK?-alr+J+M`tUel;29JgUVSn8+{z^B(+VtNg3p4?$9Dy)7o>@2mxFwq(@1Mo&k# mwkBG>CmFtx#Dpwr80id?d@V;m3ns!5x`X>o^q=V2h5jED3bADX literal 0 HcmV?d00001 From 5ea1dc378f8d8e8bafce8e52e6009d3c2b4269a4 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Mon, 24 Nov 2025 01:15:48 +0530 Subject: [PATCH 25/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index ad6e89cff28e1..8d0a608fb49a7 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -61,7 +61,7 @@ In practice, however, this approach does not meet the contractual HA obligations To address these issues, CSM 1.7.1 includes FM on baremetal support, which provides which provides the necessary base OS image, networking, and storage configurations for running the Slingshot Fabric Manager natively within the CSM environment to achieve HA. 
-![FM On Baremetal Solution Overview](../../img/fm_on_baremetal.png) +![FM On Baremetal](FM-HA-1.png) ## Configure FM on baremetal From 724763d3867466e0378ffa4999b6b3b93915b472 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Mon, 1 Dec 2025 10:37:04 +0530 Subject: [PATCH 26/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 8d0a608fb49a7..d1f0dd053d873 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -69,8 +69,8 @@ To configure FM on baremetal please follow the [procedure](Configure_FM_On_Barem ## Slngshot Switch Firmware Update -* For clusters using the FM pod: CSM will continue to handle switch firmware uploads and updates as before. No changes are required in this workflow. -* For clusters with bare-metal FM: FMN will host the switch firmware, and FM will be responsible for managing switch updates. [See](...) +* For clusters using the FM pod: CSM will continue to handle switch firmware uploads and updates as before. See [Perform Slingshot switch and management network switch firmware updates](../../operations/iuf/workflows/slingshot_management_network_switch_updates.md#perform-slingshot-switch-and-management-network-switch-firmware-updates).
+* For clusters with bare-metal FM: FMN will host the switch firmware, and FM will be responsible for managing [switch updates](https://github.hpe.com/hpe/hpc-sshot-slingshot_docs/blob/23870cdfbda43c015aac641d7619faf0b0003634/portal/developer-portal/snippets/fm/install_bare_metal_fm_csm.md#optional-update-hpe-slingshot-switch-firmware). ## Troubleshooting From bbd5e1d98c6aa4eb3f8c13b7214027726222d9da Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Mon, 1 Dec 2025 10:57:55 +0530 Subject: [PATCH 27/94] Update Add_Remove_Replace_NCNs.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../Add_Remove_Replace_NCNs.md | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md b/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md index d8c5487c99a3e..1eb4f68f34597 100644 --- a/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md +++ b/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md @@ -150,9 +150,9 @@ The latest CSM documentation has been installed on the master nodes. See [Check Restarting cray-dhcp-kea ``` -## Add worker, storage, or master NCNs +## Add worker, storage, master or FMN(Fabric Manager Node) NCNs -Use this procedure to add a worker, storage, or master NCN. +Use this procedure to add a worker, storage, master or FMN NCN. ### Add NCN prerequisites @@ -199,6 +199,25 @@ XNAME= * Ensure that the NCN is configured to boot over the PCIe NICs instead of the Onboard 1 Gig NICs. * See the [Switch PXE Boot from Onboard NIC to PCIe](../Switch_PXE_Boot_From_Onboard_NICs_to_PCIe.md) procedure.
+**Note: ** + +For adding FMN (Fabric Manager Node) to CSM there is a new prompt added to confirm if the node getting added is an FMN or not: + + ```bash + ncn-m001:/usr/share/doc/csm/scripts/operations/node_management/Add_Remove_Replace_NCN # ./ncn_add_pre-req.py + ``` + ```text + The prerequisite script prepares adding NCNs by adjusting SLS network configurations. + + Please enter answer as an integer. + How many NCNs would you like to add? Do not include NCNs to be removed or moved. + 1 + + Please answer with yes or no. + Are the NCNs to be added are Fabric Manager Nodes (FMNs)? [y/N] + y + ``` + ### Add NCN procedure The following is a high-level overview of the add NCN workflow: From 98bc4f275f1d29c8acabea7c909e8b18f2dfe26e Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Dec 2025 13:35:59 +0530 Subject: [PATCH 28/94] Update Add_Remove_Replace_NCNs.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../Add_Remove_Replace_NCNs.md | 58 +++++++++++++------ 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md b/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md index 1eb4f68f34597..f8321d9e276f1 100644 --- a/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md +++ b/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md @@ -150,6 +150,45 @@ The latest CSM documentation has been installed on the master nodes. See [Check Restarting cray-dhcp-kea ``` +2. Optional: For adding FMNs (Fabric Manager Nodes) to CSM there is a new prompt added to confirm if the node getting added is an FMN or not: + + The script `ncn_add_pre-req.py` will ask the following questions: + + Existing prompt to add number of NCN nodes: + + ```text + How many NCNs would you like to add? 
Do not include NCNs to be removed or moved. + ``` + + Additional new prompt to consider these NCN node(s) as FMN(s): + + ```text + Are the NCNs to be added are Fabric Manager Nodes (FMNs)? [y/N] + ``` + + Example output: + + ```text + The prerequisite script prepares adding NCNs by adjusting SLS network configurations. + + Please enter answer as an integer. + How many NCNs would you like to add? Do not include NCNs to be removed or moved. + 1 + + Please answer with yes or no. + Are the NCNs to be added are Fabric Manager Nodes (FMNs)? [y/N] + y + + You are about to make DESTRUCTIVE changes to the system. + + If you are sure you want to proceed. Please type: PROCEED + + If you want to stop. Type: exit or press ctrl-c + + PROCEED + ... + ``` + ## Add worker, storage, master or FMN(Fabric Manager Node) NCNs Use this procedure to add a worker, storage, master or FMN NCN. @@ -199,25 +238,6 @@ XNAME= * Ensure that the NCN is configured to boot over the PCIe NICs instead of the Onboard 1 Gig NICs. * See the [Switch PXE Boot from Onboard NIC to PCIe](../Switch_PXE_Boot_From_Onboard_NICs_to_PCIe.md) procedure. -**Note: ** - -For adding FMN (Fabric Manager Node) to CSM there is a new prompt added to confirm if the node getting added is an FMN or not: - - ```bash - ncn-m001:/usr/share/doc/csm/scripts/operations/node_management/Add_Remove_Replace_NCN # ./ncn_add_pre-req.py - ``` - ```text - The prerequisite script prepares adding NCNs by adjusting SLS network configurations. - - Please enter answer as an integer. - How many NCNs would you like to add? Do not include NCNs to be removed or moved. - 1 - - Please answer with yes or no. - Are the NCNs to be added are Fabric Manager Nodes (FMNs)? 
[y/N] - y - ``` - ### Add NCN procedure The following is a high-level overview of the add NCN workflow: From 29f8339deac893a5e0b7049da7dd9362a7d6ad08 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Thu, 4 Dec 2025 16:01:24 +0530 Subject: [PATCH 29/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/Configure_FM_On_Baremetal.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 9066b7f8f3ba2..e98dad0451939 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -143,10 +143,10 @@ After creating the FMN base image, add FMN nodes to CSM by following the [NCN ad **Note:** * Below are the Interface level differences to be considered while following NCN add procedure for FMNs: - * As part of the [prerequisites](../../operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#prerequisites), there is a new prompt added to - confirm if the node getting added is an FMN or not. - * As part of the [step](../../operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md#add-the-ncn-to-bss-hsm-and-sls), include the new parameter - `--fmn-image-id` only for the FM node. The value for this parameter should be the image ID generated in the FMN base image creation stage above. + * As part of the [NCNs add prerequisites](../../operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#prerequisites), there is a new + prompt added to confirm if the node getting added is an FMN or not. 
+ * As part of the [add NCN to BSS, HSM, and SLS step](../../operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md#add-the-ncn-to-bss-hsm-and-sls), include + the new parameter `--fmn-image-id` only for the FM node. The value for this parameter should be the image ID generated in the FMN base image creation stage above. After completion of the NCN add procedure, SLS, HSM, and BSS will contain the corresponding FMN data. From 18904c5c921dfd067518e1f2ddcf86ea5535af95 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Thu, 4 Dec 2025 16:02:40 +0530 Subject: [PATCH 30/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/Configure_FM_On_Baremetal.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index e98dad0451939..2d0f702b1f70b 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -146,7 +146,7 @@ After creating the FMN base image, add FMN nodes to CSM by following the [NCN ad * As part of the [NCNs add prerequisites](../../operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#prerequisites), there is a new prompt added to confirm if the node getting added is an FMN or not. * As part of the [add NCN to BSS, HSM, and SLS step](../../operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md#add-the-ncn-to-bss-hsm-and-sls), include - the new parameter `--fmn-image-id` only for the FM node. The value for this parameter should be the image ID generated in the FMN base image creation stage above. + the new parameter `--fmn-image-id` only for the FM node. 
The value for this parameter should be the image ID generated in the [FMN base image creation stage](https://github.com/Cray-HPE/docs-csm/blob/CASM-5740-fm-ha/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md#fmn-base-image-creation). After completion of the NCN add procedure, SLS, HSM, and BSS will contain the corresponding FMN data. From 1412e04afca16dab9d1f3d90f83c7d4096b4a085 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Thu, 4 Dec 2025 16:05:35 +0530 Subject: [PATCH 31/94] Update Add_NCN_Data.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../Add_Remove_Replace_NCNs/Add_NCN_Data.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md b/operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md index 05e7cf6545c73..be3be4f7d3e42 100644 --- a/operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md +++ b/operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md @@ -357,6 +357,23 @@ The NCN MAC addresses need to be collected using the [Collect NCN MAC Addresses] --mac-lan1 b8:59:9f:d9:9d:e9 ``` + * Optional: For FMNs (Fabric Manager Nodes), we need to pass on `--fmn-image-id` parameter with FMN base image ID generated in the [FMN base image creation stage] + (https://github.com/Cray-HPE/docs-csm/blob/CASM-5740-fm-ha/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md#fmn-base-image-creation). 
+ + For Example: Base image id of FMN is `06135c73-bcd9-4d38-928f-ada20bdf6a6f` + + ```bash + cd /usr/share/doc/csm/scripts/operations/node_management/Add_Remove_Replace_NCNs/ + ./add_management_ncn.py ncn-data \ + --xname "${XNAME}" \ + --alias "${NODE}" \ + --fmn-image-id 06135c73-bcd9-4d38-928f-ada20bdf6a6f \ + --mac-mgmt0 a4:bf:01:65:6a:aa \ + --mac-mgmt1 a4:bf:01:65:6a:ab \ + --mac-lan0 b8:59:9f:d9:9d:e8 \ + --mac-lan1 b8:59:9f:d9:9d:e9 + ``` + 1. (`ncn-mw#`) Run the `add_management_ncn.py` script again, adding the `--perform-changes` argument to the command run in the previous step: > ***NOTE*** Depending on the networking configuration of the system the CMN or CAN networks From c4196d03216279dc80a5f71cc4454728745733c7 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Thu, 4 Dec 2025 18:36:14 +0530 Subject: [PATCH 32/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../Configure_FM_On_Baremetal.md | 138 +++++++++++++++++- 1 file changed, 133 insertions(+), 5 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 2d0f702b1f70b..80f9d045a329e 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -83,6 +83,10 @@ Create `sat bootprep` configuration file (`fmn_bootprep.yaml`) for FMN as below.
For Example: +```bash +ncn-m001:~ # cat fmn_bootprep.yaml +``` + ```yaml schema_version: 1.0.2 configurations: @@ -91,19 +95,19 @@ configurations: - name: fmn-nodes-bm playbook: ncn_nodes.yml git: - commit: + commit: 64c8753fbc3143ec8b889a755a445b5bbc8007fd url: https://api-gw-service-nmn.local/vcs/cray/csm-config-management.git - name: fmn-initrd-bm playbook: ncn-initrd.yml git: - commit: + commit: 64c8753fbc3143ec8b889a755a445b5bbc8007fd url: https://api-gw-service-nmn.local/vcs/cray/csm-config-management.git images: - name: fabricmanager-bm-node-image-1.0.0 base: product: name: csm - version: 1.7.1 + version: 1.7.1-beta.10 type: image filter: prefix: secure-kubernetes @@ -299,14 +303,138 @@ ipmitool -I lanplus -U root -E -H "${BMC}" chassis power status ### Validation -#### Validate FMN nodes base OS bring up successful completion +#### Validate base FMN nodes bring up successful completion + +1. Check if we are able to access both FMN nodes (`fmn001` and `fmn002`): + +```bash +ncn-m001:~ # ssh fmn001 +Last login: Thu Dec 4 11:25:30 2025 from 10.252.1.10 +... +``` + +```bash +ncn-m001:~ # ssh fmn002 +Last login: Thu Dec 4 05:03:46 2025 from 10.252.1.10 +... +``` + +2. Check if both FMN nodes are shown under `sat status`: + +```bash +ncn-m001:~ # sat status | grep fmn +``` + +```text +INFO: All values for 'Most Recent Session Template' are 'MISSING', omitting key. +| x3000c0s28b0n0 | fmn001 | Node | 100011 | On | OK | True | X86 | River | Management | FabricManager | Sling | True | fmn-bm-default-configuration | configured | 0 | stable | MISSING | MISSING | +| x3000c0s29b0n0 | fmn002 | Node | 100012 | On | OK | True | X86 | River | Management | FabricManager | Sling | True | fmn-bm-default-configuration | configured | 0 | stable | MISSING | MISSING | +``` + +3. 
Optionally check more details on the FMN nodes + +For Example: + +```bash +ncn-m001:~ # XNAME=x3000c0s28b0n0 +``` + +```bash +ncn-m001:~ # cray hsm state components describe "${XNAME}" --format toml +``` + +```text +ID = "x3000c0s28b0n0" +Type = "Node" +State = "On" +Flag = "OK" +Enabled = true +Role = "Management" +SubRole = "FabricManager" +NID = 100011 +NetType = "Sling" +Arch = "X86" +Class = "River" +``` + +```bash +ncn-m001:~ # XNAME=x3000c0s29b0n0 +``` + +```bash +ncn-m001:~ # cray hsm state components describe "${XNAME}" --format toml +``` + +```text +ID = "x3000c0s29b0n0" +Type = "Node" +State = "On" +Flag = "OK" +Enabled = true +Role = "Management" +SubRole = "FabricManager" +NID = 100012 +NetType = "Sling" +Arch = "X86" +Class = "River" +``` + #### Validate FMN required networking configuration + +```bash +ncn-m001:~/sav/csm-config # cray sls networks list +``` + +```text +... +[[results.ExtraProperties.Subnets.IPReservations]] +Aliases = [ "fmn001-cmn", "time-cmn", "time-cmn.local",] +Comment = "x3000c0s28b0n0" +IPAddress = "10.102.193.42" +Name = "fmn001" + +... +[[results.ExtraProperties.Subnets.IPReservations]] +Aliases = [ "fmn001-mtl", "time-mtl", "time-mtl.local",] +Comment = "x3000c0s28b0n0" +IPAddress = "10.1.1.10" +Name = "fmn001" +... + +[[results.ExtraProperties.Subnets.IPReservations]] +Aliases = [ "fmn-vip.local",] +Comment = "fmn-virtual-ip" +IPAddress = "10.252.1.13" +Name = "fmn-vip" + +[[results.ExtraProperties.Subnets.IPReservations]] +Aliases = [ "fmn001-nmn", "time-nmn", "time-nmn.local", "x3000c0s28b0n0", "fmn001.local",] +Comment = "x3000c0s28b0n0" +IPAddress = "10.252.1.12" +Name = "fmn001" + +[[results.ExtraProperties.Subnets.IPReservations]] +Aliases = [ "fmn001-hmn", "time-hmn", "time-hmn.local",] +Comment = "x3000c0s28b0n0" +IPAddress = "10.254.1.21" +Name = "fmn001" +... 
+ +[[results.ExtraProperties.Subnets.IPReservations]] +Aliases = [ "fmn001-chn", "time-chn", "time-chn.local",] +Comment = "x3000c0s28b0n0" +IPAddress = "10.102.193.206" +Name = "fmn001" + +``` + #### Validate FMN required storage configuration (LVM partitions) + #### Validate addition of FM required repositories ### Install Fabric Manager on FM baremetal nodes -For install/ upgrade Fabric Manager on the FMNs please refer [FabricManager Upgrade](...) +For install/ upgrade Fabric Manager on the FMNs please refer [FabricManager Install/ Upgrade](...) ## Uninstall FMN Helm Chart From 226ee6a88cf7654bb357f106190ca5d4c1866d11 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Thu, 4 Dec 2025 18:54:32 +0530 Subject: [PATCH 33/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../Configure_FM_On_Baremetal.md | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 80f9d045a329e..3971aae69938f 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -430,6 +430,94 @@ Name = "fmn001" #### Validate FMN required storage configuration (LVM partitions) +Check if both LVM partiions `/dev/mapper/metalvg0-SCFIRMWARE` and `/dev/mapper/metalvg0-SLINGSHOT` created and mounted under "/opt/cray/FW/sc-firmware" and "/opt/slingshot" rescpectively. 
+ +```bash +fmn001:~ # lsblk +``` + +```text +NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS +loop0 7:0 0 2.2G 1 loop /run/rootfsbase +sda 8:0 0 3.5T 0 disk +├─sda1 8:1 0 476M 0 part +│ └─md127 9:127 0 475.9M 0 raid1 /metal/recovery +├─sda2 8:2 0 22.8G 0 part +│ └─md125 9:125 0 22.8G 0 raid1 /run/initramfs/live +├─sda3 8:3 0 139.7G 0 part +│ └─md124 9:124 0 139.6G 0 raid1 /run/initramfs/overlayfs +└─sda4 8:4 0 139.7G 0 part + └─md126 9:126 0 279.1G 0 raid0 + ├─metalvg0-SCFIRMWARE 254:0 0 80G 0 lvm /opt/cray/FW/sc-firmware + └─metalvg0-SLINGSHOT 254:1 0 120G 0 lvm /opt/slingshot +sdb 8:16 0 3.5T 0 disk +├─sdb1 8:17 0 476M 0 part +│ └─md127 9:127 0 475.9M 0 raid1 /metal/recovery +├─sdb2 8:18 0 22.8G 0 part +│ └─md125 9:125 0 22.8G 0 raid1 /run/initramfs/live +├─sdb3 8:19 0 139.7G 0 part +│ └─md124 9:124 0 139.6G 0 raid1 /run/initramfs/overlayfs +└─sdb4 8:20 0 139.7G 0 part + └─md126 9:126 0 279.1G 0 raid0 + ├─metalvg0-SCFIRMWARE 254:0 0 80G 0 lvm /opt/cray/FW/sc-firmware + └─metalvg0-SLINGSHOT 254:1 0 120G 0 lvm /opt/slingshot +sdc 8:32 0 3.5T 0 disk +sdd 8:48 0 3.5T 0 disk +``` + +```bash +fmn001:~ # mount | grep /opt/cray/FW/sc-firmware +/dev/mapper/metalvg0-SCFIRMWARE on /opt/cray/FW/sc-firmware type ext4 (rw,relatime,stripe=256) +``` + +```bash +fmn001:~ # mount | grep /opt/slingshot +/dev/mapper/metalvg0-SLINGSHOT on /opt/slingshot type ext4 (rw,relatime,stripe=256) +``` + +```bash +fmn002:~ # lsblk +``` + +```text +NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS +loop0 7:0 0 2.2G 1 loop /run/rootfsbase +sda 8:0 0 3.5T 0 disk +├─sda1 8:1 0 476M 0 part +│ └─md127 9:127 0 475.9M 0 raid1 /metal/recovery +├─sda2 8:2 0 22.8G 0 part +│ └─md126 9:126 0 22.8G 0 raid1 /run/initramfs/live +├─sda3 8:3 0 139.7G 0 part +│ └─md125 9:125 0 139.6G 0 raid1 /run/initramfs/overlayfs +└─sda4 8:4 0 139.7G 0 part + └─md124 9:124 0 279.1G 0 raid0 + ├─metalvg0-SCFIRMWARE 254:0 0 80G 0 lvm /opt/cray/FW/sc-firmware + └─metalvg0-SLINGSHOT 254:1 0 120G 0 lvm /opt/slingshot +sdb 8:16 0 3.5T 0 disk 
+├─sdb1 8:17 0 476M 0 part +│ └─md127 9:127 0 475.9M 0 raid1 /metal/recovery +├─sdb2 8:18 0 22.8G 0 part +│ └─md126 9:126 0 22.8G 0 raid1 /run/initramfs/live +├─sdb3 8:19 0 139.7G 0 part +│ └─md125 9:125 0 139.6G 0 raid1 /run/initramfs/overlayfs +└─sdb4 8:20 0 139.7G 0 part + └─md124 9:124 0 279.1G 0 raid0 + ├─metalvg0-SCFIRMWARE 254:0 0 80G 0 lvm /opt/cray/FW/sc-firmware + └─metalvg0-SLINGSHOT 254:1 0 120G 0 lvm /opt/slingshot +sdc 8:32 0 3.5T 0 disk +sdd 8:48 0 3.5T 0 disk +``` + +```bash +fmn002:~ # mount | grep /opt/cray/FW/sc-firmware +/dev/mapper/metalvg0-SCFIRMWARE on /opt/cray/FW/sc-firmware type ext4 (rw,relatime,stripe=256) +``` + +```bash +fmn002:~ # mount | grep /opt/slingshot +/dev/mapper/metalvg0-SLINGSHOT on /opt/slingshot type ext4 (rw,relatime,stripe=256) +``` + #### Validate addition of FM required repositories ### Install Fabric Manager on FM baremetal nodes From cc1b9a831419c084e9738fcc108874ed19fbf52a Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Thu, 4 Dec 2025 19:17:15 +0530 Subject: [PATCH 34/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../Configure_FM_On_Baremetal.md | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 3971aae69938f..8e64133d34986 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -381,6 +381,8 @@ Class = "River" #### Validate FMN required networking configuration +Check NMN, CMN, HMN, CHN, metal and virtual IP configuration for both FMN nodes (`fmn001` and `fmn002`). 
+ ```bash ncn-m001:~/sav/csm-config # cray sls networks list ``` @@ -430,7 +432,7 @@ Name = "fmn001" #### Validate FMN required storage configuration (LVM partitions) -Check if both LVM partiions `/dev/mapper/metalvg0-SCFIRMWARE` and `/dev/mapper/metalvg0-SLINGSHOT` created and mounted under "/opt/cray/FW/sc-firmware" and "/opt/slingshot" rescpectively. +Check if both LVM partitions `/dev/mapper/metalvg0-SCFIRMWARE` and `/dev/mapper/metalvg0-SLINGSHOT` are created and mounted under `/opt/cray/FW/sc-firmware` and `/opt/slingshot` respectively on both FMN nodes (`fmn001` and `fmn002`). ```bash fmn001:~ # lsblk @@ -520,6 +522,29 @@ fmn002:~ # mount | grep /opt/slingshot #### Validate addition of FM required repositories +Check if all the required repos are added on both FMN nodes (`fmn001` and `fmn002`) in order to install prerequisite OS RPMs +required during Slingshot Software installation. + +For Example: + +```bash +fmn001:~ # zypper lr +``` + +```text +Repository priorities are without effect. All enabled repositories share the same priority. + +# | Alias | Name | Enabled | GPG Check | Refresh +---+-----------------------------------------------------------------------+--------------------------------------------------+---------+-----------+-------- + 1 | SUSE-25.7.250709-SLE-Module-Development-Tools-15-SP6-x86_64-Pool | SUSE-25.7.250709-SLE-Module-Development-Tools--> | Yes | ( ) No | Yes + 2 | SUSE-25.7.250709-SLE-Module-Legacy-15-SP7-x86_64-Updates | SUSE-25.7.250709-SLE-Module-Legacy-15-SP7-x86_-> | Yes | ( ) No | Yes + 3 | SUSE-25.7.250709-SLE-Module-Server-Applications-15-SP7-x86_64-Pool | SUSE-25.7.250709-SLE-Module-Server-Application-> | Yes | ( ) No | Yes + 4 | SUSE-SLE-Module-Basesystem-15-SP6-x86_64-Pool | SUSE-SLE-Module-Basesystem-15-SP6-x86_64-Pool | Yes | ( ) No | Yes + 5 | SUSE-SLE-Module-Containers-15-SP7-x86_64-Updates | SUSE-SLE-Module-Containers-15-SP7-x86_64-Updates | Yes | ( ) No | Yes + 6 | csm-embedded | csm-embedded | Yes | ( ) No | Yes + ...
+``` + ### Install Fabric Manager on FM baremetal nodes For install/ upgrade Fabric Manager on the FMNs please refer [FabricManager Install/ Upgrade](...) From 757bb094b97e758961eb9462368ccdd22bea5d47 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Thu, 4 Dec 2025 19:27:37 +0530 Subject: [PATCH 35/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index d1f0dd053d873..9a2191b623b12 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -15,9 +15,8 @@ CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS i **NOTE**: -* FM on baremetal is disabled by default. -* FM cannot be disabled after it has been enabled. * `FMNs` are considered Management nodes. +* FM cannot be disabled after it has been enabled. * The two `FMNs` must be part of two different management racks to support Rack Resiliency. * This feature will not be supported on systems with Dell/ Mellanox based management networks. 
@@ -46,7 +45,7 @@ CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS i [CSM Automatic Network Utility](../../glossary.md#csm-automatic-network-utility-canu) ### SAT -[System Admon Toolkit](../../glossary.md#system-admin-toolkit-sat) +[System Admin Toolkit](../../glossary.md#system-admin-toolkit-sat) ### SMA [System Monitoring Application](../../glossary.md#system-monitoring-application-sma) @@ -57,9 +56,9 @@ In CSM versions < 1.7.0, the deployment of the Fabric Manager within CSM uses na In theory, this model should satisfy HA requirements: if the pod fails (or needs to be moved during an upgrade), Kubernetes can detect the fault and recreate the Fabric Manager on another node, providing continuity. -In practice, however, this approach does not meet the contractual HA obligations. Because of Kubernetes’ “best‑effort” scheduling and the resource demands of the Fabric Manager, real service outages can exceed 5 minutes. +In practice, however, this approach does not meet the contractual HA obligations. Because of Kubernetes “best‑effort” scheduling and the resource demands of the Fabric Manager, real service outages can exceed 5 minutes. -To address these issues, CSM 1.7.1 includes FM on baremetal support, which provides which provides the necessary base OS image, networking, and storage configurations for running the Slingshot Fabric Manager natively within the CSM environment to achieve HA. +To address these issues, CSM 1.7.1 includes FM on baremetal support, which provides the necessary base OS image, networking, and storage configurations for running the Slingshot Fabric Manager natively within the CSM environment to achieve HA. ![FM On Baremetal](FM-HA-1.png) @@ -67,9 +66,9 @@ To address these issues, CSM 1.7.1 includes FM on baremetal support, which provi To configure FM on baremetal please follow the [procedure](Configure_FM_On_Baremetal.md). 
-## Slngshot Switch Firmware Update +## Slingshot Switch Firmware Update -* For clusters using the FM pod: CSM will continue to handle switch firmware uploads and updates as before [See](../../operations/iuf/workflows/slingshot_management_network_switch_updates.md#perform-slingshot-switch-and-management-network-switch-firmware-updates). +* For clusters using the FM pod: CSM will continue to handle switch firmware uploads and updates as [before](../../operations/iuf/workflows/slingshot_management_network_switch_updates.md#perform-slingshot-switch-and-management-network-switch-firmware-updates). * For clusters with bare-metal FM: FMN will host the switch firmware, and FM will be responsible for managing [switch updates] (https://github.hpe.com/hpe/hpc-sshot-slingshot_docs/blob/23870cdfbda43c015aac641d7619faf0b0003634/portal/developer-portal/snippets/fm/install_bare_metal_fm_csm.md#optional-update-hpe-slingshot-switch-firmware) ## Troubleshooting From c2657cd147c79f724fdc583a2c4adfe288f2e208 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Thu, 4 Dec 2025 19:49:08 +0530 Subject: [PATCH 36/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/Configure_FM_On_Baremetal.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 8e64133d34986..341e3a715dc7a 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -69,7 +69,7 @@ jq -c '.topology[] | select(.common_name|contains("fmn"))' surtur-ccj.json ### FMN Base Image Creation -The FabricManager subrole has been introduced to facilitate FMN node discovery and configuration. 
Corresponding updates have been made to `ncn_nodes.yaml` and `ncn_initrd.yaml` to support customization of the FMN base image— a non-Kubernetes image containing only essential artifacts. This customization is performed using the `csm.fm.baremetal` Ansible role, executed under the `Management_FabricManager` host. The following steps detail the process for generating the FMN base image with the required components and deploying it to FMN nodes. +The FabricManager subrole has been introduced to facilitate FMN node discovery and configuration. Corresponding updates have been made to the `ncn_nodes.yml` and `ncn-initrd.yml` playbooks to support customization of the FMN base image — a non-Kubernetes image containing only essential artifacts. This customization is performed using the `csm.fm.baremetal` Ansible role, executed under the `Management_FabricManager` host. The following steps detail the process for generating the FMN image. #### Create FMN base image (only base OS; no Fabric Manager) @@ -123,7 +123,7 @@ Execute the commands below on any master node to generate the new FMN image and First set `bootprep` file path: ```bash -# BOOTPREP_FILE_PATH=./fmn_bootpre.yaml +# BOOTPREP_FILE_PATH=./fmn_bootprep.yaml ``` Now execute the `sat bootprep run` command below to generate the new base image and upload it to S3.
From e2065010e2cc72acc9cf06acc54267aaf81eb001 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Fri, 5 Dec 2025 22:47:43 +0530 Subject: [PATCH 37/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/Configure_FM_On_Baremetal.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 341e3a715dc7a..7df243ac57893 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -73,7 +73,7 @@ The FabricManager subrole has been introduced to facilitate FMN node discovery a #### Create FMN base image (only base OS; no Fabric Manager) -Adapt and customize the current NCN Kubernetes image for compatibility with FMN node requirements. See (../../operations/configuration_management/Management_Node_Image_Customization.md) +Adapt and customize the current NCN Kubernetes image for compatibility with FMN node requirements. 
See [Management Node Image Customization](../../operations/configuration_management/Management_Node_Image_Customization.md). ##### FMN Boot Preparation From 164c39698fccbd26ece7661f9bf48227a1ea56f8 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 9 Dec 2025 18:23:06 +0530 Subject: [PATCH 38/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../Configure_FM_On_Baremetal.md | 205 ++++-------------- 1 file changed, 46 insertions(+), 159 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 7df243ac57893..436b878fc4d8f 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -11,8 +11,8 @@ and configuring the necessary networking to support Fabric Manager on baremetal ## Note: * Fabric Manager Nodes (`FMNs`) can be added only after the CSM upgrade has been completed. -* By default, Fabric Manager on baremetal is disabled. -* Once enabled, Fabric Manager on baremetal cannot be disabled. +* By default, Fabric Manager runs on Kubernetes as a Kubernetes pod. +* After Fabric Manager is migrated from a Kubernetes pod to bare-metal infrastructure, it cannot be reverted. ## Post upgrade of CSM from 1.7.0 to 1.7.1 @@ -39,32 +39,6 @@ The administrator must update the SHCD to include the placement and cabling deta Verify that the BMC of each FMN is configured with the correct root user credentials. -### Perform CANU validation - -* Validate SHCD with respect to FMNs -* Map FMNs in the SHCD to the node type: `Management_FabricManager` when building the CCJ file -* Generate switch configuration for the node based on the new Role: `Management` , SubRole: `FabricManager` pairing - -Validate the SHCD.
- -**For example:** - -``` bash -canu validate shcd -a TDS --shcd "System5 Surtur Shasta River RevA27.xlsx" --tabs edge,25G_10G,NMN,HMN --corners J1,T3,I14,Q55,I16,S21,J20,U41 --edge Arista -``` - -If the output looks good (Warnings about the CAN switch and SITE connections can be discounted) then generate the CCJ file. - -```bash -canu validate shcd -a TDS --shcd "System5 Surtur Shasta River RevA27.xlsx" --tabs edge,25G_10G,NMN,HMN --corners J1,T3,I14,Q55,I16,S21,J20,U41 --edge Arista --json --out surtur-ccj.json -``` - -Verify that the Fabric Manager nodes are present in the output CCJ file. - -```bash -jq -c '.topology[] | select(.common_name|contains("fmn"))' surtur-ccj.json -``` - ## FMN Pre Boot ### FMN Base Image Creation @@ -142,71 +116,10 @@ sat bootprep run \ ### Add FMN Nodes to CSM -After creating the FMN base image, add FMN nodes to CSM by following the [NCN add procedure](../../operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md) - -**Note:** - -* Below are the Interface level differences to be considered while following NCN add procedure for FMNs: - * As part of the [NCNs add prerequisites](../../operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#prerequisites), there is a new - prompt added to confirm if the node getting added is an FMN or not. - * As part of the [add NCN to BSS, HSM, and SLS step](../../operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md#add-the-ncn-to-bss-hsm-and-sls), include - the new parameter `--fmn-image-id` only for the FM node. The value for this parameter should be the image ID generated in the [FMN base image creation stage](https://github.com/Cray-HPE/docs-csm/blob/CASM-5740-fm-ha/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md#fmn-base-image-creation). 
+After creating the FMN base image, add FMN nodes to CSM by following the steps in the [NCN add procedure](../../operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#add-worker-storage-master-or-fmnfabric-manager-node-ncns). After completion of the NCN add procedure, SLS, HSM, and BSS will contain the corresponding FMN data. -The following checks can be used to verify that the updates have been correctly applied: - -#### SLS hardware should list the new nodes - -For Example: - -```bash -cray sls hardware describe x3000c0s28b0n0 -``` - -#### IPs should be allocated and made available for FMNs in all of SLS networks - -**Note:** NMN and HMN should be having additional FMN VIPs also allocated. - -For Example: - -```bash -cray sls search networks list --name NMN --format json -``` - -#### HSM ethernet interfaces should be updated with the same allocated IPs - -For Example: - -```bash -cray hsm inventory ethernetInterfaces list --component-id x3000c0s28b0n0 --format json -``` - -#### BSS should be updated with new hosts entries for FMN with proper configurations - -**Note:** BSS global parameters also needs to be updated with FMN IPs(VIP not included). - -For Example: - -```bash -cray bss bootparameters list --format json --name x3000c0s28b0n0 -``` - -```bash -cray bss bootparameters list --hosts Global --format json -``` - -### Update Switch Configuration With CANU - -**Note: ** This step cannot be performed until the Fabric Manager nodes have been added to SLS.
- -In order to generate new configuration the following is required: - -* A CCJ file -* Any custom config file specific to the system -* A SLS file that contains the FMNs (`cray sls dumpstate list --format json` may be used to obtain this once SLS has been updated on the running system) -* Knowledge of whether the system has the NMN Isolation feature enabled or not - #### Generate the switch configuration For Example: @@ -232,73 +145,11 @@ canu validate switch config --ip 10.254.0.4 --generated output/sw-leaf-bmc-001.c Take extreme care when manipulating ACLs, if CANU suggests moving a "permit any ..." rule be sure to create the new rule before removing the old one. It is possible to lose access to the switch if the ACLs are not applied in the correct order. - ## FMN Booting -Once the FMNs have been added to the CSM, proceed to boot the FMN nodes (using iPXE boot commands) with the FMN bare-metal base image. - -### Set BMC with node name - -```bash -BMC="${NODE}-mgmt"; echo $BMC -``` - -**Note: ** Here the NODE can be `fmn001` (or) `fmn002`. For example, consider `fmn001` with xname `x3000c0s28b0n0` and `fmn002` with xname `x3000c0s29b0n0`. - -### Get and export IPMI credentials - -```bash -read -r -s -p "${BMC} root password: " IPMI_PASSWORD -export IPMI_PASSWORD -``` +Once the FMNs have been added to the CSM, proceed to boot the FMN nodes (using iPXE boot commands) with the FMN bare-metal base image. See +[Boot NCN](../../operations/node_management/Add_Remove_Replace_NCNs/Boot_NCN.md#boot-ncn). -### Open console to check the progress of the upcoming boot - -Run below command in a different terminal to check the progress of the boot which we are going to initiate in the next step. - -**Note: ** Here `xname` can be `fmn001 or `fmn002` based on which FMN is getting booted with.
- -```bash -cray console interact -echo ${BMC} -``` - -### Check the current chassis power status - -```bash -ipmitool -I lanplus -U root -E -H "${BMC}" chassis power status -``` -### Set the boot option - -```bash -ipmitool -I lanplus -U root -E -H "${BMC}" chassis bootdev pxe options=efiboot -``` - -### Power off the chassis - -Power off the chassis: - -```bash -ipmitool -I lanplus -U root -E -H "${BMC}" chassis power off -``` -Check the chassis power status: - -```bash -ipmitool -I lanplus -U root -E -H "${BMC}" chassis power status -``` -### Power on the chassis - -Power on the chassis: - -```bash -ipmitool -I lanplus -U root -E -H "${BMC}" chassis power on -``` - -Check the chassis power status: - -```bash -ipmitool -I lanplus -U root -E -H "${BMC}" chassis power status -``` ## FMN Post Boot ### Validation @@ -384,7 +235,7 @@ Class = "River" Check NMN, CMN, HMN, CHN, metal and virtual IP configuration for both FMN nodes (`fmn001` and `fmn002`). ```bash -ncn-m001:~/sav/csm-config # cray sls networks list +ncn-m001:~ # cray sls networks list ``` ```text @@ -430,6 +281,46 @@ Name = "fmn001" ``` +#### SLS hardware should list the new nodes + +For Example: + +```bash +cray sls hardware describe x3000c0s28b0n0 +``` + +#### IPs should be allocated and made available for FMNs in all of the SLS networks + +**Note:** NMN and HMN should also have additional FMN VIPs allocated. + +For Example: + +```bash +cray sls search networks list --name NMN --format json +``` + +#### HSM ethernet interfaces should be updated with the same allocated IPs + +For Example: + +```bash +cray hsm inventory ethernetInterfaces list --component-id x3000c0s28b0n0 --format json +``` + +#### BSS should be updated with new hosts entries for FMN with proper configurations + +**Note:** BSS global parameters also need to be updated with the FMN IPs (VIP not included).
+ +For Example: + +```bash +cray bss bootparameters list --format json --name x3000c0s28b0n0 +``` + +```bash +cray bss bootparameters list --hosts Global --format json +``` + #### Validate FMN required storage configuration (LVM partitions) Check if both LVM partiions `/dev/mapper/metalvg0-SCFIRMWARE` and `/dev/mapper/metalvg0-SLINGSHOT` created and mounted under `/opt/cray/FW/sc-firmware` and `/opt/slingshot` rescpectively on both FMN nodes (`fmn001` and `fmn002`). @@ -548,7 +439,3 @@ Repository priorities are without effect. All enabled repositories share the sam ### Install Fabric Manager on FM baremetal nodes For install/ upgrade Fabric Manager on the FMNs please refer [FabricManager Install/ Upgrade](...) - -## Uninstall FMN Helm Chart - -After FMNs have comeup healthy and Running, uninstall existing FM helm chart (FM K8s pod) `slingshot-fabric-manager`. From 8842e7ea7b42da6312a798917c169f91aeccf6eb Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 9 Dec 2025 18:27:47 +0530 Subject: [PATCH 39/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 9a2191b623b12..04aae3a4baab7 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -9,14 +9,14 @@ ## Introduction -The Fabric Manager (FM) bare-metal enablement within the Cray System Management (CSM) framework introduces dedicated Fabric Manager Nodes (FMNs) that manage and monitor Slingshot fabric operations outside of a Kubernetes environment. 
While the overall bare-metal Fabric Manager solution is described in the Slingshot Fabric Manager HA documentation , this CSM detail design document focuses specifically on the CSM-level enhancements required to integrate and support FMNs. +The Fabric Manager (FM) bare-metal enablement within the Cray System Management (CSM) framework introduces dedicated Fabric Manager Nodes (FMNs) that manage and monitor Slingshot fabric operations outside of a Kubernetes environment. The overall bare-metal Fabric Manager solution is described in the Slingshot Fabric Manager HA documentation. CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS image, networking, and storage configurations for running the Slingshot Fabric Manager natively within the CSM environment. **NOTE**: * `FMNs` are considered Management nodes. -* FM cannot be disabled after it has been enabled. +* After Fabric Manager is migrated from a Kubernetes pod to bare-metal infrastructure, it cannot be reverted. * The two `FMNs` must be part of two different management racks to support Rack Resiliency. * This feature will not be supported on systems with Dell/ Mellanox based management networks.
From e4c8605c4f4accbcd6beef508ba4c7747d51c1ad Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 9 Dec 2025 18:32:29 +0530 Subject: [PATCH 40/94] Update Add_NCN_Data.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md b/operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md index be3be4f7d3e42..341eeb135fda2 100644 --- a/operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md +++ b/operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md @@ -357,8 +357,8 @@ The NCN MAC addresses need to be collected using the [Collect NCN MAC Addresses] --mac-lan1 b8:59:9f:d9:9d:e9 ``` - * Optional: For FMNs (Fabric Manager Nodes), we need to pass on `--fmn-image-id` parameter with FMN base image ID generated in the [FMN base image creation stage] - (https://github.com/Cray-HPE/docs-csm/blob/CASM-5740-fm-ha/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md#fmn-base-image-creation). + * For FMNs (Fabric Manager Nodes), whose aliases match `fmn00*`, pass the additional `--fmn-image-id` parameter with the FMN base image ID + generated in the [FMN base image creation stage](https://github.com/Cray-HPE/docs-csm/blob/CASM-5740-fm-ha/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md#fmn-base-image-creation).
For Example: Base image id of FMN is `06135c73-bcd9-4d38-928f-ada20bdf6a6` From b68531feee6306aec3d502d3c6a58705042be786 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 9 Dec 2025 18:40:43 +0530 Subject: [PATCH 41/94] Update Add_Remove_Replace_NCNs.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../Add_Remove_Replace_NCNs.md | 53 +++++-------------- 1 file changed, 13 insertions(+), 40 deletions(-) diff --git a/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md b/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md index f8321d9e276f1..909e4ec1aab2b 100644 --- a/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md +++ b/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md @@ -38,7 +38,15 @@ The latest CSM documentation has been installed on the master nodes. See [Check ./ncn_add_pre-req.py ``` - The script will ask the following question: + Note: For adding FMNs (Fabric Manager Nodes) to CSM there is a new prompt added to confirm if the node getting added is an FMN or not: + + ```text + Please answer with yes or no. + Are the NCNs to be added are Fabric Manager Nodes (FMNs)? [y/N] + y + ``` + + Overall, the script `ncn_add_pre-req.py` will ask the following question: ```text How many NCNs would you like to add? Do not include NCNs to be removed or moved. @@ -53,6 +61,10 @@ The latest CSM documentation has been installed on the master nodes. See [Check How many NCNs would you like to add? Do not include NCNs to be removed or moved. 10 + Please answer with yes or no. + Are the NCNs to be added are Fabric Manager Nodes (FMNs)? [y/N] + N + You are about to make DESTRUCTIVE changes to the system. If you are sure you want to proceed. Please type: PROCEED @@ -149,45 +161,6 @@ The latest CSM documentation has been installed on the master nodes. 
See [Check Restarting cray-dhcp-kea ``` - -2. Optional: For adding FMNs (Fabric Manager Nodes) to CSM there is a new prompt added to confirm if the node getting added is an FMN or not: - - The script `ncn_add_pre-req.py` will ask the following questions: - - Existing prompt to add number of NCN nodes: - - ```text - How many NCNs would you like to add? Do not include NCNs to be removed or moved. - ``` - - Additional new prompt to consider these NCN node(s) as FMN(s): - - ```text - Are the NCNs to be added are Fabric Manager Nodes (FMNs)? [y/N] - ``` - - Example output: - - ```text - The prerequisite script prepares adding NCNs by adjusting SLS network configurations. - - Please enter answer as an integer. - How many NCNs would you like to add? Do not include NCNs to be removed or moved. - 1 - - Please answer with yes or no. - Are the NCNs to be added are Fabric Manager Nodes (FMNs)? [y/N] - y - - You are about to make DESTRUCTIVE changes to the system. - - If you are sure you want to proceed. Please type: PROCEED - - If you want to stop. Type: exit or press ctrl-c - - PROCEED - ... 
- ``` ## Add worker, storage, master or FMN(Fabric Manager Node) NCNs From e8d90d56e40355001a58f63489aeea663854f26b Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 9 Dec 2025 18:49:10 +0530 Subject: [PATCH 42/94] Update Boot_NCN.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/node_management/Add_Remove_Replace_NCNs/Boot_NCN.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/operations/node_management/Add_Remove_Replace_NCNs/Boot_NCN.md b/operations/node_management/Add_Remove_Replace_NCNs/Boot_NCN.md index 0b46e98dea2b4..c4bcd374c9ee5 100644 --- a/operations/node_management/Add_Remove_Replace_NCNs/Boot_NCN.md +++ b/operations/node_management/Add_Remove_Replace_NCNs/Boot_NCN.md @@ -262,3 +262,6 @@ Follow [Add Ceph Node](../../utility_storage/Add_Ceph_Node.md) to join the added Proceed to [Redeploy Services](Redeploy_Services.md) or return to the main [Add, Remove, Replace, or Move NCNs](Add_Remove_Replace_NCNs.md) page. + +**Note:** +* For FMN nodes we can skip the rest of the steps. From c53a1c9846e71346bae9019cb7663adcc3950c16 Mon Sep 17 00:00:00 2001 From: Chris Spiller <86013738+spillerc-hpe@users.noreply.github.com> Date: Thu, 8 Jan 2026 18:28:46 +0000 Subject: [PATCH 43/94] CASMINST-7513 - Add steps to deploy fmn nodes during CSM install (#6451) --- install/deploy_non-compute_nodes.md | 76 ++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 12 deletions(-) diff --git a/install/deploy_non-compute_nodes.md b/install/deploy_non-compute_nodes.md index 77159bb9f3c3b..822351cafcd77 100644 --- a/install/deploy_non-compute_nodes.md +++ b/install/deploy_non-compute_nodes.md @@ -2,7 +2,7 @@ The following procedure deploys Linux and Kubernetes software to the management NCNs. Deployment of the nodes starts with booting the storage nodes, followed by the master nodes -and worker nodes together.
Optionally, HPE Slingshot Fabric Manager nodes can also be deployed. After the operating system boots on each node, there are some configuration actions which take place. Watching the console or the console log for certain nodes can help to understand @@ -26,7 +26,8 @@ the number of storage and worker nodes. 1. [Deploy management nodes](#2-deploy-management-nodes) 1. [Deploy storage NCNs](#21-deploy-storage-ncns) 1. [Deploy Kubernetes NCNs](#22-deploy-kubernetes-ncns) - 1. [Configure `kubectl` on the PIT](#23-configure-kubectl-on-the-pit) + 1. [Deploy HPE Slingshot Fabric Manager nodes (optional)](#23-deploy-hpe-slingshot-fabric-manager-nodes-optional) + 1. [Configure `kubectl` on the PIT](#24-configure-kubectl-on-the-pit) 1. [Validate deployment](#3-validate-deployment) 1. [Next topic](#next-topic) @@ -52,9 +53,11 @@ Preparation of the environment must be done before attempting to deploy the mana > These values do not need to be altered from what is shown. ```bash - export IPMI_PASSWORD ; mtoken='ncn-m(?!001)\w+-mgmt' ; stoken='ncn-s\w+-mgmt' ; wtoken='ncn-w\w+-mgmt' + export IPMI_PASSWORD ; mtoken='ncn-m(?!001)\w+-mgmt' ; stoken='ncn-s\w+-mgmt' ; wtoken='ncn-w\w+-mgmt' ; ftoken='fmn\w+-mgmt' ``` + > **NOTE:** The `ftoken` variable is used for HPE Slingshot Fabric Manager nodes, which are optional and not present on all systems. + ### 1.2. BIOS baseline 1. (`pit#`) If the NCNs are HPE hardware, then ensure that DCMI/IPMI is enabled. @@ -68,14 +71,16 @@ Preparation of the environment must be done before attempting to deploy the mana 1. (`pit#`) Check power status of all NCNs. ```bash - grep -oP "(${mtoken}|${stoken}|${wtoken})" /etc/dnsmasq.d/statics.conf | sort -u | + grep -oP "(${mtoken}|${stoken}|${wtoken}|${ftoken})" /etc/dnsmasq.d/statics.conf | sort -u | xargs -t -i ipmitool -I lanplus -U "${USERNAME}" -E -H {} power status ``` + > **NOTE:** If the system does not have HPE Slingshot Fabric Manager nodes, the `ftoken` pattern will not match any entries. + 1. 
(`pit#`) Power off all NCNs. ```bash - grep -oP "(${mtoken}|${stoken}|${wtoken})" /etc/dnsmasq.d/statics.conf | sort -u | + grep -oP "(${mtoken}|${stoken}|${wtoken}|${ftoken})" /etc/dnsmasq.d/statics.conf | sort -u | xargs -t -i ipmitool -I lanplus -U "${USERNAME}" -E -H {} power off ``` @@ -89,16 +94,16 @@ Preparation of the environment must be done before attempting to deploy the mana - Disable VT-x, AMD-V, SVM, VT-d, and AMD IOMMU for Virtualization, on both AMD and Intel CPUs; there is no way to enable at this time. ```bash - grep -oP "(${mtoken}|${stoken}|${wtoken})" /etc/dnsmasq.d/statics.conf | sort -u | + grep -oP "(${mtoken}|${stoken}|${wtoken}|${ftoken})" /etc/dnsmasq.d/statics.conf | sort -u | xargs -t -i ipmitool -I lanplus -U "${USERNAME}" -E -H {} chassis bootdev none options=clear-cmos ``` 1. (`pit#`) Boot NCNs to BIOS to allow the CMOS to reinitialize. ```bash - grep -oP "(${mtoken}|${stoken}|${wtoken})" /etc/dnsmasq.d/statics.conf | sort -u | + grep -oP "(${mtoken}|${stoken}|${wtoken}|${ftoken})" /etc/dnsmasq.d/statics.conf | sort -u | xargs -t -i ipmitool -I lanplus -U "${USERNAME}" -E -H {} chassis bootdev bios options=efiboot - grep -oP "(${mtoken}|${stoken}|${wtoken})" /etc/dnsmasq.d/statics.conf | sort -u | + grep -oP "(${mtoken}|${stoken}|${wtoken}|${ftoken})" /etc/dnsmasq.d/statics.conf | sort -u | xargs -t -i ipmitool -I lanplus -U "${USERNAME}" -E -H {} power on ``` @@ -113,7 +118,7 @@ Preparation of the environment must be done before attempting to deploy the mana 1. (`pit#`) Power off the nodes. ```bash - grep -oP "(${mtoken}|${stoken}|${wtoken})" /etc/dnsmasq.d/statics.conf | sort -u | + grep -oP "(${mtoken}|${stoken}|${wtoken}|${ftoken})" /etc/dnsmasq.d/statics.conf | sort -u | xargs -t -i ipmitool -I lanplus -U "${USERNAME}" -E -H {} power off ``` @@ -134,8 +139,8 @@ for all nodes, the Ceph storage will have been initialized and the Kubernetes cl 1. 
(`pit#`) Set each node to always UEFI network boot, and ensure that they are powered off. ```bash - grep -oP "(${mtoken}|${stoken}|${wtoken})" /etc/dnsmasq.d/statics.conf | sort -u | xargs -t -i ipmitool -I lanplus -U "${USERNAME}" -E -H {} chassis bootdev pxe options=efiboot,persistent - grep -oP "(${mtoken}|${stoken}|${wtoken})" /etc/dnsmasq.d/statics.conf | sort -u | xargs -t -i ipmitool -I lanplus -U "${USERNAME}" -E -H {} power off + grep -oP "(${mtoken}|${stoken}|${wtoken}|${ftoken})" /etc/dnsmasq.d/statics.conf | sort -u | xargs -t -i ipmitool -I lanplus -U "${USERNAME}" -E -H {} chassis bootdev pxe options=efiboot,persistent + grep -oP "(${mtoken}|${stoken}|${wtoken}|${ftoken})" /etc/dnsmasq.d/statics.conf | sort -u | xargs -t -i ipmitool -I lanplus -U "${USERNAME}" -E -H {} power off ``` > **NOTE:** The NCN boot order is further explained in [NCN Boot Workflow](../background/ncn_boot_workflow.md). @@ -243,7 +248,54 @@ for all nodes, the Ceph storage will have been initialized and the Kubernetes cl > **NOTE:** To exit a conman console, press `&` followed by a `.` (e.g. keystroke `&.`) -### 2.3 Configure `kubectl` on the PIT +### 2.3 Deploy HPE Slingshot Fabric Manager nodes (optional) + +> **NOTE:** This section only applies to systems with HPE Slingshot Fabric Manager nodes. If the system does not have Fabric Manager nodes, skip this section and proceed to [Configure `kubectl` on the PIT](#24-configure-kubectl-on-the-pit). + +HPE Slingshot Fabric Manager nodes have hostnames like `fmn001`, `fmn002`, etc., with corresponding BMC names like `fmn001-mgmt`, `fmn002-mgmt`, etc. + +1. (`pit#`) Verify that Fabric Manager nodes are present in the system. + + ```bash + grep -oP "${ftoken}" /etc/dnsmasq.d/statics.conf | sort -u + ``` + + If this command returns no output, there are no Fabric Manager nodes to deploy. Skip the remaining steps in this section. + +1. (`pit#`) Check power status of Fabric Manager nodes. 
+ + ```bash + grep -oP "${ftoken}" /etc/dnsmasq.d/statics.conf | sort -u | xargs -t -i ipmitool -I lanplus -U "${USERNAME}" -E -H {} power status + ``` + +1. (`pit#`) Boot the **Fabric Manager nodes**. + + ```bash + grep -oP "${ftoken}" /etc/dnsmasq.d/statics.conf | sort -u | xargs -t -i ipmitool -I lanplus -U "${USERNAME}" -E -H {} power on + ``` + +1. (`pit#`) Observe the installation through the console of the first Fabric Manager node. + + ```bash + conman -j fmn001-mgmt + ``` + + > **NOTES:** + > + > - If the nodes have PXE boot issues (e.g. getting PXE errors, not pulling the `ipxe.efi` binary), then see [Troubleshooting PXE Boot](troubleshooting_pxe_boot.md). + > - To exit a conman console, press `&` followed by a `.` (e.g. keystroke `&.`) + +1. (`pit#`) Wait for the Fabric Manager nodes to complete `cloud-init`. + + The following text should appear in the console: + + ```text + The system is finally up, after XXXX.XX seconds cloud-init has come to completion. + ``` + + > **NOTE:** The duration reported will vary. + +### 2.4 Configure `kubectl` on the PIT 1. (`pit#`) This was done in a previous step, but if the user is resuming/starting here then the first master needs to be redefined. 
From a8158c2cf969a99eb9f2db6c4b36ac58f60fb4e2 Mon Sep 17 00:00:00 2001 From: Chris Spiller <86013738+spillerc-hpe@users.noreply.github.com> Date: Thu, 15 Jan 2026 16:18:52 +0000 Subject: [PATCH 44/94] CASMINST-7513 - DOCS: Add steps to deploy fmn nodes during CSM install (#6453) * CASMINST-7513 - DOCS: Add steps to deploy fmn nodes during CSM install * Apply suggestions from code review Co-authored-by: Nathan Rockershousen Signed-off-by: Chris Spiller <86013738+spillerc-hpe@users.noreply.github.com> * Add indicatator where fix-spire-on-fmn.sh should run * Use simplified Boot_NCN.md procedure to deploy FMN nodes and remove redundant manual step to set metal.no-wipe=1 --------- Signed-off-by: Chris Spiller <86013738+spillerc-hpe@users.noreply.github.com> Co-authored-by: Nathan Rockershousen --- install/README.md | 9 ++ .../Configure_FM_On_Baremetal.md | 8 ++ .../Redeploy_Fabric_Manager_Nodes.md | 121 ++++++++++++++++++ 3 files changed, 138 insertions(+) create mode 100644 operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md diff --git a/install/README.md b/install/README.md index 1c17d3b8b37b4..b3c56e0e7c9b6 100644 --- a/install/README.md +++ b/install/README.md @@ -74,6 +74,7 @@ shown here with numbered topics. 1. [Kubernetes encryption](#1-kubernetes-encryption) 1. [Export Nexus data](#2-export-nexus-data) - [Installation of additional HPE Cray EX software products](#installation-of-additional-hpe-cray-ex-software-products) +- [Fabric Manager Node redeployment](#fabric-manager-node-redeployment) > **`NOTE`** If problems are encountered during the installation, > [Troubleshooting installation problems](#12-troubleshooting-installation-problems) and @@ -336,3 +337,11 @@ See the [Install or upgrade additional products with IUF](../operations/iuf/work procedure to continue with the installation of additional HPE Cray EX software products. For additional information on the IUF, see [Install and Upgrade Framework](../operations/iuf/IUF.md). 
+ +## Fabric Manager Node redeployment + +> **OPTIONAL:** This section is only applicable if Fabric Manager nodes were deployed during the CSM installation. + +After additional HPE Cray EX software products have been installed, Fabric Manager nodes need to be redeployed with a new customized image. + +See [Redeploy Fabric Manager Nodes](../operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md). diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 436b878fc4d8f..815b5ff22c345 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -436,6 +436,14 @@ Repository priorities are without effect. All enabled repositories share the sam ... ``` +#### Join Fabric Manager nodes to Spire + +After the Fabric Manager nodes have been deployed and are running, join them to Spire to avoid issues with Spire tokens. + +```bash +ncn-m001:~ # /opt/cray/platform-utils/spire/fix-spire-on-fmn.sh +``` + ### Install Fabric Manager on FM baremetal nodes For install/ upgrade Fabric Manager on the FMNs please refer [FabricManager Install/ Upgrade](...) diff --git a/operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md b/operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md new file mode 100644 index 0000000000000..736b2afd36380 --- /dev/null +++ b/operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md @@ -0,0 +1,121 @@ +# Redeploy Fabric Manager Nodes + +> **OPTIONAL:** This procedure is only applicable if Fabric Manager nodes were deployed during the CSM installation. + +Although Fabric Manager Nodes (FMNs) were deployed during the CSM installation, the initial deployment did not include the final image with all necessary components. Once the other HPE Cray EX software products have been installed via the Install and Upgrade Framework (IUF), the FMNs need to be redeployed with the new customized image. 
+ +## Prerequisites + +- CSM installation has been completed +- Additional HPE Cray EX software products have been installed via IUF +- The Cray CLI is configured and authenticated +- SAT is configured and authenticated + +## Procedure + +### 1. Build the FMN image + +Follow the procedure in the **FMN Base Image Creation** section of [Configure FM (Fabric Manager) On Baremetal](Configure_FM_On_Baremetal.md#fmn-base-image-creation) to build the new FMN base image. + +This procedure will: + +- Create a `sat bootprep` configuration file for FMN +- Execute `sat bootprep run` to generate the new FMN image and upload it to S3 +- Produce a new IMS image ID for the customized FMN image + +### 2. Update BSS with the new FMN image + +Once the new FMN image has been built and uploaded to S3, update the boot parameters in the Boot Script Service (BSS) to point the FMNs to the new image. + +1. (`ncn-mw#`) Set an environment variable for the new IMS image ID. + + After running `sat bootprep run`, obtain the IMS resultant image ID from the output or from the CFS session: + + ```bash + NEW_IMS_IMAGE_ID="<IMS_IMAGE_ID>" + ``` + +1. (`ncn-mw#`) Determine the component names (xnames) of the FMNs. + + ```bash + cray hsm state components list --role Management --subrole FabricManager --format json | jq -r '.Components[].ID' + ``` + + Example output: + + ```text + x3000c0s28b0n0 + x3000c0s29b0n0 + ``` + +1. (`ncn-mw#`) Update the boot parameters for the FMNs. + + Replace the `<xname>` placeholders with the actual xnames of the FMNs. + + ```bash + /usr/share/doc/csm/scripts/operations/node_management/assign-ncn-images.sh \ + -p "${NEW_IMS_IMAGE_ID}" <xname> <xname> + ``` + + For example: + + ```bash + /usr/share/doc/csm/scripts/operations/node_management/assign-ncn-images.sh \ + -p "${NEW_IMS_IMAGE_ID}" x3000c0s28b0n0 x3000c0s29b0n0 + ``` + +1. (`ncn-mw#`) Verify the boot parameters have been updated.
+ + ```bash + cray bss bootparameters list --name <xname> --format json | jq -r '.[0].params' | grep metal.server + ``` + + The output should show the new IMS image ID in the `metal.server` parameter. + +1. (`ncn-mw#`) Set `metal.no-wipe=0` to allow the disk to be wiped during redeployment. + + For each FMN, set `metal.no-wipe=0`: + + ```bash + TARGET_XNAME=<xname> + csi handoff bss-update-param --set metal.no-wipe=0 --limit ${TARGET_XNAME} + ``` + + For example: + + ```bash + TARGET_XNAME=x3000c0s28b0n0 + csi handoff bss-update-param --set metal.no-wipe=0 --limit ${TARGET_XNAME} + ``` + + Repeat for each FMN. + +1. (`ncn-mw#`) Verify the change: + + ```bash + cray bss bootparameters list --name ${TARGET_XNAME} --format=json | jq -r '.[0].params' | grep metal.no-wipe + ``` + + The output should show `metal.no-wipe=0`. + +### 3. Redeploy the FMNs + +After updating BSS with the new image and setting `metal.no-wipe=0`, redeploy the FMNs to apply the new image. + +Follow the [Boot NCN](../node_management/Add_Remove_Replace_NCNs/Boot_NCN.md) procedure for each Fabric Manager node. This procedure will: + +- Set the PXE boot option and power on the node +- Monitor the boot process +- Set `metal.no-wipe=1` after successful boot to preserve data on future reboots + +**Note:** Skip the sections in Boot NCN that are specific to master, worker, or storage nodes (such as verifying cluster membership or Ceph operations). + +### 4. Join Fabric Manager nodes to Spire + +After the Fabric Manager nodes have been redeployed and are running with the new image, join them to Spire to avoid issues with Spire tokens. + +1. (`ncn-mw#`) Join Spire on the Fabric Manager nodes.
+ + ```bash + /opt/cray/platform-utils/spire/fix-spire-on-fmn.sh + ``` From 47bf9639798e1309836bbec96d9236b25aa46f92 Mon Sep 17 00:00:00 2001 From: Chris Spiller <86013738+spillerc-hpe@users.noreply.github.com> Date: Mon, 19 Jan 2026 21:52:42 +0000 Subject: [PATCH 45/94] CASMTRIAGE-8993 - apply_csm_configuration.sh needs to be updated to ignore fabric manager nodes (#6464) * CASMTRIAGE-8993 - apply_csm_configuration.sh needs to be updated to ignore fabric manager nodes * Fix license * fixed non-ascii quotes --------- Co-authored-by: Jason Davis --- operations/fm_on_baremetal/README.md | 2 +- scripts/operations/configuration/apply_csm_configuration.sh | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 04aae3a4baab7..4802284c08285 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -56,7 +56,7 @@ In CSM versions < 1.7.0, the deployment of the Fabric Manager within CSM uses na In theory, this model should satisfy HA requirements: if the pod fails (or needs to be moved during an upgrade), Kubernetes can detect the fault and recreate the Fabric Manager on another node, providing continuity. -In practice, however, this approach does not meet the contractual HA obligations. Because of Kubernetes “best‑effort” scheduling and the resource demands of the Fabric Manager, real service outages can exceed 5 minutes. +In practice, however, this approach does not meet the contractual HA obligations. Because of Kubernetes "best‑effort" scheduling and the resource demands of the Fabric Manager, real service outages can exceed 5 minutes. To address these issues, CSM 1.7.1 includes FM on baremetal support, which provides the necessary base OS image, networking, and storage configurations for running the Slingshot Fabric Manager natively within the CSM environment to achieve HA. 
diff --git a/scripts/operations/configuration/apply_csm_configuration.sh b/scripts/operations/configuration/apply_csm_configuration.sh index 4887fff783335..90c100f9a7a7e 100755 --- a/scripts/operations/configuration/apply_csm_configuration.sh +++ b/scripts/operations/configuration/apply_csm_configuration.sh @@ -2,7 +2,7 @@ # # MIT License # -# (C) Copyright 2021-2025 Hewlett Packard Enterprise Development LP +# (C) Copyright 2021-2026 Hewlett Packard Enterprise Development LP # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -196,7 +196,8 @@ BACKUP_NCN_CONFIG_FILE=$(run_mktemp --tmpdir="${TMPDIR}" "backup-${CONFIG_NAME}- if [[ -z ${XNAMES} ]]; then echo "Retrieving a list of all management node component names (xnames)" - XNAMES=$(cray hsm state components list --role Management --type Node --format json | jq -r '.Components | map(.ID) | join(",")') + echo "NOTE: FabricManager nodes are excluded from configuration" + XNAMES=$(cray hsm state components list --role Management --type Node --format json | jq -r '.Components | map(select(.SubRole != "FabricManager")) | map(.ID) | join(",")') [[ -n ${XNAMES} ]] || err_exit "No management nodes found in HSM" fi XNAME_LIST=${XNAMES//,/ } From eccdb7d661d3b2e474d9c9ef1473b510407375c7 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 14:12:12 +0530 Subject: [PATCH 46/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/Configure_FM_On_Baremetal.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 815b5ff22c345..9d278a0cc961b 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ 
b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -47,7 +47,7 @@ The FabricManager subrole has been introduced to facilitate FMN node discovery a #### Create FMN base image (only base OS; no Fabric Manager) -Adapt and customize the current NCN Kubernetes image for compatibility with FMN node requirements. [See] (../../operations/configuration_management/Management_Node_Image_Customization.md) +Adapt and customize the current NCN Kubernetes image for compatibility with FMN node requirements. [See] (../configuration_management/Management_Node_Image_Customization.md) ##### FMN Boot Preparation @@ -446,4 +446,4 @@ ncn-m001:~ # /opt/cray/platform-utils/spire/fix-spire-on-fmn.sh ### Install Fabric Manager on FM baremetal nodes -For install/ upgrade Fabric Manager on the FMNs please refer [FabricManager Install/ Upgrade](...) +For install/ upgrade Fabric Manager on the FMNs please refer _HPE Slingshot Installation Guide for CSM_. From c28afefb27b99ec4099c4511ed6baf7df3c9ebce Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 14:13:08 +0530 Subject: [PATCH 47/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/Configure_FM_On_Baremetal.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 9d278a0cc961b..9ce9e91387feb 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -47,7 +47,7 @@ The FabricManager subrole has been introduced to facilitate FMN node discovery a #### Create FMN base image (only base OS; no Fabric Manager) -Adapt and customize the current NCN Kubernetes image for compatibility with FMN node requirements. 
[See] (../configuration_management/Management_Node_Image_Customization.md) +Adapt and customize the current NCN Kubernetes image for compatibility with FMN node requirements. [See](../configuration_management/Management_Node_Image_Customization.md) ##### FMN Boot Preparation From d5e22b660867a6aff84247661b33b71de75455f5 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 14:15:23 +0530 Subject: [PATCH 48/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/Configure_FM_On_Baremetal.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 9ce9e91387feb..b96b7b49c2231 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -147,8 +147,7 @@ Take extreme care when manipulating ACLs, if CANU suggests moving a "permit any ## FMN Booting -Once the FMNs have been added to the CSM, proceed to boot the FMN nodes (using iPXE boot commands) with the FMN bare-metal base image. [See] -(../../operations/node_management/Add_Remove_Replace_NCNs/Boot_NCN.md#boot-ncn) +Once the FMNs have been added to the CSM, proceed to boot the FMN nodes (using iPXE boot commands) with the FMN bare-metal base image. 
[See](../node_management/Add_Remove_Replace_NCNs/Boot_NCN.md#boot-ncn) ## FMN Post Boot From b5abb30f157b56a89bc80e117299768e3bcddd5e Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 14:18:35 +0530 Subject: [PATCH 49/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 4802284c08285..3270dd20fa19e 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -69,7 +69,7 @@ To configure FM on baremetal please follow the [procedure](Configure_FM_On_Barem ## Slingshot Switch Firmware Update * For clusters using the FM pod: CSM will continue to handle switch firmware uploads and updates as [before](../../operations/iuf/workflows/slingshot_management_network_switch_updates.md#perform-slingshot-switch-and-management-network-switch-firmware-updates). 
-* For clusters with bare-metal FM: FMN will host the switch firmware, and FM will be responsible for managing [switch updates] (https://github.hpe.com/hpe/hpc-sshot-slingshot_docs/blob/23870cdfbda43c015aac641d7619faf0b0003634/portal/developer-portal/snippets/fm/install_bare_metal_fm_csm.md#optional-update-hpe-slingshot-switch-firmware) +* For clusters with bare-metal FM: FMN will host the switch firmware, and FM will be responsible for managing [switch updates](https://github.hpe.com/hpe/hpc-sshot-slingshot_docs/blob/23870cdfbda43c015aac641d7619faf0b0003634/portal/developer-portal/snippets/fm/install_bare_metal_fm_csm.md#optional-update-hpe-slingshot-switch-firmware) ## Troubleshooting From cf56df8a6428b7a57da3160749e1bb440f94b926 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 14:20:51 +0530 Subject: [PATCH 50/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 3270dd20fa19e..4c75d6bfa521d 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -69,7 +69,7 @@ To configure FM on baremetal please follow the [procedure](Configure_FM_On_Barem ## Slingshot Switch Firmware Update * For clusters using the FM pod: CSM will continue to handle switch firmware uploads and updates as [before](../../operations/iuf/workflows/slingshot_management_network_switch_updates.md#perform-slingshot-switch-and-management-network-switch-firmware-updates). 
-* For clusters with bare-metal FM: FMN will host the switch firmware, and FM will be responsible for managing [switch updates](https://github.hpe.com/hpe/hpc-sshot-slingshot_docs/blob/23870cdfbda43c015aac641d7619faf0b0003634/portal/developer-portal/snippets/fm/install_bare_metal_fm_csm.md#optional-update-hpe-slingshot-switch-firmware) +* For clusters with bare-metal FM: FMN will host the switch firmware, and FM will be responsible for managing [switch updates](https://github.hpe.com/hpe/hpc-sshot-slingshot_docs/tree/main/portal/developer-portal/snippets/fm/install_bare_metal_fm_csm.md#optional-update-hpe-slingshot-switch-firmware) ## Troubleshooting From 2e2233ef3d17096758f316be130aca35ce20596f Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 14:31:14 +0530 Subject: [PATCH 51/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/Configure_FM_On_Baremetal.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index b96b7b49c2231..1e694a4bb2a36 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -445,4 +445,4 @@ ncn-m001:~ # /opt/cray/platform-utils/spire/fix-spire-on-fmn.sh ### Install Fabric Manager on FM baremetal nodes -For install/ upgrade Fabric Manager on the FMNs please refer _HPE Slingshot Installation Guide for CSM_. +To install or upgrade Fabric Manager on the FMNs, refer to section "3 Install HPE Slingshot Fabric Manager software on bare metal servers" in the _HPE Slingshot Installation Guide for CSM_ PDF.
From 0be95c80b0e7af425c25754ad593a2c1b8f0c8c5 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 14:35:06 +0530 Subject: [PATCH 52/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 4c75d6bfa521d..5b3833efe1348 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -69,7 +69,9 @@ To configure FM on baremetal please follow the [procedure](Configure_FM_On_Barem ## Slingshot Switch Firmware Update * For clusters using the FM pod: CSM will continue to handle switch firmware uploads and updates as [before](../../operations/iuf/workflows/slingshot_management_network_switch_updates.md#perform-slingshot-switch-and-management-network-switch-firmware-updates). -* For clusters with bare-metal FM: FMN will host the switch firmware, and FM will be responsible for managing [switch updates](https://github.hpe.com/hpe/hpc-sshot-slingshot_docs/tree/main/portal/developer-portal/snippets/fm/install_bare_metal_fm_csm.md#optional-update-hpe-slingshot-switch-firmware) +* For clusters with bare-metal FM: FMN will host the switch firmware, and FM will be responsible for managing switch updates. Refer section "3.2.6 (Optional) Update HPE Slingshot switch firmware" in _HPE Slingshot Installation Guide for CSM_ PDF. 
+* +* (https://github.hpe.com/hpe/hpc-sshot-slingshot_docs/tree/main/portal/developer-portal/snippets/fm/install_bare_metal_fm_csm.md#optional-update-hpe-slingshot-switch-firmware) ## Troubleshooting From 140f74aab5fb6d75d9e12909419816956512a527 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 14:35:50 +0530 Subject: [PATCH 53/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 5b3833efe1348..10cf3dd9635d8 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -70,8 +70,6 @@ To configure FM on baremetal please follow the [procedure](Configure_FM_On_Barem * For clusters using the FM pod: CSM will continue to handle switch firmware uploads and updates as [before](../../operations/iuf/workflows/slingshot_management_network_switch_updates.md#perform-slingshot-switch-and-management-network-switch-firmware-updates). * For clusters with bare-metal FM: FMN will host the switch firmware, and FM will be responsible for managing switch updates. Refer section "3.2.6 (Optional) Update HPE Slingshot switch firmware" in _HPE Slingshot Installation Guide for CSM_ PDF. 
-* -* (https://github.hpe.com/hpe/hpc-sshot-slingshot_docs/tree/main/portal/developer-portal/snippets/fm/install_bare_metal_fm_csm.md#optional-update-hpe-slingshot-switch-firmware) ## Troubleshooting From 46234b902f073c4fce146a1fd132eabe33842b80 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 14:41:36 +0530 Subject: [PATCH 54/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 10cf3dd9635d8..2f7f3bc66fe01 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -4,7 +4,7 @@ - [Terminology and Components](#terminology-and-components) - [Architecture](#architecture) - [Configure FM on baremetal](#configure-fm-on-baremetal) -- [Slngshot Switch Firmware Update](#slngshot-switch-firmware-update) +- [Slingshot Switch Firmware Update](#slngshot-switch-firmware-update) - [Troubleshooting](#troubleshooting) ## Introduction From f7ea39546214e96f9a6de25639715821f2ee15f3 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 14:42:33 +0530 Subject: [PATCH 55/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 2f7f3bc66fe01..8a3fd0562cf0e 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -4,7 +4,7 @@ - [Terminology and Components](#terminology-and-components) - [Architecture](#architecture) - [Configure FM on baremetal](#configure-fm-on-baremetal) -- 
[Slingshot Switch Firmware Update](#slngshot-switch-firmware-update) +- [Slingshot Switch Firmware Update](#slingshot-switch-firmware-update) - [Troubleshooting](#troubleshooting) ## Introduction From 4c8e2fb6195c455f53c5575a3b5911b6a86a139e Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 14:54:34 +0530 Subject: [PATCH 56/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/Configure_FM_On_Baremetal.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 1e694a4bb2a36..bccb90bf0f8bd 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -22,7 +22,7 @@ Post CSM Upgrade from 1.7.0 to CSM 1.7.1, if an administrator wishes to enable F * step 2: [FMN Pre Boot](#fmn-pre-boot) * [FMN Base Image Creation](#fmn-base-image-creation) * [Add FMN Nodes to CSM](#add-fmn-nodes-to-csm) - * [Update Switch Configuration With CANU](#update-switch-configuration-with-canu) + * [Generate Switch Configuration With CANU](#generate-switch-configuration-with-canu) * Step 3: [FMN Booting](#fmn-booting) * Step 4: [FMN Post Boot](#fmn-post-boot) * [Validation](#validation) @@ -120,7 +120,7 @@ After creating the FMN base image, add FMN nodes to CSM by following the Follow After completion of the NCN add procedure, SLS, HSM, and BSS will contain the corresponding FMN data. 
-#### Generate the switch configuration +### Generate Switch Configuration With CANU For Example: From 4fa9294b2f3d37d497232751239840052a3d4075 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 14:57:01 +0530 Subject: [PATCH 57/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/Configure_FM_On_Baremetal.md | 1 - 1 file changed, 1 deletion(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index bccb90bf0f8bd..a294355973de5 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -27,7 +27,6 @@ Post CSM Upgrade from 1.7.0 to CSM 1.7.1, if an administrator wishes to enable F * Step 4: [FMN Post Boot](#fmn-post-boot) * [Validation](#validation) * [Install Fabric Manager on FM baremetal nodes](#install-fabric-manager-on-fm-baremetal-nodes) -* Step 5: [Uninstall FMN Helm Chart](#uninstall-fmn-helm-chart) ## FMN Prerequisites From c311d71e1d87a482a511238bf9660f35d349eac1 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 15:25:32 +0530 Subject: [PATCH 58/94] Update glossary.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- glossary.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/glossary.md b/glossary.md index a6d5b90f3fe85..3acf9ad736ce5 100644 --- a/glossary.md +++ b/glossary.md @@ -34,6 +34,8 @@ Glossary of terms used in CSM documentation. 
* [EX Compute Cabinet](#ex-compute-cabinet) * [EX TDS Cabinet](#ex-tds-cabinet) * [Fabric](#fabric) +* [Fabric Manager](#fabric-manager) +* [Fabric Manager Node](#fabric-manager-node) * [Firmware Action Service (FAS)](#firmware-action-service-fas) * [Floor Standing CDU](#floor-standing-cdu) * [Hardware Management Network (HMN)](#hardware-management-network-hmn) @@ -376,6 +378,17 @@ compute blades and 16 [High Speed Network (HSN)](#high-speed-network-hsn) switch The [Slingshot](#slingshot) fabric consists of the switches, cables, ports, topology policy, and configuration settings for the Slingshot [High-Speed Network](#high-speed-network-hsn). +## Fabric Manager + +The [Slingshot](#slingshot) Fabric Manager software includes a suite of software which configures, manages, and monitors the network. It runs on an +external server and communicates with the switches over the out-of-band management network. + +## Fabric Manager Node + +The [Slingshot](#slingshot) Fabric Manager runs on at least one dedicated server referred to as the HPE Slingshot Fabric Manager Node (FMN). It also runs on the +HPE Slingshot switches. HPE Slingshot Fabric Manager software is installed on a bare metal server (FMN) instead of using Kubernetes pods in order to support +systems with HPE Slingshot version 3.0.0 and above and High Availability (HA) requirements. + ## Firmware Action Service (FAS) The Firmware Action Service (FAS) provides an interface for managing firmware versions of Redfish-enabled hardware in the system. 
From eac0892410824f67d41fd5a141c07c1e4a59ae89 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 15:27:04 +0530 Subject: [PATCH 59/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 8a3fd0562cf0e..6e24f7ba6eea8 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -26,10 +26,10 @@ CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS i [Slingshot Host Software](../../glossary.md#slingshot-host-software-shs) ### FM -[Fabric Manager](...) +[Fabric Manager](../../glossary.md#fabric-manager) ### FMN -[Fabric Manager Node](...) +[Fabric Manager Node](../../glossary.md#fabric-manager-node) ### SLS From 4a01d922f37d712f7f06d25cdb635271918cb9b6 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 15:39:26 +0530 Subject: [PATCH 60/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 6e24f7ba6eea8..329cd9138a34e 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -5,7 +5,6 @@ - [Architecture](#architecture) - [Configure FM on baremetal](#configure-fm-on-baremetal) - [Slingshot Switch Firmware Update](#slingshot-switch-firmware-update) -- [Troubleshooting](#troubleshooting) ## Introduction @@ -70,7 +69,3 @@ To configure FM on baremetal please follow the [procedure](Configure_FM_On_Barem * For clusters using the FM pod: CSM will 
continue to handle switch firmware uploads and updates as [before](../../operations/iuf/workflows/slingshot_management_network_switch_updates.md#perform-slingshot-switch-and-management-network-switch-firmware-updates). * For clusters with bare-metal FM: FMN will host the switch firmware, and FM will be responsible for managing switch updates. Refer section "3.2.6 (Optional) Update HPE Slingshot switch firmware" in _HPE Slingshot Installation Guide for CSM_ PDF. - -## Troubleshooting - -For information on how to troubleshoot FM on baremetal, see [Troubleshooting](Troubleshooting.md). From 118ea2adc2dd36cb23825f0599a6f959519a3518 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 15:45:03 +0530 Subject: [PATCH 61/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../fm_on_baremetal/Configure_FM_On_Baremetal.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index a294355973de5..e8df11cb9f38d 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -1,17 +1,17 @@ -# Configure FM (Fabric Manager) On Baremetal +# Configure FM (Fabric Manager) On `Baremetal` -This document describes the procedure for customizing and deploying the base FMN OS image along with provisioning storage LUNs, -and configuring the necessary networking to support Fabric Manager on baremetal following the CSM upgrade. +This document describes the procedure for customizing and deploying the base FMN OS image along with provisioning storage `LUNs`, +and configuring the necessary networking to support Fabric Manager on `baremetal` following the CSM upgrade. 
## Requirements * Hardware requirements - 2 bare-metal nodes with dedicated boot and data disks -* Software requirements - OS (SLES SP7), CSM services like CANU, HSM, SLS, BSS, CSI, CFS, ansible playbooks for FMN +* Software requirements - OS (SLES SP7), CSM services like CANU, HSM, SLS, BSS, CSI, CFS, Ansible playbooks for FMN ## Note: * Fabric Manager Nodes (`FMNs`) can be added only after the CSM upgrade has been completed. -* By default, Fabric Manager would be running on kubernetes as a Kuberetes pod +* By default, Fabric Manager would be running on Kubernetes as a Kubernetes pod * After Fabric Manager is migrated from a Kubernetes pod to bare-metal infrastructure, it cannot be reverted. ## Post upgrade of CSM from 1.7.0 to 1.7.1 From 3ca8d56e5fc18a4019b3fe0e239f10aed870bb78 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 15:51:26 +0530 Subject: [PATCH 62/94] Update .spelling Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .spelling | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.spelling b/.spelling index c4ff5e9d239dd..9048d3a36d8f2 100644 --- a/.spelling +++ b/.spelling @@ -871,6 +871,10 @@ xnames zeroization zeroize zypper +Baremetal +baremetal +FabricManager +FMNs # Network Terms - Starting to organize a little bit here but it's not done 0.5m From 44293762ed91ea5596185ee6e0bb16fbf97f908a Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 16:08:27 +0530 Subject: [PATCH 63/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../Configure_FM_On_Baremetal.md | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 
e8df11cb9f38d..adbcdac862402 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -1,19 +1,19 @@ # Configure FM (Fabric Manager) On `Baremetal` -This document describes the procedure for customizing and deploying the base FMN OS image along with provisioning storage `LUNs`, +This document describes the procedure for customizing and deploying the base FMN OS image along with provisioning storage `LUNs`, and configuring the necessary networking to support Fabric Manager on `baremetal` following the CSM upgrade. - + ## Requirements * Hardware requirements - 2 bare-metal nodes with dedicated boot and data disks * Software requirements - OS (SLES SP7), CSM services like CANU, HSM, SLS, BSS, CSI, CFS, Ansible playbooks for FMN -## Note: +## Notes * Fabric Manager Nodes (`FMNs`) can be added only after the CSM upgrade has been completed. * By default, Fabric Manager would be running on Kubernetes as a Kubernetes pod * After Fabric Manager is migrated from a Kubernetes pod to bare-metal infrastructure, it cannot be reverted. - + ## Post upgrade of CSM from 1.7.0 to 1.7.1 Post CSM Upgrade from 1.7.0 to CSM 1.7.1, if an administrator wishes to enable Fabric Manager on baremetal, they must follow below procedure. @@ -42,7 +42,10 @@ Verify that the BMC of each FMN is configured with the correct root user credent ### FMN Base Image Creation -The FabricManager subrole has been introduced to facilitate FMN node discovery and configuration. Corresponding updates have been made to `ncn_nodes.yaml` and `ncn_initrd.yaml` to support customization of the FMN base image— a non-Kubernetes image containing only essential artifacts. This customization is performed using the `csm.fm.baremetal` Ansible role, executed under the `Management_FabricManager` host. The following steps details the process for generating the FMN image. 
+The FabricManager subrole has been introduced to facilitate FMN node discovery and configuration. Corresponding updates have been made to `ncn_nodes.yaml` and `ncn_initrd.yaml` to support customization of the FMN base image— a non-Kubernetes image containing only essential artifacts. +This customization is performed using the `csm.fm.baremetal` Ansible role, executed under the `Management_FabricManager` host. + +The following steps details the process for generating the FMN image. #### Create FMN base image (only base OS; no Fabric Manager) @@ -89,12 +92,12 @@ images: - Management_Fabric ``` -##### New FMN base image creation and uploade to S3 +##### New FMN base image creation and upload to S3 Execute the commands below on any master node to generate the new FMN image and upload it to the S3 storage. First set `bootprep` file path: - + ```bash # BOOTPREP_FILE_PATH=./fmn_bootprep.yaml ``` @@ -117,11 +120,11 @@ sat bootprep run \ After creating the FMN base image, add FMN nodes to CSM by following the Follow step 1 to step in [NCN add procedure](../../operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#add-worker-storage-master-or-fmnfabric-manager-node-ncns) -After completion of the NCN add procedure, SLS, HSM, and BSS will contain the corresponding FMN data. +After completion of the NCN add procedure, SLS, HSM, and BSS will contain the corresponding FMN data. ### Generate Switch Configuration With CANU -For Example: +For Example: ```bash canu generate network config -a TDS --csm 1.7 --custom-config custom_switch_config.yaml --edge Arista --sls-file sls_input_file.json --ccj surtur-ccj.json --folder output (--enable-nmn-isolation --nmn-pvlan ) @@ -132,7 +135,7 @@ canu generate network config -a TDS --csm 1.7 --custom-config custom_switch_conf * TDS style systems have the management nodes plugged directly into the spine switches, most will only have a single leaf-bmc switch. 
* Systems that use the "Full" architecture will have the management nodes plugged into the leaf switches. -The configuration generated here will contain updates for the leaf-bmc switch(es) for the Fabric Manager node BMCs and updates to either the spine switches or the leaf switches for the bonded connection. +The configuration generated here will contain updates for the leaf-bmc switch(`es`) for the Fabric Manager node BMCs and updates to either the spine switches or the leaf switches for the bonded connection. For Example: @@ -140,7 +143,7 @@ For Example: canu validate switch config --ip 10.254.0.4 --generated output/sw-leaf-bmc-001.cfg ``` -**Note:** CANU will likely suggest the removal of the snmpv3 user, this is because the SNMP configuration is not held in the `custom_config.yaml` file because it's not permitted to store secrets in GitHub. Do NOT remove this configuration from the switch. +**Note:** CANU will likely suggest the removal of the `snmpv3` user, this is because the SNMP configuration is not held in the `custom_config.yaml` file because it's not permitted to store secrets in GitHub. Do NOT remove this configuration from the switch. Take extreme care when manipulating ACLs, if CANU suggests moving a "permit any ..." rule be sure to create the new rule before removing the old one. It is possible to lose access to the switch if the ACLs are not applied in the correct order. 
@@ -276,7 +279,6 @@ Aliases = [ "fmn001-chn", "time-chn", "time-chn.local",] Comment = "x3000c0s28b0n0" IPAddress = "10.102.193.206" Name = "fmn001" - ``` #### SLS hardware should list the new nodes @@ -297,7 +299,7 @@ For Example: cray sls search networks list --name NMN --format json ``` -#### HSM ethernet interfaces should be updated with the same allocated IPs +#### HSM `ethernet` interfaces should be updated with the same allocated IPs For Example: @@ -321,7 +323,7 @@ cray bss bootparameters list --hosts Global --format json #### Validate FMN required storage configuration (LVM partitions) -Check if both LVM partiions `/dev/mapper/metalvg0-SCFIRMWARE` and `/dev/mapper/metalvg0-SLINGSHOT` created and mounted under `/opt/cray/FW/sc-firmware` and `/opt/slingshot` rescpectively on both FMN nodes (`fmn001` and `fmn002`). +Check if both LVM partitions `/dev/mapper/metalvg0-SCFIRMWARE` and `/dev/mapper/metalvg0-SLINGSHOT` are created and mounted under `/opt/cray/FW/sc-firmware` and `/opt/slingshot` respectively on both FMN nodes (`fmn001` and `fmn002`). ```bash fmn001:~ # lsblk @@ -411,8 +413,7 @@ fmn002:~ # mount | grep /opt/slingshot #### Validate addition of FM required repositories -Check if all the required repos are added on both FMN nodes (`fmn001` and `fmn002`) in order to install prerequisite OS RPMs -required during Slingshot Sftware installation. +Check if all the required repos are added on both FMN nodes (`fmn001` and `fmn002`) in order to install prerequisite OS RPMs required during Slingshot Software installation.
For Example: From 5149d94d1ed1fdcc937d35ff3db6ad1a0e45679d Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 16:20:53 +0530 Subject: [PATCH 64/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/Configure_FM_On_Baremetal.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index adbcdac862402..f2697fcadcef9 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -42,7 +42,8 @@ Verify that the BMC of each FMN is configured with the correct root user credent ### FMN Base Image Creation -The FabricManager subrole has been introduced to facilitate FMN node discovery and configuration. Corresponding updates have been made to `ncn_nodes.yaml` and `ncn_initrd.yaml` to support customization of the FMN base image— a non-Kubernetes image containing only essential artifacts. +The FabricManager subrole has been introduced to facilitate FMN node discovery and configuration. Corresponding updates have been made to +`ncn_nodes.yaml` and `ncn_initrd.yaml` to support customization of the FMN base image— a non-Kubernetes image containing only essential artifacts. This customization is performed using the `csm.fm.baremetal` Ansible role, executed under the `Management_FabricManager` host. The following steps details the process for generating the FMN image. @@ -171,7 +172,7 @@ Last login: Thu Dec 4 05:03:46 2025 from 10.252.1.10 ... ``` -2. Check if both FMN nodes are shown under `sat status`: +1. 
Check if both FMN nodes are shown under `sat status`: ```bash ncn-m001:~ # sat status | grep fmn @@ -183,7 +184,7 @@ INFO: All values for 'Most Recent Session Template' are 'MISSING', omitting key. | x3000c0s29b0n0 | fmn002 | Node | 100012 | On | OK | True | X86 | River | Management | FabricManager | Sling | True | fmn-bm-default-configuration | configured | 0 | stable | MISSING | MISSING | ``` -3. Optionally check more details on the FMN nodes +1. Optionally check more details on the FMN nodes For Example: From fecc47832cc704aba1380a03c0a23e01381a1838 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 16:32:00 +0530 Subject: [PATCH 65/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 37 +++++++++++++++++++--------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 329cd9138a34e..de39f199e44e5 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -8,56 +8,71 @@ ## Introduction -The Fabric Manager (FM) bare-metal enablement within the Cray System Management (CSM) framework introduces dedicated Fabric Manager Nodes (FMNs) that manage and monitor Slingshot fabric operations outside of a Kubernetes environment. The overall bare-metal Fabric Manager solution is described in the Slingshot Fabric Manager HA documentation . +The Fabric Manager (FM) bare-metal enablement within the Cray System Management (CSM) framework introduces dedicated Fabric Manager Nodes (FMNs) +that manage and monitor Slingshot fabric operations outside of a Kubernetes environment. CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS image, networking, and storage configurations for running the Slingshot Fabric Manager natively within the CSM environment. 
**NOTE**: -* `FMNs` are considered Management nodes. -* After Fabric Manager is migrated from a Kubernetes pod to bare-metal infrastructure, it cannot be reverted. -* The two `FMNs` must be part of two different management racks to support Rack Resiliency. -* This feature will not be supported on systems with Dell/ Mellanox based management networks. +- `FMNs` are considered Management nodes. +- After Fabric Manager is migrated from a Kubernetes pod to bare-metal infrastructure, it cannot be reverted. +- The two `FMNs` must be part of two different management racks to support Rack Resiliency. +- This feature will not be supported on systems with Dell/ Mellanox based management networks. ## Terminology and Components ### SHS + [Slingshot Host Software](../../glossary.md#slingshot-host-software-shs) ### FM + [Fabric Manager](../../glossary.md#fabric-manager) ### FMN + [Fabric Manager Node](../../glossary.md#fabric-manager-node) -### SLS +### SLS [System Layout Service](../../glossary.md#system-layout-service-sls) -### HSM +### HSM + [Hardware State Manager](../../glossary.md#hardware-state-manager-hsm) ### BSS + [Boot Script Service](../../glossary.md#boot-script-service-bss) ### CANU + [CSM Automatic Network Utility](../../glossary.md#csm-automatic-network-utility-canu) ### SAT + [System Admin Toolkit](../../glossary.md#system-admin-toolkit-sat) ### SMA + [System Monitoring Application](../../glossary.md#system-monitoring-application-sma) ## Architecture -In CSM versions < 1.7.0, the deployment of the Fabric Manager within CSM uses native Kubernetes capabilities—both during upgrades and in failure/HA scenarios. Kubernetes itself provides health checks and a scheduler that can rebalance workloads across nodes based on load, administrative policies, and other criteria. The Fabric Manager is deployed as a single pod in Kubernetes. 
A traditional HA model for Fabric Manager doesn’t map cleanly into Kubernetes, so instead, Kubernetes’ built-in mechanisms detect failures and spin up a replacement pod, minimizing downtime. +In CSM versions < 1.7.0, the deployment of the Fabric Manager within CSM uses native Kubernetes capabilities—both during upgrades and in failure/HA scenarios. +Kubernetes itself provides health checks and a scheduler that can rebalance workloads across nodes based on load, administrative policies, and other criteria. +The Fabric Manager is deployed as a single pod in Kubernetes. A traditional HA model for Fabric Manager doesn’t map cleanly into Kubernetes, so instead, Kubernetes’ +built-in mechanisms detect failures and spin up a replacement pod, minimizing downtime. -In theory, this model should satisfy HA requirements: if the pod fails (or needs to be moved during an upgrade), Kubernetes can detect the fault and recreate the Fabric Manager on another node, providing continuity. +In theory, this model should satisfy HA requirements: if the pod fails (or needs to be moved during an upgrade), Kubernetes can detect the fault and +recreate the Fabric Manager on another node, providing continuity. -In practice, however, this approach does not meet the contractual HA obligations. Because of Kubernetes "best‑effort" scheduling and the resource demands of the Fabric Manager, real service outages can exceed 5 minutes. +In practice, however, this approach does not meet the contractual HA obligations. Because of Kubernetes "best‑effort" scheduling and the resource demands +of the Fabric Manager, real service outages can exceed 5 minutes. -To address these issues, CSM 1.7.1 includes FM on baremetal support, which provides the necessary base OS image, networking, and storage configurations for running the Slingshot Fabric Manager natively within the CSM environment to achieve HA. 
+To address these issues, CSM 1.7.1 includes FM on baremetal support, which provides the necessary base OS image, networking, and storage configurations +for running the Slingshot Fabric Manager natively within the CSM environment to achieve HA. ![FM On Baremetal](FM-HA-1.png) From b739ccdcee4ad8084c997c2b36bec0231f9e7d91 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 16:32:35 +0530 Subject: [PATCH 66/94] Update Redeploy_Fabric_Manager_Nodes.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md b/operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md index 736b2afd36380..d32ddfe02d78a 100644 --- a/operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md +++ b/operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md @@ -2,7 +2,9 @@ > **OPTIONAL:** This procedure is only applicable if Fabric Manager nodes were deployed during the CSM installation. -Although Fabric Manager Nodes (FMNs) were deployed during the CSM installation, the initial deployment did not include the final image with all necessary components. Once the other HPE Cray EX software products have been installed via the Install and Upgrade Framework (IUF), the FMNs need to be redeployed with the new customized image. +Although Fabric Manager Nodes (FMNs) were deployed during the CSM installation, the initial deployment did not include the final image +with all necessary components. Once the other HPE Cray EX software products have been installed via the Install and Upgrade Framework (IUF), +the FMNs need to be redeployed with the new customized image. 
## Prerequisites From 6eaa0504079c92d5130dbc30989589d015183b42 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 16:35:43 +0530 Subject: [PATCH 67/94] Update Add_NCN_Data.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md b/operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md index 341eeb135fda2..22c3ea6b37eaf 100644 --- a/operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md +++ b/operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md @@ -361,7 +361,7 @@ The NCN MAC addresses need to be collected using the [Collect NCN MAC Addresses] generated in the [FMN base image creation stage](https://github.com/Cray-HPE/docs-csm/blob/CASM-5740-fm-ha/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md#fmn-base-image-creation). For Example: Base image id of FMN is `06135c73-bcd9-4d38-928f-ada20bdf6a6` - + ```bash cd /usr/share/doc/csm/scripts/operations/node_management/Add_Remove_Replace_NCNs/ ./add_management_ncn.py ncn-data \ @@ -373,7 +373,7 @@ The NCN MAC addresses need to be collected using the [Collect NCN MAC Addresses] --mac-lan0 b8:59:9f:d9:9d:e8 \ --mac-lan1 b8:59:9f:d9:9d:e9 ``` - + 1. 
(`ncn-mw#`) Run the `add_management_ncn.py` script again, adding the `--perform-changes` argument to the command run in the previous step: > ***NOTE*** Depending on the networking configuration of the system the CMN or CAN networks From 17c6544ce1cbbda52b01dd1ed2b4b9e8cb1e6148 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 16:39:03 +0530 Subject: [PATCH 68/94] Update Add_Remove_Replace_NCNs.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md b/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md index 909e4ec1aab2b..122bab38c6d57 100644 --- a/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md +++ b/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md @@ -39,7 +39,7 @@ The latest CSM documentation has been installed on the master nodes. See [Check ``` Note: For adding FMNs (Fabric Manager Nodes) to CSM there is a new prompt added to confirm if the node getting added is an FMN or not: - + ```text Please answer with yes or no. Are the NCNs to be added are Fabric Manager Nodes (FMNs)? [y/N] @@ -161,7 +161,7 @@ The latest CSM documentation has been installed on the master nodes. See [Check Restarting cray-dhcp-kea ``` - + ## Add worker, storage, master or FMN(Fabric Manager Node) NCNs Use this procedure to add a worker, storage, master or FMN NCN. 
From db79f27816b29df525fe7a8e496fcbccc1b29bb4 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 2 Jun 2026 16:40:58 +0530 Subject: [PATCH 69/94] Update Boot_NCN.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/node_management/Add_Remove_Replace_NCNs/Boot_NCN.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/operations/node_management/Add_Remove_Replace_NCNs/Boot_NCN.md b/operations/node_management/Add_Remove_Replace_NCNs/Boot_NCN.md index c4bcd374c9ee5..7fc447304b469 100644 --- a/operations/node_management/Add_Remove_Replace_NCNs/Boot_NCN.md +++ b/operations/node_management/Add_Remove_Replace_NCNs/Boot_NCN.md @@ -263,5 +263,6 @@ Follow [Add Ceph Node](../../utility_storage/Add_Ceph_Node.md) to join the added Proceed to [Redeploy Services](Redeploy_Services.md) or return to the main [Add, Remove, Replace, or Move NCNs](Add_Remove_Replace_NCNs.md) page. -**Note: ** +**Note:** + * For FMN nodes we can skip rest of the steps. 
From 8388ef2ca891d624dc9683f5363e9af1bd39e655 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Thu, 4 Jun 2026 12:08:54 +0530 Subject: [PATCH 70/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index de39f199e44e5..81175204533ff 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -8,7 +8,7 @@ ## Introduction -The Fabric Manager (FM) bare-metal enablement within the Cray System Management (CSM) framework introduces dedicated Fabric Manager Nodes (FMNs) +The Fabric Manager (FM) bare-metal enablement within the Cray System Management (CSM) framework introduces dedicated Fabric Manager Nodes (FMNs) that manage and monitor Slingshot fabric operations outside of a Kubernetes environment. CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS image, networking, and storage configurations for running the Slingshot Fabric Manager natively within the CSM environment. @@ -62,16 +62,16 @@ CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS i In CSM versions < 1.7.0, the deployment of the Fabric Manager within CSM uses native Kubernetes capabilities—both during upgrades and in failure/HA scenarios. Kubernetes itself provides health checks and a scheduler that can rebalance workloads across nodes based on load, administrative policies, and other criteria. -The Fabric Manager is deployed as a single pod in Kubernetes. A traditional HA model for Fabric Manager doesn’t map cleanly into Kubernetes, so instead, Kubernetes’ +The Fabric Manager is deployed as a single pod in Kubernetes. 
A traditional HA model for Fabric Manager doesn’t map cleanly into Kubernetes, so instead, Kubernetes’ built-in mechanisms detect failures and spin up a replacement pod, minimizing downtime. -In theory, this model should satisfy HA requirements: if the pod fails (or needs to be moved during an upgrade), Kubernetes can detect the fault and +In theory, this model should satisfy HA requirements: if the pod fails (or needs to be moved during an upgrade), Kubernetes can detect the fault and recreate the Fabric Manager on another node, providing continuity. -In practice, however, this approach does not meet the contractual HA obligations. Because of Kubernetes "best‑effort" scheduling and the resource demands +In practice, however, this approach does not meet the contractual HA obligations. Because of Kubernetes "best‑effort" scheduling and the resource demands of the Fabric Manager, real service outages can exceed 5 minutes. -To address these issues, CSM 1.7.1 includes FM on baremetal support, which provides the necessary base OS image, networking, and storage configurations +To address these issues, CSM 1.7.1 includes FM on baremetal support, which provides the necessary base OS image, networking, and storage configurations for running the Slingshot Fabric Manager natively within the CSM environment to achieve HA. ![FM On Baremetal](FM-HA-1.png) @@ -82,5 +82,5 @@ To configure FM on baremetal please follow the [procedure](Configure_FM_On_Barem ## Slingshot Switch Firmware Update -* For clusters using the FM pod: CSM will continue to handle switch firmware uploads and updates as [before](../../operations/iuf/workflows/slingshot_management_network_switch_updates.md#perform-slingshot-switch-and-management-network-switch-firmware-updates). -* For clusters with bare-metal FM: FMN will host the switch firmware, and FM will be responsible for managing switch updates. 
Refer section "3.2.6 (Optional) Update HPE Slingshot switch firmware" in _HPE Slingshot Installation Guide for CSM_ PDF. +- For clusters using the FM pod: CSM will continue to handle switch firmware uploads and updates as [before](../../operations/iuf/workflows/slingshot_management_network_switch_updates.md#perform-slingshot-switch-and-management-network-switch-firmware-updates). +- For clusters with bare-metal FM: FMN will host the switch firmware, and FM will be responsible for managing switch updates. Refer to section "3.2.6 (Optional) Update HPE Slingshot switch firmware" in the _HPE Slingshot Installation Guide for CSM_ PDF. From 9cc8081262365f0df7cfe76e9dffa6d64c894efa Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Thu, 4 Jun 2026 12:10:47 +0530 Subject: [PATCH 71/94] Update Redeploy_Fabric_Manager_Nodes.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md b/operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md index d32ddfe02d78a..7e8a06bf02f85 100644 --- a/operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md +++ b/operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md @@ -2,8 +2,8 @@ > **OPTIONAL:** This procedure is only applicable if Fabric Manager nodes were deployed during the CSM installation. -Although Fabric Manager Nodes (FMNs) were deployed during the CSM installation, the initial deployment did not include the final image -with all necessary components. Once the other HPE Cray EX software products have been installed via the Install and Upgrade Framework (IUF), +Although Fabric Manager Nodes (FMNs) were deployed during the CSM installation, the initial deployment did not include the final image +with all necessary components.
Once the other HPE Cray EX software products have been installed via the Install and Upgrade Framework (IUF), the FMNs need to be redeployed with the new customized image. ## Prerequisites From 2637fc826efde0ccd838fa617eb6429b97e82efc Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Thu, 4 Jun 2026 12:20:07 +0530 Subject: [PATCH 72/94] Update Add_Remove_Replace_NCNs.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md b/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md index 122bab38c6d57..1c0e551e7a7d7 100644 --- a/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md +++ b/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md @@ -11,7 +11,7 @@ Add, remove, replace, or move non-compute nodes (NCNs). This applies to worker, The following workflows are available: * [Prerequisites](#prerequisites) -* [Add worker, storage, or master NCNs](#add-worker-storage-or-master-ncns) +* [Add worker, storage, master or FMN NCNs](#add-worker-storage-master-or-fmn-ncns) * [Add NCN prerequisites](#add-ncn-prerequisites) * [Add NCN procedure](#add-ncn-procedure) * [Remove worker, storage, or master NCNs](#remove-worker-storage-or-master-ncns) @@ -162,9 +162,9 @@ The latest CSM documentation has been installed on the master nodes. See [Check Restarting cray-dhcp-kea ``` -## Add worker, storage, master or FMN(Fabric Manager Node) NCNs +## Add worker, storage, master or FMN NCNs -Use this procedure to add a worker, storage, master or FMN NCN. +Use this procedure to add a worker, storage, master or FMN (Fabric Manager Node) NCN.
### Add NCN prerequisites From 5f227aecf56646cf44e3514d15e115a1728e2b70 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Thu, 4 Jun 2026 12:22:10 +0530 Subject: [PATCH 73/94] Update Add_Remove_Replace_NCNs.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md b/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md index 1c0e551e7a7d7..c0955baa0065c 100644 --- a/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md +++ b/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md @@ -267,4 +267,4 @@ In general, scaling master nodes is not recommended because it can cause Etcd la The following is a high-level overview of the replace NCN workflow: 1. [Remove Worker, Storage, or Master NCNs](#remove-worker-storage-or-master-ncns) -1. [Add Worker, Storage, or Master NCNs](#add-worker-storage-or-master-ncns) +1. [Add worker, storage, master or FMN NCNs](#add-worker-storage-master-or-fmn-ncns) From 185ad35c09bec27d00edf4b9644ed4863d0f0fe4 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Thu, 4 Jun 2026 12:25:04 +0530 Subject: [PATCH 74/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- upgrade/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/upgrade/README.md b/upgrade/README.md index f6ebd44fdd679..f78386ee6d41e 100644 --- a/upgrade/README.md +++ b/upgrade/README.md @@ -9,7 +9,7 @@ software. Choose the appropriate procedure from the sections below. 
* [Option 2: Upgrade only additional HPE Cray EX software products](#option-2-upgrade-only-additional-hpe-cray-ex-software-products) * [Option 3: Upgrade only CSM](#option-3-upgrade-only-csm) * [CSM patch version upgrade](#csm-patch-version-upgrade) -* [FM On Baremetal](#fm_on_baremetal) +* [FM On Baremetal](#fm-on-baremetal) ## Release Notes From 23a049d5323b822826366f59bc2ee86d123dae3a Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Mon, 8 Jun 2026 14:03:03 +0530 Subject: [PATCH 75/94] Apply suggestions from code review Co-authored-by: sravani-sanigepalli <131810845+sravani-sanigepalli@users.noreply.github.com> Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 2 +- operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 81175204533ff..22cd4bf775e49 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -9,7 +9,7 @@ ## Introduction The Fabric Manager (FM) bare-metal enablement within the Cray System Management (CSM) framework introduces dedicated Fabric Manager Nodes (FMNs) -that manage and monitor Slingshot fabric operations outside of a Kubernetes environment. +that manage and monitor Slingshot fabric operations outside of the Kubernetes environment. CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS image, networking, and storage configurations for running the Slingshot Fabric Manager natively within the CSM environment. 
diff --git a/operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md b/operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md index 7e8a06bf02f85..64ae3f56826d6 100644 --- a/operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md +++ b/operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md @@ -2,7 +2,7 @@ > **OPTIONAL:** This procedure is only applicable if Fabric Manager nodes were deployed during the CSM installation. -Although Fabric Manager Nodes (FMNs) were deployed during the CSM installation, the initial deployment did not include the final image +Although Fabric Manager Nodes (FMNs) were deployed during the CSM installation, the initial deployment would not include the final image with all necessary components. Once the other HPE Cray EX software products have been installed via the Install and Upgrade Framework (IUF), the FMNs need to be redeployed with the new customized image. From aeee076821b5fe80cce468197f017190d171ee20 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Wed, 10 Jun 2026 00:46:19 +0530 Subject: [PATCH 76/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/Configure_FM_On_Baremetal.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index f2697fcadcef9..dff1c8818164e 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -42,15 +42,13 @@ Verify that the BMC of each FMN is configured with the correct root user credent ### FMN Base Image Creation -The FabricManager subrole has been introduced to facilitate FMN node discovery and configuration. 
Corresponding updates have been made to -`ncn_nodes.yaml` and `ncn_initrd.yaml` to support customization of the FMN base image— a non-Kubernetes image containing only essential artifacts. -This customization is performed using the `csm.fm.baremetal` Ansible role, executed under the `Management_FabricManager` host. +The FMN base image creation process supports node discovery, configuration, and base image customization. The base image contains only the essential artifacts required for deployment. The following steps details the process for generating the FMN image. #### Create FMN base image (only base OS; no Fabric Manager) -Adapt and customize the current NCN Kubernetes image for compatibility with FMN node requirements. [See](../configuration_management/Management_Node_Image_Customization.md) +Adapt and customize the current NCN Kubernetes image for compatibility with FMN node requirements. ##### FMN Boot Preparation From f866d30bbd9c12d0d8400b096ef75b9acea64075 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Wed, 10 Jun 2026 00:46:34 +0530 Subject: [PATCH 77/94] Apply suggestions from code review Co-authored-by: sravani-sanigepalli <131810845+sravani-sanigepalli@users.noreply.github.com> Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/Configure_FM_On_Baremetal.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index dff1c8818164e..871d5e6094311 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -54,7 +54,7 @@ Adapt and customize the current NCN Kubernetes image for compatibility with FMN Create `sat bootprep` configuration file (`fmn_bootprep.yaml`) for FMN as below. 
-**Note:** Ensure that the `fmn_bootprep.yaml` configuration file is updated with the official released versions before proceeding. +**Note:** Ensure that the `fmn_bootprep.yaml` configuration file is updated with the official CSM released versions and the appropriate commits on playbooks before proceeding. For Example: @@ -95,13 +95,13 @@ images: Execute the commands below on any master node to generate the new FMN image and upload it to the S3 storage. -First set `bootprep` file path: +(ncn-m#) First set `bootprep` file path: ```bash # BOOTPREP_FILE_PATH=./fmn_bootprep.yaml ``` -Now execute the `sat bootprep run` command below to generate the new base image and upload it to S3. +(ncn-m#) Now execute the `sat bootprep run` command below to generate the new base image and upload it to S3. ```bash sat bootprep run \ From c2b1e3f7d2fdf72ba4cfa99351e3b08f41f0e029 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Wed, 10 Jun 2026 00:48:01 +0530 Subject: [PATCH 78/94] Apply suggestions from code review Co-authored-by: sravani-sanigepalli <131810845+sravani-sanigepalli@users.noreply.github.com> Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/Configure_FM_On_Baremetal.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 871d5e6094311..89a6abb09a32a 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -119,7 +119,7 @@ sat bootprep run \ After creating the FMN base image, add FMN nodes to CSM by following the Follow step 1 to step in [NCN add procedure](../../operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#add-worker-storage-master-or-fmnfabric-manager-node-ncns) -After completion of the NCN 
add procedure, SLS, HSM, and BSS will contain the corresponding FMN data. +Upon completion of the NCN add procedure, the corresponding FMN entries will be populated in SLS, HSM, and BSS. The required network, storage and other cloud-init configurations are added to BSS and would be applied when the FMN node boots. ### Generate Switch Configuration With CANU @@ -298,7 +298,7 @@ For Example: cray sls search networks list --name NMN --format json ``` -#### HSM `ethernet` interfaces should be updated with the same allocated IPs +#### HSM ethernetInterfaces should be updated with the same allocated IPs For Example: From 8504727514def13d27a75d95fa162012cee327f4 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Wed, 10 Jun 2026 01:10:32 +0530 Subject: [PATCH 79/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../Configure_FM_On_Baremetal.md | 87 +++---------------- 1 file changed, 13 insertions(+), 74 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 89a6abb09a32a..11267ea73615d 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -208,28 +208,6 @@ Arch = "X86" Class = "River" ``` -```bash -ncn-m001:~ # XNAME=x3000c0s29b0n0 -``` - -```bash -ncn-m001:~ # cray hsm state components describe "${XNAME}" --format toml -``` - -```text -ID = "x3000c0s29b0n0" -Type = "Node" -State = "On" -Flag = "OK" -Enabled = true -Role = "Management" -SubRole = "FabricManager" -NID = 100012 -NetType = "Sling" -Arch = "X86" -Class = "River" -``` - #### Validate FMN required networking configuration Check NMN, CMN, HMN, CHN, metal and virtual IP configuration for both FMN nodes (`fmn001` and `fmn002`). @@ -254,6 +232,12 @@ IPAddress = "10.1.1.10" Name = "fmn001" ... 
+[[results.ExtraProperties.Subnets.IPReservations]] +Aliases = [ "fmn001-nmn", "time-nmn", "time-nmn.local", "x3000c0s28b0n0", "fmn001.local",] +Comment = "x3000c0s28b0n0" +IPAddress = "10.252.1.12" +Name = "fmn001" + [[results.ExtraProperties.Subnets.IPReservations]] Aliases = [ "fmn-vip.local",] Comment = "fmn-virtual-ip" @@ -261,10 +245,10 @@ IPAddress = "10.252.1.13" Name = "fmn-vip" [[results.ExtraProperties.Subnets.IPReservations]] -Aliases = [ "fmn001-nmn", "time-nmn", "time-nmn.local", "x3000c0s28b0n0", "fmn001.local",] -Comment = "x3000c0s28b0n0" -IPAddress = "10.252.1.12" -Name = "fmn001" +Aliases = [ "fmn001-mgmt",] +Comment = "x3000c0s28b0" +IPAddress = "10.254.1.21" +Name = "x3000c0s28b0" [[results.ExtraProperties.Subnets.IPReservations]] Aliases = [ "fmn001-hmn", "time-hmn", "time-hmn.local",] @@ -288,8 +272,6 @@ For Example: cray sls hardware describe x3000c0s28b0n0 ``` -#### IPs should be allocated and made available for FMNs in all of SLS networks - **Note:** NMN and HMN should be having additional FMN VIPs also allocated. 
For Example: @@ -367,47 +349,12 @@ fmn001:~ # mount | grep /opt/slingshot /dev/mapper/metalvg0-SLINGSHOT on /opt/slingshot type ext4 (rw,relatime,stripe=256) ``` -```bash -fmn002:~ # lsblk -``` - -```text -NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS -loop0 7:0 0 2.2G 1 loop /run/rootfsbase -sda 8:0 0 3.5T 0 disk -├─sda1 8:1 0 476M 0 part -│ └─md127 9:127 0 475.9M 0 raid1 /metal/recovery -├─sda2 8:2 0 22.8G 0 part -│ └─md126 9:126 0 22.8G 0 raid1 /run/initramfs/live -├─sda3 8:3 0 139.7G 0 part -│ └─md125 9:125 0 139.6G 0 raid1 /run/initramfs/overlayfs -└─sda4 8:4 0 139.7G 0 part - └─md124 9:124 0 279.1G 0 raid0 - ├─metalvg0-SCFIRMWARE 254:0 0 80G 0 lvm /opt/cray/FW/sc-firmware - └─metalvg0-SLINGSHOT 254:1 0 120G 0 lvm /opt/slingshot -sdb 8:16 0 3.5T 0 disk -├─sdb1 8:17 0 476M 0 part -│ └─md127 9:127 0 475.9M 0 raid1 /metal/recovery -├─sdb2 8:18 0 22.8G 0 part -│ └─md126 9:126 0 22.8G 0 raid1 /run/initramfs/live -├─sdb3 8:19 0 139.7G 0 part -│ └─md125 9:125 0 139.6G 0 raid1 /run/initramfs/overlayfs -└─sdb4 8:20 0 139.7G 0 part - └─md124 9:124 0 279.1G 0 raid0 - ├─metalvg0-SCFIRMWARE 254:0 0 80G 0 lvm /opt/cray/FW/sc-firmware - └─metalvg0-SLINGSHOT 254:1 0 120G 0 lvm /opt/slingshot -sdc 8:32 0 3.5T 0 disk -sdd 8:48 0 3.5T 0 disk -``` +#### Join Fabric Manager nodes to Spire -```bash -fmn002:~ # mount | grep /opt/cray/FW/sc-firmware -/dev/mapper/metalvg0-SCFIRMWARE on /opt/cray/FW/sc-firmware type ext4 (rw,relatime,stripe=256) -``` +After the Fabric Manager nodes have been deployed and are running, join them to Spire to avoid issues with Spire tokens. ```bash -fmn002:~ # mount | grep /opt/slingshot -/dev/mapper/metalvg0-SLINGSHOT on /opt/slingshot type ext4 (rw,relatime,stripe=256) +ncn-m001:~ # /opt/cray/platform-utils/spire/fix-spire-on-fmn.sh ``` #### Validate addition of FM required repositories @@ -434,14 +381,6 @@ Repository priorities are without effect. All enabled repositories share the sam ... 
``` -#### Join Fabric Manager nodes to Spire - -After the Fabric Manager nodes have been deployed and are running, join them to Spire to avoid issues with Spire tokens. - -```bash -ncn-m001:~ # /opt/cray/platform-utils/spire/fix-spire-on-fmn.sh -``` - ### Install Fabric Manager on FM baremetal nodes For install/ upgrade Fabric Manager on the FMNs please refer section "3 Install HPE Slingshot Fabric Manager software on bare metal servers" in _HPE Slingshot Installation Guide for CSM_ PDF. From cf22db01391c8a005be767eefbc93b0f8ea88681 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Wed, 10 Jun 2026 01:21:11 +0530 Subject: [PATCH 80/94] Update Redeploy_Fabric_Manager_Nodes.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../Redeploy_Fabric_Manager_Nodes.md | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md b/operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md index 64ae3f56826d6..82655dd7379a9 100644 --- a/operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md +++ b/operations/fm_on_baremetal/Redeploy_Fabric_Manager_Nodes.md @@ -78,16 +78,23 @@ Once the new FMN image has been built and uploaded to S3, update the boot parame For each FMN, set `metal.no-wipe=0`: + For Example: + ```bash - TARGET_XNAME= + TARGET_XNAME=x3000c0s28b0n0 csi handoff bss-update-param --set metal.no-wipe=0 --limit ${TARGET_XNAME} ``` - For example: + Expected output: ```bash - TARGET_XNAME=x3000c0s28b0n0 - csi handoff bss-update-param --set metal.no-wipe=0 --limit ${TARGET_XNAME} + 2026/06/05 11:33:35 TOKEN was not set. Attempting to read API token from Kubernetes directly ... + 2026/06/05 11:33:35 Getting management NCNs from SLS... + 12 + 2026/06/05 11:33:35 Done getting management NCNs from SLS. + 2026/06/05 11:33:35 Updating NCN kernel parameters... 
+ 2026/06/05 11:33:35 Successfully PUT BSS entry for x3000c0s28b0n0 + 2026/06/05 11:33:35 Done updating NCN kernel parameters. ``` Repeat for each FMN. From 640c6194e3b7a6c04d4ad1fd4a283f3b109f340a Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Wed, 10 Jun 2026 01:25:22 +0530 Subject: [PATCH 81/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 22cd4bf775e49..341f0d1550668 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -13,6 +13,8 @@ that manage and monitor Slingshot fabric operations outside of the Kubernetes en CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS image, networking, and storage configurations for running the Slingshot Fabric Manager natively within the CSM environment. +Please note that this feature is only available in CSM 1.7.1 and later. + **NOTE**: - `FMNs` are considered Management nodes.
From f261b9e797913c1555b8aa02d4a4902b0d88369f Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Thu, 11 Jun 2026 23:46:54 +0530 Subject: [PATCH 82/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../fm_on_baremetal/Configure_FM_On_Baremetal.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 11267ea73615d..0e154b1c621d4 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -117,11 +117,13 @@ sat bootprep run \ ### Add FMN Nodes to CSM -After creating the FMN base image, add FMN nodes to CSM by following the Follow step 1 to step in [NCN add procedure](../../operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#add-worker-storage-master-or-fmnfabric-manager-node-ncns) +After creating the FMN base image, add FMN nodes to CSM by following the steps below: -Upon completion of the NCN add procedure, the corresponding FMN entries will be populated in SLS, HSM, and BSS. The required network, storage and other cloud-init configurations are added to BSS and would be applied when the FMN node boots.
+#### Step 1: Allocate NCN IP Addresses -### Generate Switch Configuration With CANU +Follow the procedure defined in [Allocate NCN IP Addresses](../node_management/Add_Remove_Replace_NCNs/Allocate_NCN_IP_Addresses.md#allocate-ncn-ip-addresses). + +#### Step 2: Generate Switch Configuration With CANU For Example: @@ -129,7 +131,7 @@ For Example: canu generate network config -a TDS --csm 1.7 --custom-config custom_switch_config.yaml --edge Arista --sls-file sls_input_file.json --ccj surtur-ccj.json --folder output (--enable-nmn-isolation --nmn-pvlan ) ``` -#### Validate the generated switch configuration against the network switches +#### Step 3: Validate the generated switch configuration against the network switches * TDS style systems have the management nodes plugged directly into the spine switches, most will only have a single leaf-bmc switch. * Systems that use the "Full" architecture will have the management nodes plugged into the leaf switches. @@ -146,9 +148,11 @@ canu validate switch config --ip 10.254.0.4 --generated output/sw-leaf-bmc-001.c Take extreme care when manipulating ACLs, if CANU suggests moving a "permit any ..." rule be sure to create the new rule before removing the old one. It is possible to lose access to the switch if the ACLs are not applied in the correct order. -## FMN Booting +#### Step 4: FMN Booting + +Upon completion of the FMN add procedure, the corresponding FMN entries will be populated in SLS, HSM, and BSS. The required network, storage and other cloud-init configurations are added to BSS and will be applied when the FMN node boots. -Once the FMNs have been added to the CSM, proceed to boot the FMN nodes (using iPXE boot commands) with the FMN bare-metal base image.
[See](../node_management/Add_Remove_Replace_NCNs/Boot_NCN.md#boot-ncn) +Proceed to boot the FMN nodes (using iPXE boot commands) with the FMN bare-metal base image.[Boot NCN](../node_management/Add_Remove_Replace_NCNs/Boot_NCN.md#boot-ncn). ## FMN Post Boot From 30cd70d2dd9232040d68c5b9230d9f16766e9339 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Fri, 12 Jun 2026 00:06:45 +0530 Subject: [PATCH 83/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/Configure_FM_On_Baremetal.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 0e154b1c621d4..a8a3b47d9a046 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -284,7 +284,7 @@ For Example: cray sls search networks list --name NMN --format json ``` -#### HSM ethernetInterfaces should be updated with the same allocated IPs +#### HSM `ethernetInterfaces` should be updated with the same allocated IPs For Example: From c9887f5ee77500c7ff2959700e9f8188db8ac3c2 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Fri, 12 Jun 2026 00:20:22 +0530 Subject: [PATCH 84/94] CASM-5740: FM migration on baremetal nodes (FMNs) - Documentation for FM migration to bare metal node(s). 
--- .../Configure_FM_On_Baremetal.md | 39 +++++++------- operations/fm_on_baremetal/FM-HA-1.png | Bin 26972 -> 0 bytes operations/fm_on_baremetal/README.md | 50 +++++------------- upgrade/README.md | 3 +- 4 files changed, 34 insertions(+), 58 deletions(-) delete mode 100644 operations/fm_on_baremetal/FM-HA-1.png diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index a8a3b47d9a046..ef6bc189fd703 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -21,8 +21,7 @@ Post CSM Upgrade from 1.7.0 to CSM 1.7.1, if an administrator wishes to enable F * Step 1: [FMN Prerequisites](#fmn-prerequisites) * step 2: [FMN Pre Boot](#fmn-pre-boot) * [FMN Base Image Creation](#fmn-base-image-creation) - * [Add FMN Nodes to CSM](#add-fmn-nodes-to-csm) - * [Generate Switch Configuration With CANU](#generate-switch-configuration-with-canu) + * [Add NCN Procedure](#add-ncn-procedure) * Step 3: [FMN Booting](#fmn-booting) * Step 4: [FMN Post Boot](#fmn-post-boot) * [Validation](#validation) @@ -115,15 +114,19 @@ sat bootprep run \ **Note:** Using the `--overwrite-images` option in the command above will overwrite any previously uploaded images in S3. 
-### Add FMN Nodes to CSM +**Note:** After creating the FMN base image above, follow the steps below to add FMN nodes to CSM: -After creating the FMN base image, add FMN nodes to CSM by following the below steps: +### Add NCN procedure -#### Step 1: Allocate NCN IP Addresses +#### Allocate NCN IP Addresses -Follow the procedure defined at [Allocate NCN IP Addresses](https://github.com/Cray-HPE/docs-csm/blob/CASM-5740-fm-ha/operations/node_management/Add_Remove_Replace_NCNs/Allocate_NCN_IP_Addresses.md#allocate-ncn-ip-addresses) +Follow [`Step-1`](https://github.com/Cray-HPE/docs-csm/blob/CASM-5740-fm-ha/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#add-ncn-procedure) -#### Step 2: Generate Switch Configuration With CANU +#### Add NCN data + +Follow [`Step-3`](https://github.com/Cray-HPE/docs-csm/blob/CASM-5740-fm-ha/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#add-ncn-procedure) + +#### Generate Switch Configuration With CANU For Example: @@ -131,7 +134,7 @@ For Example: canu generate network config -a TDS --csm 1.7 --custom-config custom_switch_config.yaml --edge Arista --sls-file sls_input_file.json --ccj surtur-ccj.json --folder output (--enable-nmn-isolation --nmn-pvlan ) ``` -#### Step 3: Validate the generated switch configuration against the network switches +#### Validate the generated switch configuration against the network switches * TDS style systems have the management nodes plugged directly into the spine switches, most will only have a single leaf-bmc switch. * Systems that use the "Full" architecture will have the management nodes plugged into the leaf switches. @@ -148,11 +151,11 @@ canu validate switch config --ip 10.254.0.4 --generated output/sw-leaf-bmc-001.c Take extreme care when manipulating ACLs, if CANU suggests moving a "permit any ..." rule be sure to create the new rule before removing the old one. 
It is possible to lose access to the switch if the ACLs are not applied in the correct order. -#### Step 4: FMN Booting +## FMN Booting Upon completion of the FMNs add procedure, the corresponding FMN entries will be populated in SLS, HSM, and BSS. The required network, storage and other cloud-init configurations are added to BSS and would be applied when the FMN node boots. -Proceed to boot the FMN nodes (using iPXE boot commands) with the FMN bare-metal base image.[Boot NCN](../node_management/Add_Remove_Replace_NCNs/Boot_NCN.md#boot-ncn). +Proceed to boot the FMN nodes (using iPXE boot commands) with the FMN bare-metal base image. See [Boot NCN](../node_management/Add_Remove_Replace_NCNs/Boot_NCN.md#boot-ncn). ## FMN Post Boot @@ -306,6 +309,14 @@ cray bss bootparameters list --format json --name x3000c0s28b0n0 cray bss bootparameters list --hosts Global --format json ``` +#### Join Fabric Manager nodes to Spire + +After the Fabric Manager nodes have been deployed and are running, join them to Spire to avoid issues with Spire tokens. + +```bash +ncn-m001:~ # /opt/cray/platform-utils/spire/fix-spire-on-fmn.sh +``` + #### Validate FMN required storage configuration (LVM partitions) Check if both LVM partitions `/dev/mapper/metalvg0-SCFIRMWARE` and `/dev/mapper/metalvg0-SLINGSHOT` created and mounted under `/opt/cray/FW/sc-firmware` and `/opt/slingshot` respectively on both FMN nodes (`fmn001` and `fmn002`). @@ -353,14 +364,6 @@ fmn001:~ # mount | grep /opt/slingshot /dev/mapper/metalvg0-SLINGSHOT on /opt/slingshot type ext4 (rw,relatime,stripe=256) ``` -#### Join Fabric Manager nodes to Spire - -After the Fabric Manager nodes have been deployed and are running, join them to Spire to avoid issues with Spire tokens. 
- -```bash -ncn-m001:~ # /opt/cray/platform-utils/spire/fix-spire-on-fmn.sh -``` - #### Validate addition of FM required repositories Check if all the required repos are added on both FMN nodes (`fmn001` and `fmn002`) in order to install prerequisite OS RPMs required during Slingshot Software installation. diff --git a/operations/fm_on_baremetal/FM-HA-1.png b/operations/fm_on_baremetal/FM-HA-1.png deleted file mode 100644 index 996d45bc821ed74aa811ba4d393cdfcd251f26ac..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 26972 zcmc$GcRben|9)U=TbdDau~I<%+eGj5loY_^7qka_R1bdZ_TQA_eN=y%nj*r5hlHkTgW}{@buh}%m4f1Kd<1%3a~V0DDB*e`}-GoDg_x< zz--!Tg5K|;+>xt$#B>?6PO9M=TT5t%`7<`SjO{es%U=0ThxtD@Pt$i7tQt6dm1At@ zOD3qoslvhWJon{VzX7&dqQy`uN7VJ$3t^b8HHXT^d<-$8yuj@~K@Rbi+LPmW9@D34 zI7Zk;*w~dg_El>wdM23DIMF&k(wDa`t-?Q6UDqBSZS|MmbQLVU#lLtDRi^syTHRvq zUJbJB>vdQ9dsQ@eP&zkJ&CF@59QI>+{Qp}mO*B~wO<%HL=0&U{W@MG&1}iCy_t~!6 zlcSZ}sU~;b=QFfzifkuH15*#DSbIbgjgJ;|tF0p>_pPc}9M90Yn&8*&%_Op}aqr`u zP41FBSlaDy@aAuR?>#3&7cw1Z>>aXpe6-@jdU5P{yF81c+{ji@sP3Jl_+ho-Y%A6o>h8sTd}egB455T(uo|JioN+MQ94J<~ofpZ($oICkp>58h>%?TkE$gt>F4xK0G?xlPARc8aYenc2CF zy3GoY`;$(@=o%$(*jE>{F)Y&my|&M#YXdA(R`}XENDr3Fjq|C*Tx@B3Kh};vqxzPu zeTkBoiI_1xEP5hB?3ZxWOR?<%bMuTC9envU9esAjgge?~<8aT{}7 z(?lqD%0xJ=glLM__TVP=N%D#(tDI?5gl1Oxn73G8KvkPx6`jxF8d=upOw~rGa()7R zN0-m>hT`k*54sGWjKnN_w&GoWb0qew!n_KWAnHw*`kY^BzmZreX19FQ-3I^8lM!dX zuM7P~9-PUHHE*RDKLpaa=v|pyZ6qYz_tzbN-X-|MHfo(-O7D4rj>H;kt;~55rtkWe z$O@#%o{ryWTt+bzkHxdex-K?a!XxjC&;_E7g0SEK7&tb+Qd< z$|lv(E0U_(cPJnSGmMRzXf{E=i=+?`sb|*p$n=jsd|<k7m zZvG{bDdDm^t?N&W@WR{lr*vKFo)e^N0o)RGKn>An1CCHSF6kq>Dmh~NHymUS@kp)ZM&}S{wcvD_ZRx9hlvKu%YKf%id z3OtPj{BP#_l)0;;dlae7rbCSn%?v!2OAA%H&S9f)>KjXri`uyog{RF@wqI~FaL<19 ziIjdr+Z*F)UwgD2O}Iy|wAi;1E;OpJHEfycWmYzLUpa@na_+Us{=v!d%8C2phwd9^ zGv+6ZB2%#UXU*vj^d?k|>t+JTjd1067Xs*C5l`_j7~5O97b&0E7^?fttM4BMF-9nC z8Ds`>_YAw$KGv4qv&kuxhr=tj7VhB5_^V~H>{QMV#^ZcgL5gb+ZXK@poP^G% z`bhG6fQ@J;6Js{IdUu5V@g 
z;@oc-SCycpdd68$3N~dj>2YUpl)9uy^Nk7y>Rrtm@Mh-49fD`Q^@yX{8xE*sandEh zIhvE?295WqiE(mbn|$7TpPlC}HSYW_W;;Pg?~y62kLWJzYja;CRr3m8Z!z2uxag~mK(6@R^nQd9Ec zJ2~9Q+6d*Y5ni42-`14umy!5|t2=b&<~ND`x%B!^m)*W)fj4qf;qMSYZ;2kx)lSFW zoSLtl_>4B&J=?I35wf zt#4UO)0DaOk<5Vx&}BfOL3 znY2k489@kWm^P4$#E)q5>yr<#$|9j^xV zctd1S6*`qvP?h$=T?=Q8lIFe?Ggs4~d8EX*IhOQj+*`3CQ>c$ddPa1yM9&F(HkTa3 z4b+<&uMMU42J8k2WqzTwn;q)N7ZZNG%=1c3JnvmqgfiC$CL`V=%Uj8dF`A#Jz81Ij zOFpLAPEuQ!T6r?WuPB`w<$lzzz@A>bd5-arX~a4!`RR);chjbxzZVtf1}tjSyDD%* zS04#2m-MJ|KupG1Gh$dj>T`4_sz zURzBu^VUt4)deE6Nl{bQCGpt)bRGtb{n|bUQCs<8`|G{pLTm;b2P?Hc5T(S)C~?K| zXcjs?u`U{Jh!&9rD1`mucUL@_4Wq!YguQdwj^UcspVxK!@-6z%v;ERhyOmISXIL-a zP)NmB<-Ky|Gxj{vRvVJEQfu^MpRTjb!1( zqTlBo8w5mqvU9kns@4Oqo&42#vy3P)h$E9XVZtJu-5O`&J1kN(LA~$g`&VMvotwzh z_0W77?NJ9gg_G1inIa zar(<@`ibt{*Uw~}NfW+lP+)0C??H>c@w3qW`G&k9<5o@#`Rmx@>jT9)B@R^_?cZb+ zgS`By7?<6WII?M5WK^2|GBi-nVV$PdU2%Us7SuC#eD4m`^)H{8(u=0*-|`5V_mYpd zzq0aY7rJh6)vcEQhFYyd`H0KA6}N6S@|>_VK9Y{NOl~@rhp+NCCxsob)k|lWpJAJN z9mFN3K)xrSm${x!cf=-d zJ3WoGIoNVmJQ80THE>DR-_OM3%e~>72wLqttr%2HM{dY~JFK)ah%=ATb;_c9Y%4Ddv(0M5WZI>0pj)#ZBW>G|Aj+`hN)fV1iKVy~ zq>?$He>Q9`bIkP9C5`W|7?P|RQSr{j2I^A*?M1=E!g(_kiWYV(c9!TE;nIC_`Y-!a zo_MDnAM0{+MD~nH^zTO7H$9koSN^;Hal%Ei@yy!^Aya|xE@3xNBr}9b%;IVoIX3YU z1`+AZ$Imbl@HSP7dZ)#&w-<4!>hifaI$hSr9t3iEU)kq zw(p3i-Jn$oo@_1QoOK(az)Vorhv=E2V@tz&v3Ns2U8d2_QE}A1sD7G}La#Q^cf#6~ z_Bf3z2!B1D=FQ?_%}(`72J1?2Cr3NdXE> z0J)iP>b^Rb-$22=r!@g*)8C3$$Rt!wCFp@btRg48+#Su`_YLN!lV1X zbT-Px9B1v_t*9oV9zngl zjd~xI^gl@5xJK^t!I;sbi6lL%bV5r$_O^q1Zsk_T)%GHRf_mQQM+O{PEOK^X;pVl~X-G zW5$CtuUvRam=u3&d4hN3*g7K(QeUWc&+0d2h#F)~aAlc02t2enR zi7oeVg~T|b1}x$XpC;0N$C=HiTddvV4fr!ND$KJ0)_hSj8htwB>u(X`aR{hyoD2;jvQbIglue@go2(l7Az%5m_b)m-oE zU)0ZR=cBxrKv1NDqtD`p06cA%+wVQMNN!BKPNQ3Cl|h=4!Dt)6+?|WI{o_0p(g-o% z@VoFxS^WP1J_zW8{FCf-Zu6_PpD3r<1pJ+$p+xQ9)s#k+;<1U_6!Xf$`U|536@IV5 zkfQYXpxS;c=idYKMOOyZ6>vSsR*m@|oMpv`byNHY?$~Q&&1Lh$WWm--j{rppor5)yKhn8S{w9bvF6Q#GqxW>Iynf$GjmG65Chfeczly zQkN0Pj)x4n_TzrER01{R+sOv+lGZR=Op`L2WDek-!Nb7D@1=aJr(JrYIPlsK| 
zRxWIO;a5NWc}8mP9IIWd>Y>0`BcW8%H}TDGnd;OHW^uR5KTb{&mmqQKOOz;@;H58G z6jNuDUwtuk?~CQc>rk$cb3(&B-T;=gKzT(8wS;9Dos{IYH9&p5@#z3?IfOJZ5~=RI z4=3qbp{R^+c#Qen2DjC5{f_6B+LWCi)P6523yFrS+SE@SSZESdP&#;0yPCR0FQQ@a#5} zP})oI^g;lCnjm{lKHTU47=c{Z(D`=N!LN_jU2e29e{x3_IPpnIiV2=M;0(IeQQ{o3 zBxvS?2lnCDz6T*NSRs^|SwGKmRa}-l+3RYhv`l)@xp8hS#cd`*fZ=nE)|VN_;oo*v zcLp|*u3#0;*BlB3rL@C?X>E0hm~Gop*U50{0Ypxq^qkZIFXCqgddK>F_`mqV5}qj=QmP4`w!DvRBA}c=B$`> zxZhtJk?*~g&Fj87C+*J#k+X2>rE&`_IK^4c5{;$8R$AFmBn&QZoSTR?{~lIk3<%}m z{2yEx2hQ<19(TedUv_4Vdnn_Bp?pdQCM)d8yENUrPccKqKhm!!|MIabX^SEn+KVCiVEX;|VdgbFv4d(5%u+Nxf z$PxmV`l8*zRd{H+c7H|NCZXRG#RGhiU6aUv<=6{Of8@KYl*)!yncqH{f(@W<>9&MF zCm$yH439z+alSXMv;M~oOd*yXF@R#y2=zVwe9ha5<|5C1zaT?eAl7jGo^BqGY~3C7=5H5| z@rG!zO_hN6pX+dLq$M;wzC4}yXYKwRD`g~hy-Byr1amrvwoOMKVg~nK{OFUD{Lgzy z<|t4!+~Q)HS++s{{>CN(<7*S^UO>zVf^jyoj%jhQF643^gukGk(=!dYz<{s=ue{HN zQSiR5Ca=g)PCJjaoa!Rg{wxBso!?1kO!XAPsV}Y9=cc&`0BwH(swGLEqLe0rMzFw{ zd-u3VtoX2&jiWHHM(&|Ye4^;BvjZE7b9WgZrMG)SITBfWwCuoZ+JH_bz;QnJkj7D> zbZ@=P6|a}scVsd%QKJ89zg_lH>a^3Z6@q@^e(jOauDUW!-?>|Y9IO>(Q4&`Q!h1#)an*XmJPdKTv)BVVqAVj{BEx$JO98!`BATT&GpTKnO_}SyyBp$%$K~E;AS~^hM*{iqSfH0fx3A7 z%w6K$#cvm0_-^M@Y%{crX44MUG8~r*>97V^nkV60Sd6`EPkGCoR5^_nOU_b4^jT>l zTsk7r{bG7YQ0bsQ^tHel{H}{GF~*qSa{LCaNy9%_b&WH@vsSE+4NJu(KJsAWB!k2E5P<$ zM+tmD|JSq?!GEw$k0<ioihYyKV*WK9X;ui7E@t^dEO5a8VUyCr!Dj9J z9lrkK6%^7EaS1~0ALQEEiD0~TMuc1jOarlEJSM}1T5d}1f5pnU_5!gk7t-$I(vN32 zYJ+@-4X(cRQjWD4vjkwcY{Hk7kWFUfu3UASyA$N8-{ zHNCkGCe;NsjuLUwS8?^^3-L^D7O)OP)LhFY&pcXGZhwp?%O+lpzd+&&A$rmm`+ShP z_j;7$XZEb7TiU99RJC^~p2dFjl)nP#|U2Z@zw& z*dAaU_1x&VrM{qC{qBS@QvBsa@n7dk|As1krLR#1O6A?5*j6|=Lz9etK+5)ZHOJ6Y z=8nuM&5Ez`RL$ZWUq~aRj$IMd-4Qa679`I=o97m+O}4hSv8GZ(qGD_NVLH~a<68e4 z{91_Jg}^bj08B*S7eU5=#Y*S{Jt(w9v?s_LID|oLmU7KB?S(SvYIWcj~YSn=an9eQ{X3*Wr*k@05w4 z`WsWa9}*sSiumn*7(*z7I!*G-kv77ehO^vFu=Aw7QTu%7)Q_ysEYo{e7l`}pz+iSr zko_&78c4rLp!g3KK)8gvjsBs@t#Z&`-5eT*Zzt_* zxg$Ootaz<>{7!Heg%c+3(^$k>%>Tudq9X-#iup6j`wBMF2vOFgrOX*qlnCjc8ITML zH`Wp6YCU_nz$lviFyP`HHq{RPzGKJ-MO=VXtQ*iizh$yd>!>X8j@vtC?;${V50QKV 
zql6kDUcxI&|CT!WnP-_!)`{e;y7ahys`|K0s7^gnLlMQpNqXd-`<1hN_+`qCtQ5#= zjl_YvUfb+Z&pLWivq4kkF_IBybP0+49$o!<=HX=Ik&x>1cEVhTd6}4WpK4EEm9f&4@NYn81KN*$DTZk!v zuO14`I3)fPlb%aofVW)H*o1-(mY|bq^_wzxM*eH(`-_rRF*ixruE0|?Ju*|b+~@uZ z{8rfpPV>>r=$q7zvYxQ?Nlp1tYP)2!m@~8#+%>yq?TMw}I9kq&Ep@LyLQ>E#+nzd-7^kr13jWk30d}-l{Q<-?%TbM5f-d!&>`Sfx}dSh5kM@Z)T>i))v??x72dQ5oGQ!YYLj;f)<4G z_WnxlEZb}PqKR6jue-}Fc`(@3QeLx_>w37>+C$#u8bg(DCgXW~NI~*RTU_TWL+zzz z$lZVB+Xgn&ApBE(mW0$=u(~Ycp0>l4t4UH#$)RQ6JiNH06At-n$reuy8 zZ$>N~sj%crPB4D~m+wOKOp}#xV%^Nm=#skogxA53v54C``pjOasHzV?yn*`Fu2C0X zLx24yKxw@f@=2&>EqtmR*z3%=;m5i);Honp>D;l-sMz{`SKD>^zS44T-}!jH&x#)@ zIX@sFwAk*=@aQL6z>GMV>khowA3}6e&cA#2Vg2dfssKr!U@v-(C;i4gUi-K$(M#ym zg=TM4LRTMFmr(ItHk@tV_M)Fh4OJ}XeW*=8cuhz&ac`*{Or8LX$tfR3@ztjMc(+w* zg2rq4?z*RsjFDISIB7%l9@NSidY)eMi^eDJqW2pQBKKU+xGX{SpUB}Od3e3Lpa+UQ z>nX;DHxj_Je_O$>rSS}*sPz^jRY(g2&voVo>(R-)y3Zw0MtA^_;9Yc|29zs~0Dole zn697iC^T=yS?QRvPwmoNKUPJv_FNAmShYy2NibR3y+f3KCeF}BpTvcNiGYmpbo$N6 z^8}p6Ax}_Mgpbp3lzz)&5ap2gCrk2-YH`j(6);HOUuTKG6rt z8r6EtYqI+5WcWQ2Frd?U41*^Odp9I?OLWI?KaU`x5*-`2ss-lA$RA-2|E zdwi{Czb`+N-OcvW3Q3F0^RDBA6}m7M2A0CWpqn-CykVA zOUq6QX^V@ICXK@ZJ0SD;!} zGrqe2HPV~RChx<2YVaX#yVE_#L3M=^H^|pSKopUcf7xqZ*%-g(>+!7AiAc3=3LoAN zkCkd|+NRo{PtUIA&F{V>#vpui&xi<)bAI)p?YLM4ZPpzl9IZg?EKvX=l)kY@^k*=LakfxH9FKT z6H!WxKu(M;)&*pW1bAF%lAu5>i`^iLep7(Rg z3+(NU4k5aaQ}>lrved-?h%vvi# zZCeV1!voQVwQO65YM09<8!j&*=QNOk+$UHC z;=ILLRX3aZ{+syl4(fphoE+4in3I3F ztjd}(s<)>D5J1^0*R!92(&Blq62N~=)R@>g+~_(pLDm{A-a>j(lXu5su@rkv#bd)$kFeR%r=2+*P#Y^6EXnfr z4p*ZI0{$;Vh^FXr=9QDojn&Uy*TgX0_Xf3S^S`l>oUFHV6E0yhrsQPZ+aq;!D+u(; zXcXjUd!8RN%aXJ^koeivE7B&Wu|rIIKPTCe=TvLZmD)3@V6_MLdptT7cvyP_BqoxW z@jOG~Hy${*{?AYqCm7ih7(O>&4sGU*WleE4D>x=e52iO$$}ozeNN9RKE^Oqr)rZa7 zD)g23X&8lxlRib4pE&NV@6U?Pzx=ps;QIY)Iv;^w8}6wd*__0QU&(ZM6B;baIC?g1 zBIhUDk6+xRRZM@Y!f47tsc1qh(GcVOuF1No&*lsxg-YL=-;Klsd`5Lz9tztJQ`%&f z5=`CJ=mO&btk_Qlhk;+!W0`p0$4im*rV4I6KR@yOt4b{Xw9xKU{Vh>5Qq6WPhQoyq z7j*RKPxI^5-9h+#{`hyMEm_}c=oPe^c7#elU;kvbSQz!?^Us3YyBiua2W>rhd_J*P zOXm$8iKpA7WIIxj2u1g?tWQuDob0b`_n( 
za*?fBg1(?*s`%Jgugw|tJT6GFt&iBwZo0V(v=?n^NHb2?82gKXZ8f7^wa+Zl7AlK> zjleZe7A3s#D9Wfq66o@S=RkSK+&=Ejl8pI&kMDKcEx|qW$3cExve9PQ@poo9X$O9A zw7(y^zR)oAi1rOq8)18V)}9>rrF75|?se-h`}TQFSLrB;?T~Fg+&b*%ySFPpA5y-l zi!gnh@T)b#OlsprCr4tEjD-=cP>l3V%goiK=f9d=`AWm|+7tc9&F@8IHLl$SsoFPe z;o;@SH`}$RDGVlFT*Q{EIuti{jWEvm0mb3BeOh@n{d=kT7_$!Iy_Zr9Jmj8IyY+-m z#b(7L5nYks;Jqj(Q7CsZtk;IE7#Z5gueUp0corWdZrEmZ>Nw0_%Qy@~C-t9L(sP2$ zywMb#)4>|sZy3EY+JER~SC4}17BCE+irqj`1f`@sq1 zk_#BuMVm8)>0$OSaWr=!_dP9mHm)|Dv7M#Vp{c_#_YrQ^xn3{O_ss~rWz3J9Kua}H z9DONL+-Qx^B+UF#*aLF6mV4AzXXo97Ll2!u<`6NnPq4GJr@T0^rk^$by#C6d5aD54 zZN86BCvR8`N>sfa*XEi}!yXBqhXE8cZ3i4wyEN|AE(Nyq#t+rqKC~Cr$0g2A*AQZ< zGj}tgaOFpjdE=M#8XB*TyKYh2$hrC9+$iExUt(kojPc%zphW*PeZ!GBY=3*GyvA*_ zJgH`0B>zjQI}d(ntr|+^I&g<%w>ln6qg|z_^EEDMn2{p%n-wt@E!$K$O;~hMOJY`w z{!q{B2~K&G9esDsjSDFq4AzoU08i}vi~Ad(kMe1+2Dl{TSg!x z{!kQHcx102_3IunpZ*`fHA^(-IX)=jWug4P-`+tTqdelJV}uGP{;A9u-YgJ{ex+VV zjTAKqOfG`ALo#;*wNkaA)|fk^?dQo=GLDvy{sUHT(gS9W{pMO$ECXhnbQhG*WKfK$ zRvqszYUg~JjFh4!H*z(l29fC6fO(`UZw>L^FYW{HlCAkw9Yv$^6GSXQPzmwv4jZR; zAP|C|gC-pUG_`YmY(feOE3HYzw0Lw*45S|yp~5ONDc8U&iL9e}GE8il;&vZW+M01C z$XWz>C&Tz#09~UoZ;c3lm-*`bc*#FQ{dX=TsH?dpTEkFlBblDyuo;^5#swh&V1$x7 zgZ`{3W@1+|`X4Mn3Err5iB>u79y;@6vutgt-C-yDJHn~l)Bm0?0C0mP0iC3HtdaXX zWE*FbUodwQqU2+wXCbz_c+>r8IM3`mmt?wTXI|_`xOLalg^eWf4`O*5SS}9NGtM4c zwmv{LOF9`Lo#r#*S~PFc#B?J0tVu6_a>d^`tpGxO;}Lb-KrOTIXsiwXa=%-H)WR4d9#uCu zY3)Crd^bQv5ovg;J#8fYP1&$r1}n!BYrdkGc7kTfe9IzPJb!<2$3u*DW(GJ*gcj*G zfIXO57ZAcLNtQNV6WMrh6KA4$`9ksUKZ_>#+Svrbd+Al=UrK+#y%o~+&ho_RvgE-* zK>DN7fuY|tyfh^ox%O`5@0uv=%g;mAjrbyWZ5zoWa5QY9e#;1?S&)uUi+|jE6(o}j6aW2qYCG$cxXTCcLkIxbDi=Gacak5gQoUXtNVY3cnxgDu%}P3 zBoXhj}O z3k_`=kz1ml`ZA_u%&}Z$$IK9GcnU^Z^gQBYL{`fX`alMmY3%r!H58@S^c5S;f3Km{ z1#c^j3i1Os{}Ve=8Wnk*Eq);lt_W9k-!=al_b{<8&- zLK_I7f7zk51bA=98*8IKGeZa>?iWSLca8A`5_~6$+oU@nyMxXAo6Gy-T~ZP663>Ta z4zCV}!^U3vn1MjA?=B8{_kG@Yl^q_}Ql_?2jgaj_6_22KGf-#k6Vk=R0W= z#W@m|)rhYwL1tWi$Z3emV1@;?>(_V0Q}~IEqAiVwKQz~Is-co)VM!IMt?wdBN+1DD 
zWjB8r8ZSCjTFNGScZroJ9pTvK%wYUnzdRGUF!zzo4}G(ToX?=SdpP)AYQGGBuwX8d zXn2+pmB}H&hm9S~M^UWvfpQu#3eoX1etmV(a<0j;=e?CYOgoOqs`O})B*)J_v*B?! zZih6gnaTGn4S@ddharhh-Oe3gle~LZfMe_FYZ{p2MY{^*ews;O4O#Ra#MkH+fSYdVMmKIC`EOnpMhOY>qI$hhYoq*bRpHyOMl1}uf$(kbJx&+o%!ww7Kp4AN%G?%eP(Cf}f8AVTmcCFl@H0=Vr=g#gLG&g&tj{AEM8GM2_1< zZ*JK_!iL5$h%5B6;Wk^QzX=0wQ{YCL%69&XZ6_J(0863GBD*Ck)bQFyulvUoqZMw0 z=06EZ;$;jygNERNkSv8#Om0JQz(|aR{=Eb)L@48D`P{hDf+X23`9i8l@UKD3!Jqki z9Q=`pl*XdkN7$mor#fck8+L%@Sy6A`HkR&&%#v8)V-Tq7GvBwt^7@dj8d=c|gM)t8 z+PuSRb)394vJsl%FWdQv9%|!*Pe*G3sxkx0T$EDa=u!pLd_UG7?cP5cWRpykE2`Kd z>p#BG#WGtxw<2fmrR{PMxAUZY`9jLb_sKF{)M-XIoqBEy&{82--$ChrzNPh%t z`5`y@==W)kh5awB;)FEh?R9W8*kPk1vUO!jX_Mf@#cD$B($Jq};P;f8qD;gZaghEr zq6eAQG+4?DqKN%2KB59Sl@l?XPIBim)fj1gczpXe?eE1@xC8Qd0#WdS1c44^u)9lO zzdahSe$!?RkOp1MyqcrnhO5`31Lwaiq+~ie;MB_QlDZ+tnfqM#rsEExnkq;rw7^d2 zn{8%Dp4XY+u}Pj$?%qt6{t4tk2-1Rr;1j!3tWuPKa*dKDZUJF~=wv8b>3lrJp(#l_ z%=kU7PFV@KmyBi6t6Z30h<8b;Da+`?qlT6hM*tFRI+U?RGd*9O-NZLt@Lgb{olYN8;yi~3CR;g>Ef z=GS^74Ry|1vj1nwV(*E~+>H-&)6pm+00+npepR*JiiMf6PT&AaI#NVm_I}jkgv9ckgB&ZIG>13r>rq3w9rF0-9 zT7K0!e&DqcL}3I}RU{zoqGT!8cqzLP2Xqm;To})Z6!X`EMI-^RTq%|2_*kccBquHR zCuu`0jZ=0f#cx@>%0~~$nPev1SCZTCcNd)-yex`fzMIs0#;9pTd#3B7e-|8~7o3Tm z>lKUms4%@_pg4ZnEn*XjKmTsO?e2ORvw+5*&0gg&h|{Bo8Ee9|@`{xCOK3qTayIWU zefJmz-qLZow7&(~8Dd^J0~~Y4J82jsJC4xx^7w6(G(DWlvxA&EN^qW{rE9^ODXG*^ zrx{j}zA%KRj0YOV46>DTDUXZ+uhl-+8cs!?mGnqSwB|nvkcKSGonbq~86f<9IdIq9 z2J&3=7(}Il&^WF@ZuyZnMpE);mS2FjBx`EtUd?&We#0l{_qKP_^2r)dd2R>vRnHuH(?6n&jTp zl+XTr7HciiC&&9}>XTEF1ERAaW0iWsjo~9=%UR{X7Dq&XWKj^(;*Y-BmEO2l12N-v zqN<)n9q6Ep+%8!rJL`z%tN0*2c1X{_;oxQmwp21YL=DEfNn7L%=H?5ge8xYQ??o#o zLjh!+tTD;Eaur4*&(7P!t2LA zsdV2%flB@^qKJyBsFAY;@)TGx-6jY&@lRcqya3@kCmEU`K01aNN+E0$6#juHOd49K zMx{F|+qZsbAp$IL60v#hqVF33&%DF)G~puxViaw!Np>Q=I>G8F=$QL5a4$+#IH}1( z{~|&wBc*+{BWv)_V6>7n%(0_@Uz|75{ zk8l_?WU>l(WevJrKSU_n2Bh5#>Gtb!tN}wZ*D~7s;DHy7R0;&uw`xT1nB$Q>t3#B4 zP=y#A0y6YgpH=o0q01DpD{63uZARd?l+TJ5W7^UFIq7#k8hL7Q=Dg@!+9nxKe%U)6 
znH08diIBcebGk+-s4tp98_RLOR_`h}lVG$9^q9tjv;t8{ynS^w|LvMY$nCFlgEvsw zOCst&@94deL$bMNrB$+LqoosZ^=x-YOge_4p>ncYhEO$?AxHkHrh4WNuSm0NqWixK z{`0WekA+`_!Wun9A2wa`M<^+r0`SfI z+lUhLnXN{YK0(aCbp{30iliDoV|Oxtog zC`0-M`Q3S@IvH!E;q?o2eHMc9vz6O>E%dx?hU&|@pWr~xbahiRb(^AXBcSFoMj@@y z;?M@?h&1Vfky62-b2tPrc*Q}m1rQ6QUh*XWAq}d(0(6Sk-m;#@iciy0;%LHl5}}}a z&y$N7kcbNcfmsj_6QATy-BW=cQa;DVb3#r!jCQJvxK0lh@w?9dGJr-+Jb}Qv|0~qs zH(Nd)a8cqyG^wli(L;@IX8|#w0IUcn3&GN#!Z|`c3j{Z6idRx?2Gz-v>+Dxnk$4XX z77=I`+;Xb7hcez4$3#nCg<0NJ{80ppp+yRQqw?Tc)jD*HDy)|IGshY5AzqZ8y9>qC zeFxRSt~x$Q9bwtC@AMBAFaydxCp-wLaZXNBiEY};jPhYY2%FFet}-5M^B}?RD|0su z?CQwZt}-nMMhqAO&&rwNU=8JzzJWq>bMVlFH4$3?haj!qw>GZoGdsNRi>ph6O6^QQ zR};*m9MXFin=bWH`^*9Ek4r_HS2Cdam*XO|*-e3Kr&FcXr^9m+!LXd|;>xn#03VnK zHQdtsUwMNAfXB0XoeRysc)Kz^LiGLpOV~^x)P~^M6K17NYVeYI{I>J(MT zBXa_@eoNs=VO3clJ^Wp-{gbZ1JOY9GDtl+ni71Z^Af|UN-@Oa_Ob`KT$%lt3NI{i$ z1s3x*hunGz7u-OFRI8^(7+^-fF2jr>yvt(X=SdiUc=5tvd>N47R-#C@_~O-HV0%cS z^3-V*G*nbqa|&+U)6a~$-t+r21Y}c6E1eJ6Bo}u+Zacgl2_)faXGyV40C-Rwdznu6 zwqsI5@hLCN?>cDMZ8rJz+wj|SvM7YPWfjc6WvAt-v%2G$MdkHA(F#HL~@wxV(6?#D)w4r zgiA8pFl*H5NmIticllc%^M>xwpUp>N%Z2uCcb$XD?z!`v8!WR%pO&if@CP}sUG2-A z=vge8VK%M1jO%K64rLS?1rkGoVX26V*efq5s`5S$ejQ;71%m?t1nzFVX zh{*jEvlOHcgyducLcwpE#_c)>E7o$+N7k=>;x?2Q2gz9S^|YQQ*6N9|gP+Hv(?aZ0 zdF)Bsg!o&AUjgcsTO?jYWY%ec(ySp}Dfb}rz1DudvHQx}VAZ9>C?r4x!y8CNDrgu`h^q(D)_LsDM=ewiVMt1etz^T1=Y*Kwnup$j z?xAGSvs}a>kZ=47Km2~zp&9Im9IB?_dqfKXYo*}`7*Qk;?uxVmI0EV~Na9>13=%A@ z@*2!m!+y(_0ZNVqe2ePJ-`Ymz?`M0Cw=$+bpc%+gj7~e_{SH2LSZA78LAEA59j$Hl z{$1umxG^VzOuL~ZV@x}Dt>>^~D^2wpGy(XufYgqPZofOw48AuJa@V2yS^)BW2sIEv zIemHNu!1D;Yw+=gR{B8dr@D_2*$qe0Cx$i}gh`KFKQDE-ihQEv+uR&{*Mk_a)%)*R zw=zb1PY}v(ZLs^<9aH~BCJh#pA*iLKo6c|WC-vZxKiPEe2|DtYWqlC?T zEJh%PsqQ|$7MJ}&@e~SQoh|yuImKm=X6`cmq@Was&w!YL{(6No%ZQ@E0j3d(be+1u zx>HDcx+*<^;-?vE;5h$Dpm*s&ZSX`Kx+>N{G$?_TyE(JaR>L|86+$FJ@V9X@d-@`oo>nca& zq!G#j2}nljeZ`Jj4F$XdjLXIw`wdB=k&p*$?3b`}?zgFXzm%tVGGv{l2cqVQF#b;5 z?|d)Zs(JjGw3nJ1p%>x7(o<{p^Ew8O%>vAl{RWSlK{Ga1fuJgYs4Jtkko@`ei?14) 
zox!OeVM;=H?T6HVec1e_+zM$N1r+}-pQGlz&~{wn@3?=DkhrJuycmatCO3!NLq-6a z0JzKQSs1KXFm=dyTIz%6M+HiCXyXda^1vo`%nA2|s^@nnN6%nR+`U^xMidNuskDKk z@X;Cv7hYJvED6R}IerZit997Or$3lZ9K1IYtwodHLG3#xzIWSx1`7Af-oJJ}jvaA5 z>r5fd&DwN~5;V&~=X>V{VSFv1;^ZfOAE5QyTwh9sj3q6ms33>D~jg zPabRadq7W!S}yxxzXn7(E3mWVjnB?4cI4j~K&tNVh77`#8TW>g&|zwwP(!SNoLC%b zqN#xIY?wi#M+EY%6Lc%mt1IB;4TapB()hMwGk&wCAQRc>GtoqGAe}V){lN-u&+)VM z(D=P8eyapdLB_o2AMtHgo}0^7!@BS)O9 z#)lK+g}qwJ8B0)xL=2etoqPHGaG+H(_V9{O4!H%s&tCx-m9#fhNcD0Y)f6c^lOwJZ3<3HY3k+QEwFteERgCDf7v-jGgg zr{ov5Tw>>hTnWq^rt zt5GFBxzl#m(?|hiS$m=8yNZme*m6l!AAft%?x&lQB>J(c7rzIc$v3~@za1V*XR*DN zIyFp+=R^;lU{Q?b?wcbUi6_b&XQ03l*>AgpP$v{TSk1G(UlR)2&|lQRKjcG%113T` z&2S>BT=i$=sX|q#7B3;B(J8I0o8vdfp?Q31^ZvyWFCAr6E@SukwO}vnk^s+JPxF(m zT^e(Rc(;d;G5waR*}P^^J67wbFOL?tX-^`RPtQ7?mAofy6gTBLfg|ajHTJF=?mRjx zqs%C2O}r8%Pf5Vj<`*pn$ZgL$6uxbqf{n!cE?0D%c*Y6g#p@)AQwgP3_XW__0Jf%n zWzM1e1t^}HO{d%KyFTl%n6ncg)7R`{s*>btFN$`E^(wfjr>ojM*|zwzh$B>f0pm3_ zznOCH+|db$uU;(NsNQG!+0(2ai=^wlLT^1f?W~oNDk9{gaDZtqIFAljpu;Bi7xN=^ zjog@BP1oTQ5m6~54wkCCGg}0k8hy)1g8DwELl+gm(2c+5_7ig^lRI`T!cg)y(3=N1 z4#{(8pFY`Ne-?_gw#`Rz(SZHC^wMY4gh?JQY) zCLZweU0BkK{!E@*f&C2U)#Oy0DMGZgKHKGPIy0^BY7w5bdW+7QcBj-TuY|}Ri=uR?EkCl%j2Q?{Q}X(3|JS1L=0EE!5hh3q0_iHtC!GQ?EKmMHr!&*x61 ze$VT9UjFgAbI-l!-gD09ob&#?-|u@>acL4Ud#W4oo0kk<$_QiMIJGqQv^p*5G$tIu zR59EKKfxS$;IgsVp<{)8vxlDVI}7-n@rC}r~o@7AJAUBLh`+ca58j$`IC1YTYT)`teYPU4ND#lf}%S%1b|1?8U3Q14W}O3;JR- zflYao--8U4;}FYpUG)fp`(f+o%t&So(}gbQRL{4a?+apO9MahaZ&_yT!-A0MTOp)l ziA4{Y%WkyyVHaVg!TliXdzb49Xg?^YeSz*ao}o~Z;CHnTkyZ>G?z(qv`wdI;rwE7! 
z`_H!m4d1?50Fv^_m)!&mxNch}I?;DhzjqcFzK{PLA>VxTBO;k7I>atuq*cNy@S;gJ zM+TRvgbt11!pcZ_ zM@^a3f+{aqO&R^LZe4xE7__*Elo44&ESeBzuB1;3S*x$qL;lqLFOv9YN@r@M4EXlg z@kXgSIxc$R85xBzJG9YlE>}D?l@AAQPmcBW5-C@W_hkDYXZMV+gV>g`8&2cI1#%1I$3_MRZ?`Mk7j}sl@OujM=jzHRXoy|Cn>j#J8 zO9`bvTjk!xJ9n5{8arICzVMRM;kZ|Y(~<^1!xlN8>BDhHP+>Purj!R7&aJoOzyzZWb57^h*A#YZFwk)k#Pamw5W82 z%m*vy)^Z>Gkwd*iN0{|AiE_)GoRKHJ9?F?H&J1UTQ#ELxmWR^IE6Zcx2AGJsuMnwc$LWI}w9z*7S06aIiY|QE$v!o7 z;O4@fdc^=i`=~^t6){bN6oEWpak+$dF;PVK{n%!-215inaG=;mru3&)&a1qcfPvTp zah%pbXAO&OY${kunjI1p=jI!9I30CF+1YkMKr%Pw%h^Cg8lo$h9Y$aicWY^z5hSiv za5^mUDw3ik9l~Rqn`FFJ3@3uu=7Y!jA@#NqC8?+AKjYzN_}FDz!q+|dL!3DgWa^bK z0-p`rtxRf`EKzEU_L;JaPs;mm$ZSt9dGOuexAib#{&{DBa9ZFhI}l@24VQls2Nggzl+^RriCD$P*3+NKlbCW)!=Atoz-*$bl)VRw3Zn;93W1|li@)`vKx5A{qI%;5aN4-z7Ja>2b%{F zrBZ?9H5GF-J8yGwYlNn05Ne7GN4$t>L*GYKTeT$zgO3M*E)iuAsgi>8bTnM_shx1Z z`$sWrqx`GTfq_Oitp{^xy$}DY+>}7_^zYG=hpL9 z>W?EPzMPF=zFSa{`|3yPfkE{P&jDnA_imJ$G(9RaYQvX3h5l4vpOOZesJF#e@z}{> zHTvZ@K0EKkSk(!sV#E~Y1X{bSmcnK;6EZ(JJuNIbqK1LBT8NL_71?^Ba5lyz#9g7> z-Jn-=cSXS9-C1LtabfiJ4MX)V_bw$G2X!9jVBruf^omzW$n_LcxbzV>83-;CCL9Wb zhc&pvbsdiNB6@xiJYoxH=SrvTfOa6@rEIf+|Ggj@p|v(z%GmchW#@Lk8aoR~&k}0W z?)@6A!lr2LpuoBghZh1@$Q)Yt?{%}7)+Jvss%JkdWk$i4kW^~ThMVP!aqlmCXK`=n zkKUmevDbFB7$gTw(O-v~`~)306=>#iojxh5%ms#}mqg}R-V}KhRgcT(c!v3*S!(NU zD~YGPPtMY#UY$CoqE_9dZ7}}`nD{Mn&63|U`-K-xcX_={9&HFOWa2(vjO^f&x)cqh z-W_BVsMid>6S*Kd@o?8TIbKQDaJBrk^SFenMzdu8@$JILU!U2{aSnFOfv%6!oHqLV z4Qe$P)llrsxY827t$+lKwyVBQ|8p}Z#^E}7kjw}hBnn6Sbj)WCN|I6%e($g(LCJ4q z#w>m580JuGcfe9u75RXd`-d+1)Gi7z>3+z(ToW>Y)t0?bclwZ>U=qWZCux=5L?=1n zg4!g`25FQ+u$a)kFwv%Fz328PrhE+ytp-}voeGkK9qUt$Qmp%8jyN{medO1qGjQ&b zFTy zRW8!)n^9lraR$2nA~#D|_K&tGMr$>`sjxmh!|gS2Q)oY$XV;~Y|DG4RFC1aYd>|;U z#QLT5FpDLz!TQ?aQ{K~u(PDh5Hspqvo0g;4vP`fqS_w{Uk-TQw-PeT~wrlRIc%KEU zUwnvCsjlfLuG6FML-K8n=jLb`ijujzq&+$f7{MQ15}iUO^Aokj<8HUBMYeC)Z1X*4 zvOn$8qJQxG?ISrtxqEXG=Ixrt&+bj#ny%_(y|xrtWoHvJTg|`fJge_AP-kOAJj$iF zXgp|IvYD%XtkG`D85PH+O^GfFkBv1m;F!kp`FsiAqkZ;t%$_`F@nU(Sd{sAzn5I3} 
ziCNafL)A@=-8#t$L1r4^0&{MMCiz)Ow)0!xnH{>u_MfK{*5Ho6>E@r*69w$I2eAo0 z50zczRRM z#ji@@&)5lxxvyF!))`(7Rt7J~BKdD;i*#QN6Yq2?7h8 zNCc*G$<1C8ZOJG^!j8Pbw!A={V~Em9{t26f;e)hXagrD$ zwzhDw9YGZ%l!`Wbl&ZkIskD}6hPY=a$_6O4AH>>tX;3{6D|NOl=ZMARhwH2MsS>8= z;jQ1Fzcm|QBf<&<->K{y%+^lvcdpWjr!^~J%%ru9-r8H3F>C<1LtaN}OZG+z!qbHu z)zIAwDSLort2g;!kFw*Tj2-|9DpxlatRnC`6OgFXS^PTUMx{d75zu^sz099`#F-)R zNel-cGE2nUH5P%>@+@V5G`s{rMAeV%0lB>ZJ0OP7{jqNl?^~a$3c!g^HncJBoBY67 zy9W2&2QU+xz0y$uR;v;e6RsDahMUOtdG1rdkJnQoxo@Wd%go0{A(&B`WkV%c(S+_= zcz`Fc9 z*fpeS#0k)AT9k&uF4P)O*sYV0N+WvP7F7e5h$#H;woa45U6}{usOKj7S0_%HGhBH! z+>#>%8L`I?A-QhJjiw~zm9=xDB|`2?qQd}lpqFku2HI}i>zH0d`6>Jgr5($?_ z_w?c!A8#OqZc;#lgx;Hj+1`RUO+}ih+Wfb9vs6-Rux&6LW@kq^1;26TB?AUi8UgoYOYfo04-dEEW5!+f% zRy`4C_c}DGu|rUyg9_q1$FVM2j=*A5bgcLRCJqk8l-Ij89ZKEmx6{}oG!zrEE&;8Q ztDj{AQ#;t7yg10}4>{KJ$}rN@=Bqq^$TO5V-$1bgXDx+;X=h2BUhU|QspT-0esb0v5TnDiK zS{*{A3!M&3N!wO`7Lab4G3fBovz@23g@zFsy5wL&dgU*(n3C zhtP#@LDC%Gi-Wd>OOA`!CBsFMA;dIO11KlgD|;K^!Rx1)=RE#f7^q+1jd$-(F5>zF z1!!4*fQUUfZ?_|depiy|0FXHM1QDO?*ZJ#k*M=yFIXDmhen)9KHL8gBz!t*( zv8~@E*2`)@{fZeiJ0v^)@u(X|?jd!h8E7lWZO-%1_jJ!f(UZZfEPq-CnA71%ge7gk|i*9 z3>EE8)!+wD0vBV|-sd>P!!HAeDhQySF9zOk{_wMXe!MZ2Dsey>s_C-e{|K}KDNNNq ziwr)5T`tW6CQG^+_f1r`5H9pW@GRnL8%fL~JI#$>6(zz4GDs+?u847R!B6vcue z9)2+i*XLl}ZBS#CB?Z5o+9r!Y{uC>ae@y7!2SXx+Ov44$S=2b*XI-Tw(8Qm5iIvrx z^v3L))(hL0t;J11eaZmd1=D}|JFjmg?5rpJFUk5w>)or%VmysSwWp@7DQx~-4f3f< zVJkMBKL;h6iY@)hGV&4sVBXsfTptbX63i`F4`@^t4mZDZ9zW&3^w>iMNC9~yNs48k zdTiY-ul&mI&ycTHc;Ppnwp{K;#I#q`S8n~BO#A;@djOZe zR+&Z9mf%rMdNr31Ww=7$Hs0q7wEJHj9y-S?bh2;linxFJHhe$<#_e)c3+m5#C8_wq zJIMO%?y7U%-(d6=${KY#Sy=s37<`ZZb17UVfm7nIL2t^-K=Hi+H#XBhjau*VA|cK& z=vBk+`$HZ68CFOX+~4>2n;#>)8D6=?{ML5-i3B<%SPES9=f~5n3bx`6UEPCNCf4w8z|Spb@#HvkmHUQCF%*f4zRDD2DyK zg#EpaLjS~{6#a~nmyoDxR66H)@ACDk3p3^^EA0UwBoJW`9)j>|-QQX~ z`uC}D@BmJuc{Ue1fW7g%A^+EYDx(X9n5MP;|Lv#z|4cEo2=c%7QyI!ET^oTY49)%j zwx1&Y4P}P^m;Dr&)Y-XLBJ)E}K3&i6SP6ApU(v2R!@HOvyMBYmlbR;HO`; z0mAei^zU zpOyLh_iTV)XH7--?3{CZ!Ni#H@7@Y-d`SGdrK1g_Vwt}A;h&m82WOOO3+z*$c$W0H 
zzJ?iLFvv8^eLYj4rP5Fyh!bweedyS{r7pR5{>~s6hLd7g&D>6e^Z0uJNQ=UoW7Q%&hEeSnum0{V<$(vv=hl z{HK?-alr+J+M`tUel;29JgUVSn8+{z^B(+VtNg3p4?$9Dy)7o>@2mxFwq(@1Mo&k# mwkBG>CmFtx#Dpwr80id?d@V;m3ns!5x`X>o^q=V2h5jED3bADX diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 341f0d1550668..78ef218f9f58e 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -24,41 +24,17 @@ Please note that this feature is only available from CSM 1.7.1 and onwards. ## Terminology and Components -### SHS - -[Slingshot Host Software](../../glossary.md#slingshot-host-software-shs) - -### FM - -[Fabric Manager](../../glossary.md#fabric-manager) - -### FMN - -[Fabric Manager Node](../../glossary.md#fabric-manager-node) - -### SLS - -[System Layout Service](../../glossary.md#system-layout-service-sls) - -### HSM - -[Hardware State Manager](../../glossary.md#hardware-state-manager-hsm) - -### BSS - -[Boot Script Service](../../glossary.md#boot-script-service-bss) - -### CANU - -[CSM Automatic Network Utility](../../glossary.md#csm-automatic-network-utility-canu) - -### SAT - -[System Admin Toolkit](../../glossary.md#system-admin-toolkit-sat) - -### SMA - -[System Monitoring Application](../../glossary.md#system-monitoring-application-sma) +| *Component* | *Reference* | +| --------------------------------------------- | ------------------------------------------------------------------------------------- | +| SHS | [Slingshot Host Software](../../glossary.md#slingshot-host-software-shs) | +| FM | [Fabric Manager](../../glossary.md#fabric-manager) | +| FMN | [Fabric Manager Node](../../glossary.md#fabric-manager-node) | +| SLS | [System Layout Service](../../glossary.md#system-layout-service-sls) | +| HSM | [Hardware State Manager](../../glossary.md#hardware-state-manager-hsm) | +| BSS | [Boot Script Service](../../glossary.md#boot-script-service-bss) | +| CANU | [CSM Automatic Network 
Utility](../../glossary.md#csm-automatic-network-utility-canu) | +| SAT | [System Admin Toolkit](../../glossary.md#system-admin-toolkit-sat) | +| SMA | [System Monitoring Application](../../glossary.md#system-monitoring-application-sma) | ## Architecture @@ -76,8 +52,6 @@ of the Fabric Manager, real service outages can exceed 5 minutes. To address these issues, CSM 1.7.1 includes FM on baremetal support, which provides the necessary base OS image, networking, and storage configurations for running the Slingshot Fabric Manager natively within the CSM environment to achieve HA. -![FM On Baremetal](FM-HA-1.png) - ## Configure FM on baremetal To configure FM on baremetal please follow the [procedure](Configure_FM_On_Baremetal.md). @@ -85,4 +59,4 @@ To configure FM on baremetal please follow the [procedure](Configure_FM_On_Barem ## Slingshot Switch Firmware Update - For clusters using the FM pod: CSM will continue to handle switch firmware uploads and updates as [before](../../operations/iuf/workflows/slingshot_management_network_switch_updates.md#perform-slingshot-switch-and-management-network-switch-firmware-updates). -- For clusters with bare-metal FM: FMN will host the switch firmware, and FM will be responsible for managing switch updates. Refer section "3.2.6 (Optional) Update HPE Slingshot switch firmware" in _HPE Slingshot Installation Guide for CSM_ PDF. +- For clusters with bare-metal FM: FMN will host the switch firmware, and FM will be responsible for managing switch updates. Refer section "3.2.6 (Optional) Update HPE Slingshot switch firmware" in HPE Slingshot Installation Guide for CSM PDF. diff --git a/upgrade/README.md b/upgrade/README.md index 4b02ed2d02bec..161c3f9db30cd 100644 --- a/upgrade/README.md +++ b/upgrade/README.md @@ -60,5 +60,4 @@ CSM 1.7.1 patch upgrades. 
Instead, consider upgrading to the latest CSM 1.7.1 pa ## FM On Baremetal -Post CSM Upgrade from 1.7.0 to CSM 1.7.1, if an administrator wishes to enable Fabric Manager on baremetal, they must follow the -[procedure](../operations/fm_on_baremetal/README.md#fm-fabric-manager-on-baremetal). \ No newline at end of file +Post CSM Upgrade from 1.7.0 to CSM 1.7.1, if an administrator wishes to enable Fabric Manager on baremetal, they must follow the [procedure](../operations/fm_on_baremetal/README.md#fm-fabric-manager-on-baremetal). From a751fae99914809f501c05de5206c1be225a5bdd Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 16 Jun 2026 17:24:03 +0530 Subject: [PATCH 85/94] Apply suggestion from @sravani-sanigepalli Co-authored-by: sravani-sanigepalli <131810845+sravani-sanigepalli@users.noreply.github.com> Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/Configure_FM_On_Baremetal.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index ef6bc189fd703..cee48f96b3173 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -14,7 +14,7 @@ and configuring the necessary networking to support Fabric Manager on `baremetal * By default, Fabric Manager would be running on Kubernetes as a Kubernetes pod * After Fabric Manager is migrated from a Kubernetes pod to bare-metal infrastructure, it cannot be reverted. -## Post upgrade of CSM from 1.7.0 to 1.7.1 +## Post upgrade of CSM to 1.7.1 Post CSM Upgrade from 1.7.0 to CSM 1.7.1, if an administrator wishes to enable Fabric Manager on baremetal, they must follow below procedure. 
From 2f7c7d8489adbffa64a943d9fa7980514e3a8c15 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 16 Jun 2026 17:25:29 +0530 Subject: [PATCH 86/94] Apply suggestions from code review Co-authored-by: sravani-sanigepalli <131810845+sravani-sanigepalli@users.noreply.github.com> Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../fm_on_baremetal/Configure_FM_On_Baremetal.md | 10 +++++----- operations/fm_on_baremetal/README.md | 4 ++-- .../Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index cee48f96b3173..5083d8d527cc9 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -16,7 +16,7 @@ and configuring the necessary networking to support Fabric Manager on `baremetal ## Post upgrade of CSM to 1.7.1 -Post CSM Upgrade from 1.7.0 to CSM 1.7.1, if an administrator wishes to enable Fabric Manager on baremetal, they must follow below procedure. +Post CSM Upgrade to CSM 1.7.1, if an administrator wishes to enable Fabric Manager on baremetal, they must follow the procedure below. * Step 1: [FMN Prerequisites](#fmn-prerequisites) * step 2: [FMN Pre Boot](#fmn-pre-boot) @@ -41,9 +41,9 @@ Verify that the BMC of each FMN is configured with the correct root user credent ### FMN Base Image Creation -The FMN base image creation process supports node discovery, configuration, and base image customization. The base image contains only the essential artifacts required for deployment. +The FMN base image creation process includes node discovery, configuration, and base image customization. The base image contains only the essential artifacts required for deployment. 
-The following steps details the process for generating the FMN image. +The following steps detail the process for generating the FMN image. #### Create FMN base image (only base OS; no Fabric Manager) @@ -161,7 +161,7 @@ Proceed to boot the FMN nodes (using iPXE boot commands) with the FMN bare-metal ### Validation -#### Validate base FMN nodes bring up successful completion +#### Validate the successful bring-up of the base FMNs 1. Check if we are able to access both FMN nodes (`fmn001` and `fmn002`): @@ -297,7 +297,7 @@ cray hsm inventory ethernetInterfaces list --component-id x3000c0s28b0n0 --forma #### BSS should be updated with new hosts entries for FMN with proper configurations -**Note:** BSS global parameters also needs to be updated with FMN IPs(VIP not included). +**Note:** BSS global parameters also should have been populated with FMN IPs and FMN VIP For Example: diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index 78ef218f9f58e..e0605d2023b95 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -13,7 +13,7 @@ that manage and monitor Slingshot fabric operations outside of the Kubernetes en CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS image, networking, and storage configurations for running the Slingshot Fabric Manager natively within the CSM environment. -Please note that this feature is only available from CSM 1.7.1 and onwards. +Please note that this feature is only available from CSM 1.7.1 onwards. **NOTE**: @@ -38,7 +38,7 @@ Please note that this feature is only available from CSM 1.7.1 and onwards. ## Architecture -In CSM versions < 1.7.0, the deployment of the Fabric Manager within CSM uses native Kubernetes capabilities—both during upgrades and in failure/HA scenarios. +In CSM versions <= 1.7.0, the deployment of the Fabric Manager within CSM uses native Kubernetes capabilities—both during upgrades and in failure/HA scenarios. 
Kubernetes itself provides health checks and a scheduler that can rebalance workloads across nodes based on load, administrative policies, and other criteria. The Fabric Manager is deployed as a single pod in Kubernetes. A traditional HA model for Fabric Manager doesn’t map cleanly into Kubernetes, so instead, Kubernetes’ built-in mechanisms detect failures and spin up a replacement pod, minimizing downtime. diff --git a/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md b/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md index c0955baa0065c..81f2d30dd403b 100644 --- a/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md +++ b/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md @@ -46,7 +46,7 @@ The latest CSM documentation has been installed on the master nodes. See [Check y ``` - Overall, the script `ncn_add_pre-req.py` will ask the following question: + Overall, the `ncn_add_pre-req.py` script prompts the user with the following questions: ```text How many NCNs would you like to add? Do not include NCNs to be removed or moved. 
From 1ce4624aa227ef09be7b92aa700f19a1482bfc02 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 16 Jun 2026 18:09:04 +0530 Subject: [PATCH 87/94] Update Add_NCN_Data.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md b/operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md index 22c3ea6b37eaf..0265e5e7ac8f7 100644 --- a/operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md +++ b/operations/node_management/Add_Remove_Replace_NCNs/Add_NCN_Data.md @@ -358,7 +358,7 @@ The NCN MAC addresses need to be collected using the [Collect NCN MAC Addresses] ``` * For FMNs (Fabric Manager Nodes), where alias is fmn00*, we need to pass on additional `--fmn-image-id` parameter with FMN base image ID - generated in the [FMN base image creation stage](https://github.com/Cray-HPE/docs-csm/blob/CASM-5740-fm-ha/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md#fmn-base-image-creation). + generated in the [FMN base image creation stage](../../fm_on_baremetal/Configure_FM_On_Baremetal.md#fmn-base-image-creation). 
For Example: Base image id of FMN is `06135c73-bcd9-4d38-928f-ada20bdf6a6` From cc5bd358c05328ce1525ae6e3c3cc309785a4fb3 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 16 Jun 2026 18:36:37 +0530 Subject: [PATCH 88/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../Configure_FM_On_Baremetal.md | 113 ++++++++++++++++-- 1 file changed, 104 insertions(+), 9 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 5083d8d527cc9..caddf21bd5399 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -1,6 +1,6 @@ # Configure FM (Fabric Manager) On `Baremetal` -This document describes the procedure for customizing and deploying the base FMN OS image along with provisioning storage `LUNs`, +This document describes the procedure for customizing and deploying the base FMN OS image along with provisioning storage volumes, and configuring the necessary networking to support Fabric Manager on `baremetal` following the CSM upgrade. ## Requirements @@ -114,17 +114,18 @@ sat bootprep run \ **Note:** Using the `--overwrite-images` option in the command above will overwrite any previously uploaded images in S3. -**Note:** After creating the FMN base image above, follow below steps to add FMN nodes to CSM: +### Add FMN nodes to CSM -### Add NCN procedure +Follow the steps below to register FMNs in CSM (SLS/HSM/BSS) and configure the required network, storage, and cloud-init settings in BSS. +These configurations will be provisioned automatically during node boot. 
#### Allocate NCN IP Addresses -Follow [`Step-1`](https://github.com/Cray-HPE/docs-csm/blob/CASM-5740-fm-ha/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#add-ncn-procedure) +Follow [`Step-1`](../node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#add-ncn-procedure) for allocating NCN IP addresses. #### Add NCN data -Follow [`Step-3`](https://github.com/Cray-HPE/docs-csm/blob/CASM-5740-fm-ha/operations/node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#add-ncn-procedure) +Follow [`Step-3`](../node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#add-ncn-procedure) for adding NCN data. #### Generate Switch Configuration With CANU @@ -215,6 +216,8 @@ Arch = "X86" Class = "River" ``` +**Note:** NMN and HMN should be having additional FMN VIPs also allocated. + #### Validate FMN required networking configuration Check NMN, CMN, HMN, CHN, metal and virtual IP configuration for both FMN nodes (`fmn001` and `fmn002`). @@ -251,6 +254,11 @@ Comment = "fmn-virtual-ip" IPAddress = "10.252.1.13" Name = "fmn-vip" +[[results.ExtraProperties.Subnets.IPReservations]] +Comment = "fmn-virtual-ip" +IPAddress = "10.254.1.2" +Name = "fmn-vip" + [[results.ExtraProperties.Subnets.IPReservations]] Aliases = [ "fmn001-mgmt",] Comment = "x3000c0s28b0" @@ -279,12 +287,22 @@ For Example: cray sls hardware describe x3000c0s28b0n0 ``` -**Note:** NMN and HMN should be having additional FMN VIPs also allocated. 
+Example Output: -For Example: +```text +Parent = "x3000c0s28b0" +Xname = "x3000c0s28b0n0" +Type = "comptype_node" +Class = "River" +TypeString = "Node" +LastUpdated = 1770352943 +LastUpdatedTime = "2026-02-06 04:42:23.048807 +0000 +0000" -```bash -cray sls search networks list --name NMN --format json +[ExtraProperties] +Aliases = [ "fmn001",] +NID = 100011 +Role = "Management" +SubRole = "FabricManager" ``` #### HSM `ethernetInterfaces` should be updated with the same allocated IPs @@ -295,6 +313,83 @@ For Example: cray hsm inventory ethernetInterfaces list --component-id x3000c0s28b0n0 --format json ``` +Example Output: + +```json +[ + { + "ID": "1423f200029a", + "Description": "", + "MACAddress": "14:23:f2:00:02:9a", + "LastUpdate": "2026-02-06T12:57:53.593753Z", + "ComponentID": "x3000c0s28b0n0", + "Type": "Node", + "IPAddresses": [] + }, + { + "ID": "1423f2028e93", + "Description": "", + "MACAddress": "14:23:f2:02:8e:93", + "LastUpdate": "2026-02-06T12:57:53.47447Z", + "ComponentID": "x3000c0s28b0n0", + "Type": "Node", + "IPAddresses": [] + }, + { + "ID": "1423f200029b", + "Description": "", + "MACAddress": "14:23:f2:00:02:9b", + "LastUpdate": "2026-02-06T12:57:53.515843Z", + "ComponentID": "x3000c0s28b0n0", + "Type": "Node", + "IPAddresses": [] + }, + { + "ID": "00e0ed3210ed", + "Description": "CSI Handoff MAC", + "MACAddress": "00:e0:ed:32:10:ed", + "LastUpdate": "2026-02-06T12:57:53.362998Z", + "ComponentID": "x3000c0s28b0n0", + "Type": "Node", + "IPAddresses": [] + }, + { + "ID": "1423f2028e92", + "Description": "Bond0 - bond0.nmn0- kea", + "MACAddress": "14:23:f2:02:8e:92", + "LastUpdate": "2026-02-06T13:00:14.114892Z", + "ComponentID": "x3000c0s28b0n0", + "Type": "Node", + "IPAddresses": [ + { + "IPAddress": "10.252.1.13" + }, + { + "IPAddress": "10.102.193.42" + }, + { + "IPAddress": "10.1.1.10" + }, + { + "IPAddress": "10.102.193.205" + }, + { + "IPAddress": "10.254.1.22" + } + ] + }, + { + "ID": "00e0ed3210ec", + "Description": "CSI Handoff MAC", + 
"MACAddress": "00:e0:ed:32:10:ec", + "LastUpdate": "2026-02-06T12:57:53.327619Z", + "ComponentID": "x3000c0s28b0n0", + "Type": "Node", + "IPAddresses": [] + } +] +``` + #### BSS should be updated with new hosts entries for FMN with proper configurations **Note:** BSS global parameters also should have been populated with FMN IPs and FMN VIP From c1a7fa0d6f8dcf304ef4d10858a051e5ce8d1fa4 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 16 Jun 2026 18:59:04 +0530 Subject: [PATCH 89/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../Configure_FM_On_Baremetal.md | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index caddf21bd5399..649ed71268599 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -21,9 +21,10 @@ Post CSM Upgrade to CSM 1.7.1, if an administrator wishes to enable Fabric Manag * Step 1: [FMN Prerequisites](#fmn-prerequisites) * step 2: [FMN Pre Boot](#fmn-pre-boot) * [FMN Base Image Creation](#fmn-base-image-creation) - * [Add NCN Procedure](#add-ncn-procedure) + * [Add FMN nodes to CSM](#add-fmn-nodes-to-csm) * Step 3: [FMN Booting](#fmn-booting) * Step 4: [FMN Post Boot](#fmn-post-boot) + * [Join Fabric Manager nodes to Spire](#join-fabric-manager-nodes-to-spire) * [Validation](#validation) * [Install Fabric Manager on FM baremetal nodes](#install-fabric-manager-on-fm-baremetal-nodes) @@ -160,9 +161,17 @@ Proceed to boot the FMN nodes (using iPXE boot commands) with the FMN bare-metal ## FMN Post Boot -### Validation +#### Join Fabric Manager nodes to Spire + +After the Fabric Manager nodes have been deployed and are running, join them to Spire to avoid issues with 
Spire tokens. + +```bash +ncn-m001:~ # /opt/cray/platform-utils/spire/fix-spire-on-fmn.sh +``` + +#### Validation -#### Validate the successful bring-up of the base FMNs +##### Validate the successful bring-up of the base FMNs 1. Check if we are able to access both FMN nodes (`fmn001` and `fmn002`): @@ -218,7 +227,7 @@ Class = "River" **Note:** NMN and HMN should be having additional FMN VIPs also allocated. -#### Validate FMN required networking configuration +##### Validate FMN required networking configuration Check NMN, CMN, HMN, CHN, metal and virtual IP configuration for both FMN nodes (`fmn001` and `fmn002`). @@ -279,7 +288,7 @@ IPAddress = "10.102.193.206" Name = "fmn001" ``` -#### SLS hardware should list the new nodes +##### SLS hardware should list the new nodes For Example: @@ -305,7 +314,7 @@ Role = "Management" SubRole = "FabricManager" ``` -#### HSM `ethernetInterfaces` should be updated with the same allocated IPs +##### HSM `ethernetInterfaces` should be updated with the same allocated IPs For Example: @@ -390,7 +399,7 @@ Example Output: ] ``` -#### BSS should be updated with new hosts entries for FMN with proper configurations +##### BSS should be updated with new hosts entries for FMN with proper configurations **Note:** BSS global parameters also should have been populated with FMN IPs and FMN VIP @@ -404,15 +413,7 @@ cray bss bootparameters list --format json --name x3000c0s28b0n0 cray bss bootparameters list --hosts Global --format json ``` -#### Join Fabric Manager nodes to Spire - -After the Fabric Manager nodes have been deployed and are running, join them to Spire to avoid issues with Spire tokens. 
- -```bash -ncn-m001:~ # /opt/cray/platform-utils/spire/fix-spire-on-fmn.sh -``` - -#### Validate FMN required storage configuration (LVM partitions) +##### Validate FMN required storage configuration (LVM partitions) Check if both LVM partitions `/dev/mapper/metalvg0-SCFIRMWARE` and `/dev/mapper/metalvg0-SLINGSHOT` created and mounted under `/opt/cray/FW/sc-firmware` and `/opt/slingshot` respectively on both FMN nodes (`fmn001` and `fmn002`). @@ -459,7 +460,7 @@ fmn001:~ # mount | grep /opt/slingshot /dev/mapper/metalvg0-SLINGSHOT on /opt/slingshot type ext4 (rw,relatime,stripe=256) ``` -#### Validate addition of FM required repositories +##### Validate addition of FM required repositories Check if all the required repos are added on both FMN nodes (`fmn001` and `fmn002`) in order to install prerequisite OS RPMs required during Slingshot Software installation. From 42b5b5a542c97b3fd364f6a70fcc97b27c0b7671 Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 16 Jun 2026 19:03:12 +0530 Subject: [PATCH 90/94] Update Configure_FM_On_Baremetal.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- .../Configure_FM_On_Baremetal.md | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 649ed71268599..64fd6394d1963 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -24,7 +24,7 @@ Post CSM Upgrade to CSM 1.7.1, if an administrator wishes to enable Fabric Manag * [Add FMN nodes to CSM](#add-fmn-nodes-to-csm) * Step 3: [FMN Booting](#fmn-booting) * Step 4: [FMN Post Boot](#fmn-post-boot) - * [Join Fabric Manager nodes to Spire](#join-fabric-manager-nodes-to-spire) + * [Join Fabric Manager nodes to 
Spire](#join-fabric-manager-nodes-to-spire) * [Validation](#validation) * [Install Fabric Manager on FM baremetal nodes](#install-fabric-manager-on-fm-baremetal-nodes) @@ -161,7 +161,7 @@ Proceed to boot the FMN nodes (using iPXE boot commands) with the FMN bare-metal ## FMN Post Boot -#### Join Fabric Manager nodes to Spire +### Join Fabric Manager nodes to Spire After the Fabric Manager nodes have been deployed and are running, join them to Spire to avoid issues with Spire tokens. @@ -169,9 +169,9 @@ After the Fabric Manager nodes have been deployed and are running, join them to ncn-m001:~ # /opt/cray/platform-utils/spire/fix-spire-on-fmn.sh ``` -#### Validation +### Validation -##### Validate the successful bring-up of the base FMNs +#### Validate the successful bring-up of the base FMNs 1. Check if we are able to access both FMN nodes (`fmn001` and `fmn002`): @@ -227,7 +227,7 @@ Class = "River" **Note:** NMN and HMN should be having additional FMN VIPs also allocated. -##### Validate FMN required networking configuration +#### Validate FMN required networking configuration Check NMN, CMN, HMN, CHN, metal and virtual IP configuration for both FMN nodes (`fmn001` and `fmn002`). 
@@ -288,7 +288,7 @@ IPAddress = "10.102.193.206" Name = "fmn001" ``` -##### SLS hardware should list the new nodes +#### SLS hardware should list the new nodes For Example: @@ -314,7 +314,7 @@ Role = "Management" SubRole = "FabricManager" ``` -##### HSM `ethernetInterfaces` should be updated with the same allocated IPs +#### HSM `ethernetInterfaces` should be updated with the same allocated IPs For Example: @@ -399,7 +399,7 @@ Example Output: ] ``` -##### BSS should be updated with new hosts entries for FMN with proper configurations +#### BSS should be updated with new hosts entries for FMN with proper configurations **Note:** BSS global parameters also should have been populated with FMN IPs and FMN VIP @@ -413,7 +413,7 @@ cray bss bootparameters list --format json --name x3000c0s28b0n0 cray bss bootparameters list --hosts Global --format json ``` -##### Validate FMN required storage configuration (LVM partitions) +#### Validate FMN required storage configuration (LVM partitions) Check if both LVM partitions `/dev/mapper/metalvg0-SCFIRMWARE` and `/dev/mapper/metalvg0-SLINGSHOT` created and mounted under `/opt/cray/FW/sc-firmware` and `/opt/slingshot` respectively on both FMN nodes (`fmn001` and `fmn002`). @@ -460,7 +460,7 @@ fmn001:~ # mount | grep /opt/slingshot /dev/mapper/metalvg0-SLINGSHOT on /opt/slingshot type ext4 (rw,relatime,stripe=256) ``` -##### Validate addition of FM required repositories +#### Validate addition of FM required repositories Check if all the required repos are added on both FMN nodes (`fmn001` and `fmn002`) in order to install prerequisite OS RPMs required during Slingshot Software installation. 
From 83dda884929aa9d6d99045e8b097bc912076903a Mon Sep 17 00:00:00 2001 From: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> Date: Tue, 16 Jun 2026 19:46:35 +0530 Subject: [PATCH 91/94] Update README.md Signed-off-by: ravikanth-nalla-hpe <140072234+ravikanth-nalla-hpe@users.noreply.github.com> --- operations/fm_on_baremetal/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/fm_on_baremetal/README.md b/operations/fm_on_baremetal/README.md index e0605d2023b95..2c9887002f2e6 100644 --- a/operations/fm_on_baremetal/README.md +++ b/operations/fm_on_baremetal/README.md @@ -1,4 +1,4 @@ -# FM (Fabric Manager) on baremetal +# Slingshot Fabric Manager on baremetal - [Introduction](#introduction) - [Terminology and Components](#terminology-and-components) @@ -8,7 +8,7 @@ ## Introduction -The Fabric Manager (FM) bare-metal enablement within the Cray System Management (CSM) framework introduces dedicated Fabric Manager Nodes (FMNs) +Enabling the Slingshot Fabric Manager (FM) on bare metal within the Cray System Management (CSM) framework introduces dedicated Fabric Manager Nodes (FMNs) that manage and monitor Slingshot fabric operations outside of the Kubernetes environment. CSM 1.7.1 includes bare-metal FM support, which provides the necessary base OS image, networking, and storage configurations for running the Slingshot Fabric Manager natively within the CSM environment.
From 1f16e784b23e9d7037ea610ca2b908f39878cebb Mon Sep 17 00:00:00 2001 From: sravani-sanigepalli <131810845+sravani-sanigepalli@users.noreply.github.com> Date: Tue, 16 Jun 2026 20:03:30 +0530 Subject: [PATCH 92/94] Clarify NCN procedures in documentation Signed-off-by: sravani-sanigepalli <131810845+sravani-sanigepalli@users.noreply.github.com> --- operations/fm_on_baremetal/Configure_FM_On_Baremetal.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 64fd6394d1963..456c5ed8a8515 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -122,11 +122,11 @@ These configurations will be provisioned automatically during node boot. #### Allocate NCN IP Addresses -Follow [`Step-1`](../node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#add-ncn-procedure) for allocating NCN IP addresses. +Follow [Step-1 of NCN Add Procedure](../node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#add-ncn-procedure) for allocating NCN IP addresses. #### Add NCN data -Follow [`Step-3`](../node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#add-ncn-procedure) for adding NCN data. +Follow [Step-3 of NCN Add Procedure](../node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#add-ncn-procedure) for adding NCN data. 
#### Generate Switch Configuration With CANU From 034c3501b8568a3d68a059429afa0527fd8b3ed1 Mon Sep 17 00:00:00 2001 From: sravani-sanigepalli <131810845+sravani-sanigepalli@users.noreply.github.com> Date: Tue, 16 Jun 2026 20:09:55 +0530 Subject: [PATCH 93/94] Update Configure_FM_On_Baremetal.md Signed-off-by: sravani-sanigepalli <131810845+sravani-sanigepalli@users.noreply.github.com> --- .../Configure_FM_On_Baremetal.md | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index 456c5ed8a8515..a36611c1d788f 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -225,11 +225,10 @@ Arch = "X86" Class = "River" ``` -**Note:** NMN and HMN should be having additional FMN VIPs also allocated. - #### Validate FMN required networking configuration Check NMN, CMN, HMN, CHN, metal and virtual IP configuration for both FMN nodes (`fmn001` and `fmn002`). +**Note:** The NMN and HMN networks should also have FMN VIPs allocated. ```bash ncn-m001:~ # cray sls networks list @@ -254,19 +253,15 @@ Name = "fmn001" [[results.ExtraProperties.Subnets.IPReservations]] Aliases = [ "fmn001-nmn", "time-nmn", "time-nmn.local", "x3000c0s28b0n0", "fmn001.local",] Comment = "x3000c0s28b0n0" -IPAddress = "10.252.1.12" +IPAddress = "10.252.1.13" Name = "fmn001" [[results.ExtraProperties.Subnets.IPReservations]] Aliases = [ "fmn-vip.local",] Comment = "fmn-virtual-ip" -IPAddress = "10.252.1.13" -Name = "fmn-vip" - -[[results.ExtraProperties.Subnets.IPReservations]] -Comment = "fmn-virtual-ip" -IPAddress = "10.254.1.2" +IPAddress = "10.252.1.4" Name = "fmn-vip" +...
[[results.ExtraProperties.Subnets.IPReservations]] Aliases = [ "fmn001-mgmt",] @@ -277,8 +272,13 @@ Name = "x3000c0s28b0" [[results.ExtraProperties.Subnets.IPReservations]] Aliases = [ "fmn001-hmn", "time-hmn", "time-hmn.local",] Comment = "x3000c0s28b0n0" -IPAddress = "10.254.1.21" +IPAddress = "10.254.1.22" Name = "fmn001" + +[[results.ExtraProperties.Subnets.IPReservations]] +Comment = "fmn-virtual-ip" +IPAddress = "10.254.1.2" +Name = "fmn-vip" ... [[results.ExtraProperties.Subnets.IPReservations]] From cf1b0556c86ba1a9ad80f6d459b2cf9a14fcac98 Mon Sep 17 00:00:00 2001 From: sravani-sanigepalli <131810845+sravani-sanigepalli@users.noreply.github.com> Date: Tue, 16 Jun 2026 20:13:15 +0530 Subject: [PATCH 94/94] Update Configure_FM_On_Baremetal.md Signed-off-by: sravani-sanigepalli <131810845+sravani-sanigepalli@users.noreply.github.com> --- operations/fm_on_baremetal/Configure_FM_On_Baremetal.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md index a36611c1d788f..789cc0170fcad 100644 --- a/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md +++ b/operations/fm_on_baremetal/Configure_FM_On_Baremetal.md @@ -122,11 +122,11 @@ These configurations will be provisioned automatically during node boot. #### Allocate NCN IP Addresses -Follow [Step-1 of NCN Add Procedure](../node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#add-ncn-procedure) for allocating NCN IP addresses. +Follow [`Step-1 of NCN Add Procedure`](../node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#add-ncn-procedure) for allocating NCN IP addresses. #### Add NCN data -Follow [Step-3 of NCN Add Procedure](../node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#add-ncn-procedure) for adding NCN data. 
+Follow [`Step-3 of NCN Add Procedure`](../node_management/Add_Remove_Replace_NCNs/Add_Remove_Replace_NCNs.md#add-ncn-procedure) for adding NCN data. #### Generate Switch Configuration With CANU