Skip to content
1 change: 1 addition & 0 deletions .github/workflows/build-all.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ jobs:
# Build specific images in alert-manager
echo "Building specific alert-manager images"
$GITHUB_WORKSPACE/build/pai_build.py build \
-n \
-c $GITHUB_WORKSPACE/config/cluster-configuration \
-s alert-manager \
-i abnormal-detector,alert-handler,alert-parser,cert-expiration-checker,cluster-utilization,job-data-recorder,job-status-change-notification,node-failure-detection,node-issue-classifier,nvidia-gpu-low-perf-fixer,redis-monitoring
Expand Down
8 changes: 6 additions & 2 deletions .github/workflows/build-deploy-changes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
timeout-minutes: 120
environment: auto-test
container:
image: ubuntu:latest
image: ubuntu:24.04
volumes:
- /var/run/docker.sock:/var/run/docker.sock
env:
Expand All @@ -30,7 +30,7 @@ jobs:
DEBIAN_FRONTEND=noninteractive apt install -y git

- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@v5
with:
fetch-depth: 0
submodules: false
Expand Down Expand Up @@ -111,6 +111,7 @@ jobs:

- name: Build Images of Changed Services
if: steps.check.outputs.has_changed == 'true'
shell: bash
run: |
changed_services="${{ steps.changes.outputs.folders }}"
echo "Building: $changed_services"
Expand All @@ -120,13 +121,15 @@ jobs:
# build specific images in alert-manager
echo "Building specific alert-manager images"
# Every option line except the last must end with a backslash so that the
# -i image list is passed to this same pai_build.py invocation; without the
# continuation the shell would try to run "-i ..." as a separate command.
$GITHUB_WORKSPACE/build/pai_build.py build \
-n \
-c $GITHUB_WORKSPACE/config/cluster-configuration \
-s alert-manager \
-i abnormal-detector,alert-handler,alert-parser,cert-expiration-checker,cluster-utilization,job-data-recorder,job-status-change-notification,node-failure-detection,node-issue-classifier,nvidia-gpu-low-perf-fixer,redis-monitoring
fi

echo "Changed services after removing alert-manager: $changed_services"
$GITHUB_WORKSPACE/build/pai_build.py build \
-n \
-c $GITHUB_WORKSPACE/config/cluster-configuration \
-s $changed_services

Expand All @@ -141,6 +144,7 @@ jobs:

- name: Push Images of Changed Service to GHCR
if: steps.check.outputs.has_changed == 'true'
shell: bash
run: |
changed_services="${{ steps.changes.outputs.folders }}"
echo "Pushing: $changed_services"
Expand Down
2 changes: 1 addition & 1 deletion deployment/clusterObjectModel/mainParser/machine.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def validation_host_properties(self):

if "k8s-role" not in host:
return False, "k8s-role is miss in the host [{0}]".format(str(host))
if "k8s-role" is "master":
if "k8s-role" == "master":
if "etcdid" not in host:
return False, "etcdid is miss in one of the host with the [k8s-role: master]."
if host["etcdid"] in etcd_id_visited:
Expand Down
2 changes: 1 addition & 1 deletion deployment/utility/pai_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def cluster_version():
try:
# redirect stderr to devnull
DEVNULL = open(os.devnull, 'w')
version = subprocess.check_output("kubectl get configmap pai-version -o jsonpath='{.data.PAI\.VERSION}'", shell=True, stderr=DEVNULL)
version = subprocess.check_output(r"kubectl get configmap pai-version -o jsonpath='{.data.PAI\.VERSION}'", shell=True, stderr=DEVNULL)
logger.info("Cluster version: %s", version)
except subprocess.CalledProcessError:
logger.warning("Can't fetch cluster version!")
Expand Down