Skip to content

Merge pull request #11 from last9/dependabot/pip/idna-3.15 #20

Merge pull request #11 from last9/dependabot/pip/idna-3.15

Merge pull request #11 from last9/dependabot/pip/idna-3.15 #20

Workflow file for this run

# Workflow-level settings: triggers, token permissions and run concurrency.
name: CI

# Run on pushes to main and on pull requests that target main.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

# Restrict the workflow token to read-only access to repository contents.
permissions:
  contents: read

# One active run per ref: a newer run cancels the one still in progress.
concurrency:
  group: ci-${{ github.ref }}
  cancel-in-progress: true

jobs:
python:
  # Lint, format-check, type-check and unit-test the l9gpu package on each
  # interpreter version listed in the matrix.
  name: Python (${{ matrix.python-version }})
  runs-on: ubuntu-latest
  strategy:
    # Let the remaining matrix entries finish when one of them fails.
    fail-fast: false
    matrix:
      # Versions are quoted so that 3.10 is not parsed as the float 3.1.
      python-version:
        - "3.10"
        - "3.11"
        - "3.12"
  steps:
    - uses: actions/checkout@v4

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
        cache: pip

    - name: Install dev deps
      run: |
        python -m pip install --upgrade pip
        pip install -e ".[dev,k8s]"

    - name: Lint (flake8)
      run: flake8 --per-file-ignores=l9gpu/_version.py:F401 l9gpu

    - name: Format check (black)
      run: black --check l9gpu

    - name: Type-check (mypy)
      # Advisory only: a mypy failure does not fail the job.
      continue-on-error: true
      run: mypy l9gpu

    - name: Unit tests (pytest)
      run: pytest l9gpu/tests --ignore-glob='**/tests/*_internal.py' -n auto
go:
  # Build, vet, race-test and lint each Go module independently; every run
  # step executes inside the module directory selected by the matrix.
  name: Go (${{ matrix.module }})
  runs-on: ubuntu-latest
  strategy:
    # Let the remaining modules finish when one of them fails.
    fail-fast: false
    matrix:
      module:
        - k8sprocessor
        - slurmprocessor
        - k8shelper
        - shelper
  defaults:
    run:
      working-directory: ${{ matrix.module }}
  steps:
    - uses: actions/checkout@v4

    - uses: actions/setup-go@v5
      with:
        go-version: "1.25"
        # Key the module cache on this module's own go.sum.
        cache-dependency-path: ${{ matrix.module }}/go.sum

    - run: go mod download
    - run: go build ./...
    - run: go vet ./...
    - run: go test -race -count=1 ./...

    - name: golangci-lint
      # golangci-lint v1.64 ships with Go 1.24, while the upstream OTel
      # collector v0.150 dependencies require the Go 1.25 toolchain. The
      # step still runs, but its result is non-blocking until a newer
      # golangci-lint release supports Go 1.25.
      continue-on-error: true
      uses: golangci/golangci-lint-action@v6
      with:
        version: v1.64
        working-directory: ${{ matrix.module }}
        args: --timeout=5m
helm:
  # Lint the l9gpu chart with its default and example values, render it with
  # the defaults, and validate the rendered manifests with kubeconform.
  name: Helm chart lint + template
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - uses: azure/setup-helm@v4
      with:
        version: v3.14.0

    - name: helm lint (default values)
      run: helm lint deploy/helm/l9gpu

    - name: helm lint (example values)
      # Each example values file gets its own collapsible log group.
      run: |
        for values in deploy/helm/l9gpu/examples/*.yaml; do
          echo "::group::helm lint with $values"
          helm lint deploy/helm/l9gpu -f "$values"
          echo "::endgroup::"
        done

    - name: helm template (default)
      # Written to the workspace root so the container step below can read it.
      run: helm template l9gpu deploy/helm/l9gpu > "$GITHUB_WORKSPACE/rendered.yaml"

    - name: Validate rendered manifests (kubeconform)
      # Pinned to a release tag (instead of the floating `latest`) so the
      # validator version is reproducible, like the other pinned tools in
      # this workflow. Bump deliberately when upgrading.
      uses: docker://ghcr.io/yannh/kubeconform:v0.6.7
      with:
        args: -strict -ignore-missing-schemas -summary rendered.yaml
dashboards-and-alerts:
  # Validate Grafana dashboard JSON, lint the alert/demo/example YAML, and
  # run promtool against the rules embedded in each PrometheusRule manifest.
  name: Dashboards + alert rules
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - name: Validate Grafana dashboard JSON
      run: |
        fail=0
        # NUL-delimited iteration keeps paths containing spaces or glob
        # characters intact; process substitution keeps the loop in this
        # shell so that `fail` is still visible after the loop.
        while IFS= read -r -d '' f; do
          if ! jq empty "$f"; then
            echo "::error file=$f::invalid JSON"
            fail=1
          fi
        done < <(find dashboards/grafana -type f -name '*.json' -print0)
        exit "$fail"

    - name: Validate YAML (alerts + demo + helm examples)
      run: |
        pip install --quiet yamllint
        yamllint -d '{extends: relaxed, rules: {line-length: disable, truthy: disable}}' \
          alerts/ deploy/demo/ deploy/helm/l9gpu/examples/

    - name: Install promtool + yq
      env:
        PROMETHEUS_VERSION: "2.53.0"
        YQ_VERSION: v4.44.3
      run: |
        # --fail (-f) makes curl exit non-zero on an HTTP error instead of
        # saving the error page as if it were the release asset.
        mkdir -p /tmp/prom && cd /tmp/prom
        curl -fsSLo prometheus.tar.gz \
          "https://github.com/prometheus/prometheus/releases/download/v${PROMETHEUS_VERSION}/prometheus-${PROMETHEUS_VERSION}.linux-amd64.tar.gz"
        tar -xzf prometheus.tar.gz
        echo "/tmp/prom/prometheus-${PROMETHEUS_VERSION}.linux-amd64" >> "$GITHUB_PATH"
        sudo curl -fsSLo /usr/local/bin/yq \
          "https://github.com/mikefarah/yq/releases/download/${YQ_VERSION}/yq_linux_amd64"
        sudo chmod +x /usr/local/bin/yq

    - name: Validate PrometheusRule CRDs
      run: |
        fail=0
        # promtool checks plain rule files, so the `.spec` of each manifest
        # is extracted to a temporary file first.
        while IFS= read -r -d '' f; do
          extracted="/tmp/$(basename "$f").rules"
          # Report an extraction failure as an annotation and keep going,
          # rather than letting `bash -e` abort the whole loop silently.
          if ! yq eval '.spec' "$f" > "$extracted"; then
            echo "::error file=$f::could not extract .spec"
            fail=1
            continue
          fi
          if ! promtool check rules "$extracted" 2>&1; then
            echo "::error file=$f::promtool check failed"
            fail=1
          fi
        done < <(find alerts/prometheus -type f \( -name '*.yaml' -o -name '*.yml' \) -print0)
        exit "$fail"
collector:
  # Build the custom collector distribution with the OpenTelemetry Collector
  # Builder and validate the example configuration against the built binary.
  name: Collector OCB build
  runs-on: ubuntu-latest
  env:
    # Builder version passed to `go install` below.
    OCB_VERSION: v0.150.0
  steps:
    - uses: actions/checkout@v4

    - uses: actions/setup-go@v5
      with:
        go-version: "1.25"

    - name: Install OpenTelemetry Collector Builder
      env:
        OCB_VER: ${{ env.OCB_VERSION }}
      run: |
        go install "go.opentelemetry.io/collector/cmd/builder@${OCB_VER}"
        echo "$(go env GOPATH)/bin" >> "$GITHUB_PATH"

    - name: Build collector distribution
      run: builder --config deploy/collector/builder-config.yaml

    - name: Validate example config
      # Placeholder values for the environment variables set here; the step
      # only runs the collector's `validate` subcommand.
      env:
        OTEL_EXPORTER_OTLP_ENDPOINT: "http://localhost:4317"
        OTEL_EXPORTER_OTLP_HEADERS: "Basic placeholder"
        KUBE_NODE_NAME: "ci-node"
      run: ./_build/l9gpu-collector validate --config=deploy/collector/config.example.yaml
systemd:
  # Sanity-check the shipped systemd units: every .service file is run
  # through `systemd-analyze verify`, and every .slice file must be
  # non-empty and contain a [Slice] section.
  name: systemd unit syntax
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - name: Stub l9gpu binary (systemd-analyze checks ExecStart exists)
      # Installs a no-op shell script at /usr/bin/l9gpu.
      run: |
        sudo install -m 0755 /dev/stdin /usr/bin/l9gpu <<'EOF'
        #!/bin/sh
        exec true
        EOF

    - name: systemd-analyze verify
      # `fail` accumulates across both loops so every problem is reported
      # before the step exits.
      run: |
        fail=0
        for f in systemd/*.service systemd/nvml/*.service; do
          [ -s "$f" ] || { echo "::error file=$f::empty unit"; fail=1; continue; }
          grep -q '^\[Service\]' "$f" || { echo "::error file=$f::missing [Service]"; fail=1; continue; }
          output=$(systemd-analyze verify --recursive-errors=no "$f" 2>&1 || true)
          echo "$output"
          # Only fail on true syntax errors, not unknown-key warnings
          # from newer systemd directives the runner image doesn't know.
          if echo "$output" | grep -iE 'syntax error|bad|missing|invalid' | grep -viE 'Unknown key|ignoring'; then
            echo "::error file=$f::systemd-analyze found syntax error"
            fail=1
          fi
        done
        for f in systemd/*.slice systemd/nvml/*.slice; do
          [ -s "$f" ] || { echo "::error file=$f::empty slice"; fail=1; continue; }
          grep -q '^\[Slice\]' "$f" || { echo "::error file=$f::missing [Slice]"; fail=1; }
        done
        exit $fail
python-wheel:
  # Build the wheel and sdist, check their metadata with twine, and upload
  # the dist/ directory as a workflow artifact.
  name: Python wheel build
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - uses: actions/setup-python@v5
      with:
        python-version: "3.11"

    - name: Build wheel + sdist
      run: |
        python -m pip install --upgrade pip build
        python -m build

    - name: Twine check
      run: |
        python -m pip install twine
        twine check dist/*

    - uses: actions/upload-artifact@v4
      with:
        name: python-dist
        path: dist/
docker:
  # Smoke-test only the standalone l9gpu Python image here. The collector
  # Dockerfile expects a pre-built binary from goreleaser and is exercised
  # by the collector-release workflow instead.
  name: Docker build (l9gpu)
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - uses: docker/setup-buildx-action@v3

    - name: Build l9gpu image
      uses: docker/build-push-action@v6
      with:
        context: .
        file: docker/Dockerfile
        # Build and load into the local Docker daemon; nothing is pushed.
        push: false
        load: true
        tags: l9gpu:ci