diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml index 4a71e601d5..79c16e4855 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml @@ -119,6 +119,9 @@ - firewall-rules - delete - "{{ deployment_name }}" + - name: Include Failure Triage Agent trigger tasks + ansible.builtin.include_tasks: tasks/trigger_failure_triage_agent.yml + - name: Destroy deployment register: gcluster_destroy changed_when: gcluster_destroy.changed @@ -179,6 +182,9 @@ - firewall-rules - delete - "{{ deployment_name }}" + - name: Include Failure Triage Agent trigger tasks + ansible.builtin.include_tasks: tasks/trigger_failure_triage_agent.yml + - name: Destroy deployment delegate_to: localhost register: gcluster_destroy diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/multigroup-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/multigroup-integration-test.yml index 30923c1fd1..1d7e9fd7a1 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/multigroup-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/multigroup-integration-test.yml @@ -34,6 +34,9 @@ environment: TF_IN_AUTOMATION: "TRUE" always: + - name: Include Failure Triage Agent trigger tasks + ansible.builtin.include_tasks: tasks/trigger_failure_triage_agent.yml + - name: Destroy deployment register: gcluster_destroy changed_when: gcluster_destroy.changed diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/tasks/rescue_gcluster_failure.yml b/tools/cloud-build/daily-tests/ansible_playbooks/tasks/rescue_gcluster_failure.yml index a68fef2811..e27ca4a236 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/tasks/rescue_gcluster_failure.yml +++ 
b/tools/cloud-build/daily-tests/ansible_playbooks/tasks/rescue_gcluster_failure.yml @@ -18,6 +18,9 @@ - deployment_name is defined - workspace is defined +- name: Include Failure Triage Agent trigger tasks + ansible.builtin.include_tasks: trigger_failure_triage_agent.yml + - name: Delete Firewall Rule register: fw_deleted changed_when: fw_deleted.rc == 0
diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/tasks/trigger_failure_triage_agent.yml b/tools/cloud-build/daily-tests/ansible_playbooks/tasks/trigger_failure_triage_agent.yml
new file mode 100644
index 0000000000..90ad8e428b
--- /dev/null
+++ b/tools/cloud-build/daily-tests/ansible_playbooks/tasks/trigger_failure_triage_agent.yml
@@ -0,0 +1,172 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+---
+# Best-effort hook: asks the Failure Triage Agent to analyse the current build
+# and prints the resulting summary. The shell tasks set `ignore_errors: true`
+# so that a triage problem does not abort the playbook including this file.
+#
+# Inputs (the agent is skipped when any of them is undefined or empty):
+#   full_build_id                   build ID sent to the agent
+#   triage_gcs_bucket_override      bucket holding config_triage_agent.env and
+#                                   the per-build state.json
+#   triage_project_number_override  project number sent to the agent
+#   triage_invoker_sa_override      service account impersonated for the call
+#   triage_cloud_run_url_override   agent base URL ("/trigger" is appended)
+- name: Set Triage Agent Configuration
+  ansible.builtin.set_fact:
+    triage_gcs_bucket: "{{ triage_gcs_bucket_override | default('') }}"
+    triage_project_number: "{{ triage_project_number_override | default('') }}"
+    triage_invoker_sa: "{{ triage_invoker_sa_override | default('') }}"
+    triage_cloud_run_url: "{{ triage_cloud_run_url_override | default('') }}"
+
+# Decides whether the agent should run. Every branch of the script exits 0;
+# only the final "PROCEED" line on stdout enables the pipeline block below,
+# while each "SKIPPED" reason is written to stderr.
+- name: Check Triage Agent Prerequisites
+  delegate_to: localhost
+  changed_when: false
+  args:
+    executable: /bin/bash
+  environment:
+    TRIAGE_BUILD_ID: "{{ full_build_id | default('') }}"
+    TRIAGE_GCS_BUCKET: "{{ triage_gcs_bucket }}"
+    TRIAGE_PROJECT_NUMBER: "{{ triage_project_number }}"
+    TRIAGE_INVOKER_SA: "{{ triage_invoker_sa }}"
+    TRIAGE_CLOUD_RUN_URL: "{{ triage_cloud_run_url }}"
+  ansible.builtin.shell: |
+    if [ -z "$TRIAGE_GCS_BUCKET" ] || [ -z "$TRIAGE_PROJECT_NUMBER" ] || [ -z "$TRIAGE_INVOKER_SA" ] || [ -z "$TRIAGE_CLOUD_RUN_URL" ]; then
+      echo "SKIPPED: One or more Triage Agent configuration variables are missing." >&2
+      exit 0
+    fi
+
+    if [ -z "$TRIAGE_BUILD_ID" ]; then
+      echo "SKIPPED: The 'full_build_id' variable is missing." >&2
+      exit 0
+    fi
+
+    if ! gcloud storage buckets describe "gs://$TRIAGE_GCS_BUCKET" >/dev/null 2>&1; then
+      echo "SKIPPED: Triage Agent bucket '$TRIAGE_GCS_BUCKET' does not exist." >&2
+      exit 0
+    fi
+
+    # An unreadable config file is treated as enable_agent=false.
+    CONFIG_CONTENT=$(gcloud storage cat "gs://$TRIAGE_GCS_BUCKET/config_triage_agent.env" 2>/dev/null || echo "enable_agent=false")
+    if ! echo "$CONFIG_CONTENT" | grep -qi '^[[:space:]]*enable_agent[[:space:]]*=[[:space:]]*true'; then
+      echo "SKIPPED: Failure Triage Agent is currently disabled in config_triage_agent.env." >&2
+      exit 0
+    fi
+
+    echo "PROCEED: Agent is enabled and build ID is present."
+  register: triage_init
+  ignore_errors: true
+
+- name: Execute Triage Agent Pipeline
+  # default('') keeps the condition valid when the registered result carries
+  # no stdout (e.g. the module itself failed and ignore_errors let us continue).
+  when: "'PROCEED' in (triage_init.stdout | default(''))"
+  vars:
+    triage_build_id: "{{ full_build_id | default('') }}"
+  block:
+    - name: Trigger Failure Triage Agent
+      delegate_to: localhost
+      changed_when: false
+      args:
+        executable: /bin/bash
+      environment:
+        TRIAGE_BUILD_ID: "{{ triage_build_id }}"
+        TRIAGE_INVOKER_SA: "{{ triage_invoker_sa }}"
+        TRIAGE_CLOUD_RUN_URL: "{{ triage_cloud_run_url }}"
+        TRIAGE_PROJECT_NUMBER: "{{ triage_project_number }}"
+      ansible.builtin.shell: |
+        TOKEN=$(gcloud auth print-identity-token --impersonate-service-account="$TRIAGE_INVOKER_SA" --audiences="$TRIAGE_CLOUD_RUN_URL")
+        if [ -z "$TOKEN" ]; then
+          echo "Failed to get identity token." >&2
+          exit 1
+        fi
+
+        # Bound the request so an unresponsive endpoint cannot stall the play.
+        RESPONSE=$(curl -sS --connect-timeout 10 --max-time 60 -w "\n%{http_code}" -X POST "$TRIAGE_CLOUD_RUN_URL/trigger" \
+          -H "Authorization: Bearer $TOKEN" \
+          -H "Content-Type: application/json" \
+          -d "{\"build_id\": \"$TRIAGE_BUILD_ID\", \"project_number\": \"$TRIAGE_PROJECT_NUMBER\"}")
+
+        HTTP_STATUS=$(echo "$RESPONSE" | tail -n1)
+        BODY=$(echo "$RESPONSE" | sed '$d')
+
+        if [ "$HTTP_STATUS" != "202" ]; then
+          echo "Failed to trigger agent. HTTP Status: $HTTP_STATUS" >&2
+          echo "Response Body: $BODY" >&2
+          exit 1
+        fi
+      register: triage_trigger
+      ignore_errors: true
+
+    - name: Wait for Analysis to Complete
+      # Polling is only useful when the trigger request was accepted.
+      when: triage_trigger is succeeded
+      delegate_to: localhost
+      changed_when: false
+      args:
+        executable: /bin/bash
+      environment:
+        TRIAGE_BUILD_ID: "{{ triage_build_id }}"
+        TRIAGE_GCS_BUCKET: "{{ triage_gcs_bucket }}"
+      ansible.builtin.shell: |
+        # Wait for Cloud Run to start and for the initial state file to be copied
+        for i in {1..12}; do
+          if gcloud storage ls "gs://$TRIAGE_GCS_BUCKET/$TRIAGE_BUILD_ID/state.json" >/dev/null 2>&1; then
+            break
+          fi
+          sleep 5
+        done
+
+        if ! gcloud storage ls "gs://$TRIAGE_GCS_BUCKET/$TRIAGE_BUILD_ID/state.json" >/dev/null 2>&1; then
+          echo "Agent failed to start: state.json was not created within 60 seconds." >&2
+          exit 1
+        fi
+
+        # Poll up to 30 times until state.json reports "completed" or "failed".
+        for i in {1..30}; do
+          STATE_JSON=$(gcloud storage cat "gs://$TRIAGE_GCS_BUCKET/$TRIAGE_BUILD_ID/state.json" 2>/dev/null || echo '{}')
+          STATUS=$(echo "$STATE_JSON" | python3 -c "import sys, json; print(json.load(sys.stdin).get('status', ''))" 2>/dev/null)
+          if [ "$STATUS" = "completed" ] || [ "$STATUS" = "failed" ]; then
+            echo "$STATE_JSON"
+            exit 0
+          fi
+          # Time delay between polling attempts
+          sleep 30
+        done
+        # No terminal status was observed within the polling budget.
+        exit 1
+      register: agent_state
+      ignore_errors: true
+
+    - name: Print Triage Report
+      # A skipped or failed wait task, or a status other than "completed", is
+      # reported as a failure; the summary is shown only for a completed run.
+      delegate_to: localhost
+      ansible.builtin.debug:
+        msg: |
+          {% if agent_state is skipped or agent_state is failed or (agent_state.stdout | default('{}', true) | from_json).status | default('') != 'completed' %}
+          Failure Triage Agent testing did not complete in time or failed internally.
+          {% else %}
+          TRIAGE AGENT SUMMARY:
+          {{ (agent_state.stdout | default('{}', true) | from_json).executive_summary | default('No summary available.') | wordwrap(100) }}
+
+          Full diagnostic report available at:
+          https://storage.cloud.google.com/{{ triage_gcs_bucket }}/{{ triage_build_id }}/report.txt
+          {% endif %}
+
+          For detailed intermediate state information, please review the diagnostic state file:
+          https://console.cloud.google.com/storage/browser/_details/{{ triage_gcs_bucket }}/{{ triage_build_id }}/state.json
diff --git a/tools/cloud-build/daily-tests/builds/ansible-vm.yaml b/tools/cloud-build/daily-tests/builds/ansible-vm.yaml index 64b9f40513..2136bf0df7 100644 --- a/tools/cloud-build/daily-tests/builds/ansible-vm.yaml +++ b/tools/cloud-build/daily-tests/builds/ansible-vm.yaml @@ -34,6 +34,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -48,10 +49,22 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \
- --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/ansible-vm.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/ansible-vm.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/batch-mpi.yaml b/tools/cloud-build/daily-tests/builds/batch-mpi.yaml index b60844fa12..9dc47296b9 100644 --- a/tools/cloud-build/daily-tests/builds/batch-mpi.yaml +++ b/tools/cloud-build/daily-tests/builds/batch-mpi.yaml @@ -35,6 +35,14 @@ availableSecrets: env: SPACK_CACHE_WRF - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: 
projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' steps: # While using static network names we are guarding against more than 1 instance running at a time (for multi-group tests) @@ -47,9 +55,10 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" - secretEnv: ['SPACK_CACHE_WRF', 'GCLUSTER_GCS_PATH'] + secretEnv: ['SPACK_CACHE_WRF', 'GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] args: - -c - | @@ -72,5 +81,9 @@ steps: echo ' timeout: 10800' >> $${SG_EXAMPLE} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/batch-mpi.yml" + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/batch-mpi.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" diff --git a/tools/cloud-build/daily-tests/builds/batch.yaml b/tools/cloud-build/daily-tests/builds/batch.yaml index 81abc0a9a5..50384dc928 100644 --- a/tools/cloud-build/daily-tests/builds/batch.yaml +++ b/tools/cloud-build/daily-tests/builds/batch.yaml @@ -35,6 +35,7 @@ steps: name: 
us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -50,10 +51,22 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/batch.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/batch.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/chrome-remote-desktop-ubuntu.yaml b/tools/cloud-build/daily-tests/builds/chrome-remote-desktop-ubuntu.yaml index 7cc95f1e14..566a70d1ca 100644 --- 
a/tools/cloud-build/daily-tests/builds/chrome-remote-desktop-ubuntu.yaml +++ b/tools/cloud-build/daily-tests/builds/chrome-remote-desktop-ubuntu.yaml @@ -33,6 +33,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -47,10 +48,22 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} os=ubuntu" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/chrome-remote-desktop.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID os=ubuntu" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/chrome-remote-desktop.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 
'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/chrome-remote-desktop.yaml b/tools/cloud-build/daily-tests/builds/chrome-remote-desktop.yaml index be85523044..b1de19c23a 100644 --- a/tools/cloud-build/daily-tests/builds/chrome-remote-desktop.yaml +++ b/tools/cloud-build/daily-tests/builds/chrome-remote-desktop.yaml @@ -34,6 +34,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -48,10 +49,22 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} os=default" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/chrome-remote-desktop.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID os=default" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/chrome-remote-desktop.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: 
projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/gke-a2-highgpu-kueue-onspot.yaml b/tools/cloud-build/daily-tests/builds/gke-a2-highgpu-kueue-onspot.yaml index 6ea0b83eb1..683082bf77 100644 --- a/tools/cloud-build/daily-tests/builds/gke-a2-highgpu-kueue-onspot.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-a2-highgpu-kueue-onspot.yaml @@ -109,12 +109,24 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ --user=sa_106486320838376751393 \ - --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ - --extra-vars="@$${GKE_VARS_FILE}" - secretEnv: ['GCLUSTER_GCS_PATH'] + --extra-vars="@$${GKE_VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: 
projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/gke-a3-highgpu-onspot.yaml b/tools/cloud-build/daily-tests/builds/gke-a3-highgpu-onspot.yaml index 6f9c8f63ad..49d0eda113 100644 --- a/tools/cloud-build/daily-tests/builds/gke-a3-highgpu-onspot.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-a3-highgpu-onspot.yaml @@ -98,12 +98,24 @@ steps: sed -i '/^ reservation:/d' $${EXAMPLE_BP} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ --user=sa_106486320838376751393 \ - --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ - --extra-vars="@$${GKE_VARS_FILE}" - secretEnv: ['GCLUSTER_GCS_PATH'] + --extra-vars="@$${GKE_VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git 
a/tools/cloud-build/daily-tests/builds/gke-a3-highgpu.yaml b/tools/cloud-build/daily-tests/builds/gke-a3-highgpu.yaml index d97fdea39a..1b05ae8f41 100644 --- a/tools/cloud-build/daily-tests/builds/gke-a3-highgpu.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-a3-highgpu.yaml @@ -36,6 +36,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -69,10 +70,22 @@ steps: bash tools/add_ttl_label.sh "$${EXAMPLE_BP}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/gke-a3-highgpu.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/gke-a3-highgpu.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - 
versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/gke-a3-megagpu-onspot.yaml b/tools/cloud-build/daily-tests/builds/gke-a3-megagpu-onspot.yaml index 347321ebe9..1e8bbbed26 100644 --- a/tools/cloud-build/daily-tests/builds/gke-a3-megagpu-onspot.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-a3-megagpu-onspot.yaml @@ -97,12 +97,24 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ --user=sa_106486320838376751393 \ - --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ - --extra-vars="@$${GKE_VARS_FILE}" - secretEnv: ['GCLUSTER_GCS_PATH'] + --extra-vars="@$${GKE_VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git 
a/tools/cloud-build/daily-tests/builds/gke-a3-megagpu.yaml b/tools/cloud-build/daily-tests/builds/gke-a3-megagpu.yaml index 52337df9c0..f143a28573 100644 --- a/tools/cloud-build/daily-tests/builds/gke-a3-megagpu.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-a3-megagpu.yaml @@ -36,6 +36,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -69,10 +70,22 @@ steps: bash tools/add_ttl_label.sh "$${EXAMPLE_BP}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/gke-a3-megagpu.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/gke-a3-megagpu.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - 
versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/gke-a3-ultragpu-onspot.yaml b/tools/cloud-build/daily-tests/builds/gke-a3-ultragpu-onspot.yaml index ddc653aa78..e23672630d 100644 --- a/tools/cloud-build/daily-tests/builds/gke-a3-ultragpu-onspot.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-a3-ultragpu-onspot.yaml @@ -100,14 +100,26 @@ steps: bash tools/add_ttl_label.sh "$${EXAMPLE_BP}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} chs_repo=$${CHS_REPO}" \ + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID chs_repo=$${CHS_REPO}" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ - --extra-vars="@$${GKE_VARS_FILE}" - secretEnv: ['GCLUSTER_GCS_PATH', 'CHS_REPO'] + --extra-vars="@$${GKE_VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'CHS_REPO', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: 
projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' - versionName: projects/${PROJECT_ID}/secrets/cluster-health-scanner/versions/latest env: 'CHS_REPO' diff --git a/tools/cloud-build/daily-tests/builds/gke-a3-ultragpu.yaml b/tools/cloud-build/daily-tests/builds/gke-a3-ultragpu.yaml index 42fb9d5615..fa1df10498 100644 --- a/tools/cloud-build/daily-tests/builds/gke-a3-ultragpu.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-a3-ultragpu.yaml @@ -39,6 +39,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -72,10 +73,22 @@ steps: bash tools/add_ttl_label.sh "$${EXAMPLE_BP}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/gke-a3-ultragpu.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/gke-a3-ultragpu.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: 
projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/gke-a4-onspot.yaml b/tools/cloud-build/daily-tests/builds/gke-a4-onspot.yaml index 5330c73a03..a3e48ebc60 100644 --- a/tools/cloud-build/daily-tests/builds/gke-a4-onspot.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-a4-onspot.yaml @@ -101,14 +101,26 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ --user=sa_106486320838376751393 \ - --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} chs_repo=$${CHS_REPO}" \ + --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID chs_repo=$${CHS_REPO}" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ - --extra-vars="@$${GKE_VARS_FILE}" - secretEnv: ['GCLUSTER_GCS_PATH', 'CHS_REPO'] + --extra-vars="@$${GKE_VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'CHS_REPO', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: 
projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' - versionName: projects/${PROJECT_ID}/secrets/cluster-health-scanner/versions/latest env: 'CHS_REPO' diff --git a/tools/cloud-build/daily-tests/builds/gke-a4x.yaml b/tools/cloud-build/daily-tests/builds/gke-a4x.yaml index 578a39d18e..a1c85d8b9a 100644 --- a/tools/cloud-build/daily-tests/builds/gke-a4x.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-a4x.yaml @@ -40,6 +40,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -78,10 +79,22 @@ steps: bash tools/add_ttl_label.sh "$${EXAMPLE_BP}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/gke-a4x.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/gke-a4x.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: 
projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/gke-g4-onspot.yaml b/tools/cloud-build/daily-tests/builds/gke-g4-onspot.yaml index 35aa35ad73..c3cf55a1d4 100644 --- a/tools/cloud-build/daily-tests/builds/gke-g4-onspot.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-g4-onspot.yaml @@ -91,12 +91,24 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ --extra-vars="test_prefix=${_TEST_PREFIX}" \ --user=sa_106486320838376751393 \ - --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ - --extra-vars="@$${GKE_VARS_FILE}" - secretEnv: ['GCLUSTER_GCS_PATH'] + --extra-vars="@$${GKE_VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: 
projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/gke-g4.yaml b/tools/cloud-build/daily-tests/builds/gke-g4.yaml index f76b9d4a85..968788fa5d 100644 --- a/tools/cloud-build/daily-tests/builds/gke-g4.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-g4.yaml @@ -36,6 +36,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -67,10 +68,22 @@ steps: echo ' outputs: [instructions]' >> $${EXAMPLE_BP} bash tools/add_ttl_label.sh "$${EXAMPLE_BP}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/gke-g4.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/gke-g4.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - 
versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/gke-h4d-onspot.yaml b/tools/cloud-build/daily-tests/builds/gke-h4d-onspot.yaml index 44a8634ab9..47fc37fced 100644 --- a/tools/cloud-build/daily-tests/builds/gke-h4d-onspot.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-h4d-onspot.yaml @@ -94,12 +94,24 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ --extra-vars="test_prefix=${_TEST_PREFIX} use_fixed_vpc=true" \ --user=sa_106486320838376751393 \ - --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ - --extra-vars="@$${GKE_VARS_FILE}" - secretEnv: ['GCLUSTER_GCS_PATH'] + --extra-vars="@$${GKE_VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: 
projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/gke-h4d.yaml b/tools/cloud-build/daily-tests/builds/gke-h4d.yaml index 44e822c9c2..d30c742e1b 100644 --- a/tools/cloud-build/daily-tests/builds/gke-h4d.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-h4d.yaml @@ -39,6 +39,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -75,10 +76,22 @@ steps: python3 tools/fix_vpc_name.py $${EXAMPLE_BP} "${_TEST_PREFIX}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ --extra-vars="test_prefix=${_TEST_PREFIX} use_fixed_vpc=true" \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/gke-h4d.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/gke-h4d.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: 
projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/gke-inactive-reservation.yaml b/tools/cloud-build/daily-tests/builds/gke-inactive-reservation.yaml index b444fdf39e..52d610c8da 100644 --- a/tools/cloud-build/daily-tests/builds/gke-inactive-reservation.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-inactive-reservation.yaml @@ -36,6 +36,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -72,10 +73,22 @@ steps: bash tools/add_ttl_label.sh "$${EXAMPLE_BP}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/gke-inactive-reservation.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/gke-inactive-reservation.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + 
--extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/gke-managed-hyperdisk.yaml b/tools/cloud-build/daily-tests/builds/gke-managed-hyperdisk.yaml index 9894c5875c..e1220c09c8 100644 --- a/tools/cloud-build/daily-tests/builds/gke-managed-hyperdisk.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-managed-hyperdisk.yaml @@ -38,6 +38,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -61,10 +62,22 @@ steps: sed -i "s//$${IP}/" $${SG_EXAMPLE} bash tools/add_ttl_label.sh "$${SG_EXAMPLE}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/gke-managed-hyperdisk.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/gke-managed-hyperdisk.yml" \ + 
--extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/gke-managed-lustre.yaml b/tools/cloud-build/daily-tests/builds/gke-managed-lustre.yaml index fc27f1c7dd..51591245bd 100644 --- a/tools/cloud-build/daily-tests/builds/gke-managed-lustre.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-managed-lustre.yaml @@ -39,6 +39,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -63,10 +64,22 @@ steps: sed -i "s//$${IP}/" $${EXAMPLE_BP} bash tools/add_ttl_label.sh "$${EXAMPLE_BP}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/gke-managed-lustre.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + 
--user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/gke-managed-lustre.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/gke-storage.yaml b/tools/cloud-build/daily-tests/builds/gke-storage.yaml index 75ea16efc0..32788cf6e1 100644 --- a/tools/cloud-build/daily-tests/builds/gke-storage.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-storage.yaml @@ -41,6 +41,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -64,10 +65,22 @@ steps: python3 tools/fix_vpc_name.py $${SG_EXAMPLE} "${_TEST_PREFIX}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ --extra-vars="test_prefix=${_TEST_PREFIX} use_fixed_vpc=true" \ - 
--user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/gke-storage.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/gke-storage.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/gke-tpu-7x.yaml b/tools/cloud-build/daily-tests/builds/gke-tpu-7x.yaml index 3b30ccc196..15a34e6cab 100644 --- a/tools/cloud-build/daily-tests/builds/gke-tpu-7x.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-tpu-7x.yaml @@ -85,14 +85,26 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ --user=sa_106486320838376751393 \ - --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} chs_repo=$${CHS_REPO}" \ + --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} 
full_build_id=$BUILD_ID chs_repo=$${CHS_REPO}" \ --extra-vars="region=us-central1 zone=us-central1-c" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ - --extra-vars="@$${GKE_VARS_FILE}" - secretEnv: ['GCLUSTER_GCS_PATH', 'CHS_REPO'] + --extra-vars="@$${GKE_VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'CHS_REPO', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' - versionName: projects/${PROJECT_ID}/secrets/cluster-health-scanner/versions/latest env: 'CHS_REPO' diff --git a/tools/cloud-build/daily-tests/builds/gke-tpu-v6e-flex.yaml b/tools/cloud-build/daily-tests/builds/gke-tpu-v6e-flex.yaml index f13491e4cb..69ac3d9c00 100644 --- a/tools/cloud-build/daily-tests/builds/gke-tpu-v6e-flex.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-tpu-v6e-flex.yaml @@ -72,11 +72,23 @@ steps: bash tools/add_ttl_label.sh "$${EXAMPLE_BP}" # Run the test ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --user=sa_106486320838376751393 
--extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/gke-tpu-v6e-flex.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --extra-vars="@tools/cloud-build/daily-tests/tests/gke-tpu-v6e-flex.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/gke-tpu-v6e.yaml b/tools/cloud-build/daily-tests/builds/gke-tpu-v6e.yaml index e47744c58b..a93b713b52 100644 --- a/tools/cloud-build/daily-tests/builds/gke-tpu-v6e.yaml +++ b/tools/cloud-build/daily-tests/builds/gke-tpu-v6e.yaml @@ -100,14 +100,26 @@ steps: bash tools/add_ttl_label.sh "$${EXAMPLE_BP}" ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} chs_repo=$${CHS_REPO}" \ + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} 
full_build_id=$BUILD_ID chs_repo=$${CHS_REPO}" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ - --extra-vars="@$${GKE_VARS_FILE}" - secretEnv: ['GCLUSTER_GCS_PATH', 'CHS_REPO'] + --extra-vars="@$${GKE_VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'CHS_REPO', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' - versionName: projects/${PROJECT_ID}/secrets/cluster-health-scanner/versions/latest env: 'CHS_REPO' diff --git a/tools/cloud-build/daily-tests/builds/gke.yaml b/tools/cloud-build/daily-tests/builds/gke.yaml index 1a9bab23de..b96855bcd0 100644 --- a/tools/cloud-build/daily-tests/builds/gke.yaml +++ b/tools/cloud-build/daily-tests/builds/gke.yaml @@ -36,6 +36,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -63,10 +64,22 @@ steps: bash tools/add_ttl_label.sh "$${SG_EXAMPLE}" ansible-playbook 
tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/gke.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/gke.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/h4d-vm.yaml b/tools/cloud-build/daily-tests/builds/h4d-vm.yaml index 88c00e8f03..36acefcebd 100644 --- a/tools/cloud-build/daily-tests/builds/h4d-vm.yaml +++ b/tools/cloud-build/daily-tests/builds/h4d-vm.yaml @@ -80,12 +80,24 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ --extra-vars="test_prefix=${_TEST_PREFIX} use_fixed_vpc=true" \ --user=sa_106486320838376751393 \ - --extra-vars="project=${PROJECT_ID} 
build=$${BUILD_ID_SHORT}" \ + --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ - --extra-vars="@$${H4D_VARS_FILE}" - secretEnv: ['GCLUSTER_GCS_PATH'] + --extra-vars="@$${H4D_VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/hcls.yaml b/tools/cloud-build/daily-tests/builds/hcls.yaml index 2e0b849d28..92bea43f55 100644 --- a/tools/cloud-build/daily-tests/builds/hcls.yaml +++ b/tools/cloud-build/daily-tests/builds/hcls.yaml @@ -49,6 +49,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -64,10 +65,22 @@ steps: python3 tools/fix_vpc_name.py $${BLUEPRINT} "${_TEST_PREFIX}" ansible-playbook 
tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --extra-vars="test_prefix=${_TEST_PREFIX} use_fixed_vpc=true" \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/hcls.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/hcls.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/hpc-build-slurm-image.yaml b/tools/cloud-build/daily-tests/builds/hpc-build-slurm-image.yaml index 7420957fa4..68e0bff2e7 100644 --- a/tools/cloud-build/daily-tests/builds/hpc-build-slurm-image.yaml +++ b/tools/cloud-build/daily-tests/builds/hpc-build-slurm-image.yaml @@ -36,6 +36,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - 
"ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -49,10 +50,22 @@ steps: BLUEPRINT="community/examples/hpc-build-slurm-image.yaml" bash tools/add_ttl_label.sh $${BLUEPRINT} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/multigroup-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/hpc-build-slurm-image.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/hpc-build-slurm-image.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/hpc-enterprise-slurm.yaml b/tools/cloud-build/daily-tests/builds/hpc-enterprise-slurm.yaml index 8cc60dc284..cf066f9b3f 100644 --- a/tools/cloud-build/daily-tests/builds/hpc-enterprise-slurm.yaml +++ 
b/tools/cloud-build/daily-tests/builds/hpc-enterprise-slurm.yaml @@ -40,6 +40,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -54,10 +55,22 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/hpc-enterprise-slurm.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/hpc-enterprise-slurm.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/htc-slurm.yaml 
b/tools/cloud-build/daily-tests/builds/htc-slurm.yaml index f42d1a41cd..aef05add59 100644 --- a/tools/cloud-build/daily-tests/builds/htc-slurm.yaml +++ b/tools/cloud-build/daily-tests/builds/htc-slurm.yaml @@ -38,6 +38,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -52,10 +53,22 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/htc-slurm.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/htc-slurm.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 
'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/htcondor.yaml b/tools/cloud-build/daily-tests/builds/htcondor.yaml index 2a16b60880..66add7b9ad 100644 --- a/tools/cloud-build/daily-tests/builds/htcondor.yaml +++ b/tools/cloud-build/daily-tests/builds/htcondor.yaml @@ -41,6 +41,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -55,9 +56,21 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/htcondor-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" --extra-vars="@tools/cloud-build/daily-tests/tests/htcondor.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" --extra-vars="@tools/cloud-build/daily-tests/tests/htcondor.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: 
projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-highgpu-onspot-slurm.yaml b/tools/cloud-build/daily-tests/builds/ml-a3-highgpu-onspot-slurm.yaml index 5a33a9ae54..9090445faa 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a3-highgpu-onspot-slurm.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a3-highgpu-onspot-slurm.yaml @@ -83,16 +83,28 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 \ --extra-vars="test_prefix=${_TEST_PREFIX} use_fixed_vpc=true" \ - --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} "\ + --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID "\ --extra-vars="region=$${REGION} zone=$${ZONE}"\ --extra-vars="enable_spot=$${ENABLE_SPOT} "\ --extra-vars="tcpx_kernel_login=$${TCPX_KERNEL_LOGIN} tcpx_kernel_password=$${TCPX_KERNEL_PASSWORD} keyserver_ubuntu_key=$${KEYSERVER_UBUNTU_KEY} "\ - --extra-vars="@$${SLURM_VARS_FILE}" - secretEnv: ['GCLUSTER_GCS_PATH', 'TCPX_KERNEL_LOGIN', 'TCPX_KERNEL_PASSWORD', 'KEYSERVER_UBUNTU_KEY'] + --extra-vars="@$${SLURM_VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TCPX_KERNEL_LOGIN', 'TCPX_KERNEL_PASSWORD', 'KEYSERVER_UBUNTU_KEY', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: 
projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' - versionName: projects/${PROJECT_ID}/secrets/tcpx-kernel-ppa-login/versions/latest env: 'TCPX_KERNEL_LOGIN' - versionName: projects/${PROJECT_ID}/secrets/tcpx-kernel-ppa-password/versions/latest diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-highgpu-slurm.yaml b/tools/cloud-build/daily-tests/builds/ml-a3-highgpu-slurm.yaml index da077ec4c8..981bd98675 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a3-highgpu-slurm.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a3-highgpu-slurm.yaml @@ -40,6 +40,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -59,15 +60,27 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 \ - --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} "\ + --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID "\ --extra-vars="region=$${REGION} zone=$${ZONE} "\ --extra-vars="tcpx_kernel_login=$${TCPX_KERNEL_LOGIN} tcpx_kernel_password=$${TCPX_KERNEL_PASSWORD} keyserver_ubuntu_key=$${KEYSERVER_UBUNTU_KEY} "\ - --extra-vars="@tools/cloud-build/daily-tests/tests/ml-a3-highgpu-slurm.yml" - secretEnv: ['GCLUSTER_GCS_PATH', 'TCPX_KERNEL_LOGIN', 'TCPX_KERNEL_PASSWORD', 'KEYSERVER_UBUNTU_KEY'] + --extra-vars="@tools/cloud-build/daily-tests/tests/ml-a3-highgpu-slurm.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + 
--extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TCPX_KERNEL_LOGIN', 'TCPX_KERNEL_PASSWORD', 'KEYSERVER_UBUNTU_KEY', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' - versionName: projects/${PROJECT_ID}/secrets/tcpx-kernel-ppa-login/versions/latest env: 'TCPX_KERNEL_LOGIN' - versionName: projects/${PROJECT_ID}/secrets/tcpx-kernel-ppa-password/versions/latest diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-megagpu-onspot-slurm-ubuntu.yaml b/tools/cloud-build/daily-tests/builds/ml-a3-megagpu-onspot-slurm-ubuntu.yaml index 645ff085b7..6b46e2efdb 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a3-megagpu-onspot-slurm-ubuntu.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a3-megagpu-onspot-slurm-ubuntu.yaml @@ -87,12 +87,24 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --extra-vars="test_prefix=${_TEST_PREFIX} use_fixed_vpc=true" \ --user=sa_106486320838376751393 \ - --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ - 
--extra-vars="@$${SLURM_VARS_FILE}" - secretEnv: ['GCLUSTER_GCS_PATH'] + --extra-vars="@$${SLURM_VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-megagpu-slurm-ubuntu.yaml b/tools/cloud-build/daily-tests/builds/ml-a3-megagpu-slurm-ubuntu.yaml index 17816847ef..b775c8cc35 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a3-megagpu-slurm-ubuntu.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a3-megagpu-slurm-ubuntu.yaml @@ -42,6 +42,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -64,11 +65,23 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --extra-vars="test_prefix=${_TEST_PREFIX} use_fixed_vpc=true" \ --user=sa_106486320838376751393 \ - --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + 
--extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/ml-a3-megagpu-slurm-ubuntu.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --extra-vars="@tools/cloud-build/daily-tests/tests/ml-a3-megagpu-slurm-ubuntu.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-custom-2404-blueprint-test.yaml b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-custom-2404-blueprint-test.yaml index a4126fe5c2..ca83f78ece 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-custom-2404-blueprint-test.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-custom-2404-blueprint-test.yaml @@ -78,16 +78,28 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 \ - --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + 
--extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ --extra-vars="instance_image_project=$${CUSTOM_IMAGE_PROJECT}" \ --extra-vars="instance_image_family=$${CUSTOM_IMAGE_FAMILY}" \ - --extra-vars="@$${VARS_FILE}" - secretEnv: ['CUSTOM_IMAGE_PROJECT', 'CUSTOM_IMAGE_FAMILY'] + --extra-vars="@$${VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['CUSTOM_IMAGE_PROJECT', 'CUSTOM_IMAGE_FAMILY', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/custom-image-project/versions/latest env: 'CUSTOM_IMAGE_PROJECT' - versionName: projects/${PROJECT_ID}/secrets/custom-image-family-2404/versions/latest env: 'CUSTOM_IMAGE_FAMILY' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-custom-blueprint-test.yaml b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-custom-blueprint-test.yaml index a87de19fc9..6c8b740950 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-custom-blueprint-test.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-custom-blueprint-test.yaml @@ -78,17 +78,29 @@ steps: ansible-playbook 
tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 \ - --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ --extra-vars="instance_image_project=$${CUSTOM_IMAGE_PROJECT}" \ --extra-vars="instance_image_family=$${CUSTOM_IMAGE_FAMILY}" \ - --extra-vars="@$${VARS_FILE}" - secretEnv: ['GCLUSTER_GCS_PATH', 'CUSTOM_IMAGE_PROJECT', 'CUSTOM_IMAGE_FAMILY'] + --extra-vars="@$${VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'CUSTOM_IMAGE_PROJECT', 'CUSTOM_IMAGE_FAMILY', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' - versionName: projects/${PROJECT_ID}/secrets/custom-image-project/versions/latest env: 'CUSTOM_IMAGE_PROJECT' - versionName: projects/${PROJECT_ID}/secrets/custom-image-family/versions/latest diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-jbvms.yaml 
b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-jbvms.yaml index 36d81dcd0d..b71264926a 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-jbvms.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-jbvms.yaml @@ -36,6 +36,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -56,11 +57,23 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ --extra-vars="test_prefix=${_TEST_PREFIX} use_fixed_vpc=true" \ --user=sa_106486320838376751393 \ - --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-jbvms.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --extra-vars="@tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-jbvms.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' 
+ - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-onspot-jbvms.yaml b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-onspot-jbvms.yaml index c5234076a2..c7018ed917 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-onspot-jbvms.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-onspot-jbvms.yaml @@ -81,12 +81,24 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ --extra-vars="test_prefix=${_TEST_PREFIX} use_fixed_vpc=true" \ --user=sa_106486320838376751393 \ - --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ - --extra-vars="@$${JBVM_VARS_FILE}" - secretEnv: ['GCLUSTER_GCS_PATH'] + --extra-vars="@$${JBVM_VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: 
projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-onspot-slurm.yaml b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-onspot-slurm.yaml index 54489a2150..31efff9fce 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-onspot-slurm.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-onspot-slurm.yaml @@ -89,14 +89,26 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --extra-vars="test_prefix=${_TEST_PREFIX} use_fixed_vpc=true" \ --user=sa_106486320838376751393 \ - --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT} chs_repo=$${CHS_REPO}" \ + --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID chs_repo=$${CHS_REPO}" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ - --extra-vars="@$${SLURM_VARS_FILE}" - secretEnv: ['GCLUSTER_GCS_PATH', 'CHS_REPO'] + --extra-vars="@$${SLURM_VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'CHS_REPO', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: 
projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' - versionName: projects/${PROJECT_ID}/secrets/cluster-health-scanner/versions/latest env: 'CHS_REPO' diff --git a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-slurm.yaml b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-slurm.yaml index 86dfc77421..91efa41702 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-slurm.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a3-ultragpu-slurm.yaml @@ -42,6 +42,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -62,11 +63,23 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --extra-vars="test_prefix=${_TEST_PREFIX} use_fixed_vpc=true" \ --user=sa_106486320838376751393 \ - --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-slurm.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --extra-vars="@tools/cloud-build/daily-tests/tests/ml-a3-ultragpu-slurm.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: 
projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-custom-2404-blueprint-test.yaml b/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-custom-2404-blueprint-test.yaml index cb31f60305..ccf3f46eb0 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-custom-2404-blueprint-test.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-custom-2404-blueprint-test.yaml @@ -77,16 +77,28 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 \ - --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ --extra-vars="instance_image_project=$${CUSTOM_IMAGE_PROJECT}" \ --extra-vars="instance_image_family=$${CUSTOM_IMAGE_FAMILY}" \ - --extra-vars="@$${VARS_FILE}" - secretEnv: ['CUSTOM_IMAGE_PROJECT', 'CUSTOM_IMAGE_FAMILY'] + --extra-vars="@$${VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['CUSTOM_IMAGE_PROJECT', 'CUSTOM_IMAGE_FAMILY', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: 
projects/${PROJECT_ID}/secrets/custom-image-project/versions/latest env: 'CUSTOM_IMAGE_PROJECT' - versionName: projects/${PROJECT_ID}/secrets/custom-image-family-2404/versions/latest env: 'CUSTOM_IMAGE_FAMILY' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-custom-blueprint-test.yaml b/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-custom-blueprint-test.yaml index 25c453a605..bb40f41fa9 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-custom-blueprint-test.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-custom-blueprint-test.yaml @@ -77,17 +77,29 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 \ - --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ --extra-vars="instance_image_project=$${CUSTOM_IMAGE_PROJECT}" \ --extra-vars="instance_image_family=$${CUSTOM_IMAGE_FAMILY}" \ - --extra-vars="@$${VARS_FILE}" - secretEnv: ['GCLUSTER_GCS_PATH', 'CUSTOM_IMAGE_PROJECT', 'CUSTOM_IMAGE_FAMILY'] + --extra-vars="@$${VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + 
secretEnv: ['GCLUSTER_GCS_PATH', 'CUSTOM_IMAGE_PROJECT', 'CUSTOM_IMAGE_FAMILY', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' - versionName: projects/${PROJECT_ID}/secrets/custom-image-project/versions/latest env: 'CUSTOM_IMAGE_PROJECT' - versionName: projects/${PROJECT_ID}/secrets/custom-image-family/versions/latest diff --git a/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-onspot-slurm.yaml b/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-onspot-slurm.yaml index 98b27773af..82b48a44d0 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-onspot-slurm.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a4-highgpu-onspot-slurm.yaml @@ -87,14 +87,26 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --extra-vars="test_prefix=${_TEST_PREFIX} use_fixed_vpc=true" \ --user=sa_106486320838376751393 \ - --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT} chs_repo=$${CHS_REPO}" \ + --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID chs_repo=$${CHS_REPO}" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ - --extra-vars="@$${SLURM_VARS_FILE}" - secretEnv: ['GCLUSTER_GCS_PATH', 'CHS_REPO'] + --extra-vars="@$${SLURM_VARS_FILE}" \ + 
--extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'CHS_REPO', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' - versionName: projects/${PROJECT_ID}/secrets/cluster-health-scanner/versions/latest env: 'CHS_REPO' diff --git a/tools/cloud-build/daily-tests/builds/ml-a4x-highgpu-custom-blueprint-test.yaml b/tools/cloud-build/daily-tests/builds/ml-a4x-highgpu-custom-blueprint-test.yaml index 55803611b8..5d729e41d2 100644 --- a/tools/cloud-build/daily-tests/builds/ml-a4x-highgpu-custom-blueprint-test.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a4x-highgpu-custom-blueprint-test.yaml @@ -38,6 +38,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -60,16 +61,28 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --extra-vars="test_prefix=${_TEST_PREFIX}" \ --user=sa_106486320838376751393 \ - --extra-vars="project=${PROJECT_ID} 
build=$${BUILD_ID_SHORT} "\ + --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID "\ --extra-vars="region=$${REGION} zone=$${ZONE} "\ --extra-vars="instance_image_project=$${CUSTOM_IMAGE_PROJECT}" \ --extra-vars="instance_image_family=$${CUSTOM_IMAGE_FAMILY}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/ml-a4x-highgpu-custom-blueprint-test.yml" - secretEnv: ['GCLUSTER_GCS_PATH', 'CUSTOM_IMAGE_PROJECT', 'CUSTOM_IMAGE_FAMILY'] + --extra-vars="@tools/cloud-build/daily-tests/tests/ml-a4x-highgpu-custom-blueprint-test.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'CUSTOM_IMAGE_PROJECT', 'CUSTOM_IMAGE_FAMILY', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' - versionName: projects/${PROJECT_ID}/secrets/custom-image-project/versions/latest env: 'CUSTOM_IMAGE_PROJECT' - versionName: projects/${PROJECT_ID}/secrets/custom-image-family-a4x/versions/latest diff --git a/tools/cloud-build/daily-tests/builds/ml-a4x-highgpu-slurm.yaml b/tools/cloud-build/daily-tests/builds/ml-a4x-highgpu-slurm.yaml index 87af556b40..924daa0008 100644 --- 
a/tools/cloud-build/daily-tests/builds/ml-a4x-highgpu-slurm.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-a4x-highgpu-slurm.yaml @@ -41,6 +41,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -66,11 +67,23 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --extra-vars="test_prefix=${_TEST_PREFIX} use_fixed_vpc=true" \ --user=sa_106486320838376751393 \ - --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} "\ + --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID "\ --extra-vars="region=$${REGION} zone=$${ZONE} "\ - --extra-vars="@tools/cloud-build/daily-tests/tests/ml-a4x-highgpu-slurm.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --extra-vars="@tools/cloud-build/daily-tests/tests/ml-a4x-highgpu-slurm.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 
'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/ml-g4-onspot-slurm.yaml b/tools/cloud-build/daily-tests/builds/ml-g4-onspot-slurm.yaml index bcdc7de6bf..6a9cb4f79b 100644 --- a/tools/cloud-build/daily-tests/builds/ml-g4-onspot-slurm.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-g4-onspot-slurm.yaml @@ -80,12 +80,24 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --extra-vars="test_prefix=${_TEST_PREFIX} use_fixed_vpc=true" \ --user=sa_106486320838376751393 \ - --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ - --extra-vars="@$${SLURM_VARS_FILE}" - secretEnv: ['GCLUSTER_GCS_PATH'] + --extra-vars="@$${SLURM_VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/ml-gke-e2e.yaml 
b/tools/cloud-build/daily-tests/builds/ml-gke-e2e.yaml index 38bb944772..5be8759563 100644 --- a/tools/cloud-build/daily-tests/builds/ml-gke-e2e.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-gke-e2e.yaml @@ -36,6 +36,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -65,10 +66,22 @@ steps: sed -i "s//$${IP}/" $${SG_EXAMPLE} bash tools/add_ttl_label.sh $${SG_EXAMPLE} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/ml-gke-e2e.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/ml-gke-e2e.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: 
projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/ml-gke.yaml b/tools/cloud-build/daily-tests/builds/ml-gke.yaml index 6884f8cebf..478c6cd352 100644 --- a/tools/cloud-build/daily-tests/builds/ml-gke.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-gke.yaml @@ -36,6 +36,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -66,10 +67,22 @@ steps: bash tools/add_ttl_label.sh $${SG_EXAMPLE} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/ml-gke.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/ml-gke.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: 
projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/ml-h4d-onspot-slurm.yaml b/tools/cloud-build/daily-tests/builds/ml-h4d-onspot-slurm.yaml index c5481dd7c6..510d8a5c00 100644 --- a/tools/cloud-build/daily-tests/builds/ml-h4d-onspot-slurm.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-h4d-onspot-slurm.yaml @@ -79,12 +79,24 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --extra-vars="test_prefix=${_TEST_PREFIX} use_fixed_vpc=true" \ --user=sa_106486320838376751393 \ - --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --extra-vars="project=$${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ --extra-vars="enable_spot=$${ENABLE_SPOT}" \ - --extra-vars="@$${SLURM_VARS_FILE}" - secretEnv: ['GCLUSTER_GCS_PATH'] + --extra-vars="@$${SLURM_VARS_FILE}" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - 
versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/ml-slurm.yaml b/tools/cloud-build/daily-tests/builds/ml-slurm.yaml index 26ac07632e..e4eabcefe8 100644 --- a/tools/cloud-build/daily-tests/builds/ml-slurm.yaml +++ b/tools/cloud-build/daily-tests/builds/ml-slurm.yaml @@ -41,6 +41,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -57,10 +58,22 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/multigroup-integration-test.yml \ --extra-vars="test_prefix=${_TEST_PREFIX} use_fixed_vpc=true" \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/ml-slurm.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/ml-slurm.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - 
versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/monitoring.yaml b/tools/cloud-build/daily-tests/builds/monitoring.yaml index 833787b734..80c7d878fd 100644 --- a/tools/cloud-build/daily-tests/builds/monitoring.yaml +++ b/tools/cloud-build/daily-tests/builds/monitoring.yaml @@ -38,6 +38,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -52,10 +53,22 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/monitoring.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/monitoring.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: 
projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/netapp-volumes.yaml b/tools/cloud-build/daily-tests/builds/netapp-volumes.yaml index 39008f3576..76eed3ec31 100644 --- a/tools/cloud-build/daily-tests/builds/netapp-volumes.yaml +++ b/tools/cloud-build/daily-tests/builds/netapp-volumes.yaml @@ -36,6 +36,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -50,10 +51,22 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/netapp-volumes.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/netapp-volumes.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - 
versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/ofe-deployment.yaml b/tools/cloud-build/daily-tests/builds/ofe-deployment.yaml index 417c88d91d..98cfad5879 100644 --- a/tools/cloud-build/daily-tests/builds/ofe-deployment.yaml +++ b/tools/cloud-build/daily-tests/builds/ofe-deployment.yaml @@ -29,6 +29,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -43,10 +44,22 @@ steps: git init . 
# ofe deploymemt requires some git repo to figure out top level directory ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/ofe-deployment-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/ofe-deployment.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/ofe-deployment.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/packer.yaml b/tools/cloud-build/daily-tests/builds/packer.yaml index 8d58fed215..fdd9cab880 100644 --- a/tools/cloud-build/daily-tests/builds/packer.yaml +++ b/tools/cloud-build/daily-tests/builds/packer.yaml @@ -40,6 +40,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - 
"ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -54,10 +55,22 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/multigroup-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/packer.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/packer.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/pfs-managed-lustre-slurm.yaml b/tools/cloud-build/daily-tests/builds/pfs-managed-lustre-slurm.yaml index 7615564857..8c6e1b74c3 100644 --- a/tools/cloud-build/daily-tests/builds/pfs-managed-lustre-slurm.yaml +++ b/tools/cloud-build/daily-tests/builds/pfs-managed-lustre-slurm.yaml @@ -37,6 
+37,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -50,10 +51,22 @@ steps: BLUEPRINT="examples/pfs-managed-lustre-slurm.yaml" bash tools/add_ttl_label.sh $${BLUEPRINT} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/pfs-managed-lustre-slurm.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/pfs-managed-lustre-slurm.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/pfs-managed-lustre-vm.yaml 
b/tools/cloud-build/daily-tests/builds/pfs-managed-lustre-vm.yaml index 01ddc932ed..68c5f47387 100644 --- a/tools/cloud-build/daily-tests/builds/pfs-managed-lustre-vm.yaml +++ b/tools/cloud-build/daily-tests/builds/pfs-managed-lustre-vm.yaml @@ -34,6 +34,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -47,10 +48,22 @@ steps: BLUEPRINT="examples/pfs-managed-lustre-vm.yaml" bash tools/add_ttl_label.sh $${BLUEPRINT} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/pfs-managed-lustre-vm.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/pfs-managed-lustre-vm.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 
'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/slinky.yml b/tools/cloud-build/daily-tests/builds/slinky.yml index bdcc5a7136..9808bc7737 100644 --- a/tools/cloud-build/daily-tests/builds/slinky.yml +++ b/tools/cloud-build/daily-tests/builds/slinky.yml @@ -36,6 +36,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -59,10 +60,22 @@ steps: echo ' add_deployment_name_before_prefix: true' >> $${EXAMPLE_BP} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/base-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/slinky.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/slinky.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - 
versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-debian.yaml b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-debian.yaml index eecb13fecd..b39f123506 100644 --- a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-debian.yaml +++ b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-debian.yaml @@ -37,6 +37,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -51,10 +52,22 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-v6-debian.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-v6-debian.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 
'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-rocky8.yaml b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-rocky8.yaml index b0f8ffccf5..9dbaeb2672 100644 --- a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-rocky8.yaml +++ b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-rocky8.yaml @@ -37,6 +37,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -53,10 +54,22 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --extra-vars="test_prefix=${_TEST_PREFIX} use_fixed_vpc=true" \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-v6-rocky8.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-v6-rocky8.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: 
projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-ssd.yaml b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-ssd.yaml index 70f5621cec..a1bbc3f6b1 100644 --- a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-ssd.yaml +++ b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-ssd.yaml @@ -38,6 +38,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -52,10 +53,22 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-v6-ssd.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-v6-ssd.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 
'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-startup-scripts.yaml b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-startup-scripts.yaml index 1e5d81daca..406612be06 100644 --- a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-startup-scripts.yaml +++ b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-startup-scripts.yaml @@ -38,6 +38,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -52,10 +53,22 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-v6-startup-scripts.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-v6-startup-scripts.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + 
--extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-tpu.yaml b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-tpu.yaml index 39d1f76a2f..46d6038970 100644 --- a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-tpu.yaml +++ b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-tpu.yaml @@ -67,11 +67,23 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 \ - --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ --extra-vars="region=$${REGION} zone=$${ZONE}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-v6-tpu.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-v6-tpu.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: 
['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-ubuntu.yaml b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-ubuntu.yaml index c9f0428f0a..3afe46c2da 100644 --- a/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-ubuntu.yaml +++ b/tools/cloud-build/daily-tests/builds/slurm-gcp-v6-ubuntu.yaml @@ -37,6 +37,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -51,10 +52,22 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-v6-ubuntu.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-v6-ubuntu.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + 
--extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/slurm-gke.yaml b/tools/cloud-build/daily-tests/builds/slurm-gke.yaml index 6a3692af89..016bbd5da8 100644 --- a/tools/cloud-build/daily-tests/builds/slurm-gke.yaml +++ b/tools/cloud-build/daily-tests/builds/slurm-gke.yaml @@ -44,6 +44,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -58,10 +59,22 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} ansible-playbook -v tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-gke.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-gke.yml" \ + 
--extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/slurm-rapid-storage.yaml b/tools/cloud-build/daily-tests/builds/slurm-rapid-storage.yaml index 295bb3b59c..2005ecec80 100644 --- a/tools/cloud-build/daily-tests/builds/slurm-rapid-storage.yaml +++ b/tools/cloud-build/daily-tests/builds/slurm-rapid-storage.yaml @@ -38,6 +38,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -51,10 +52,22 @@ steps: BLUEPRINT=examples/rapid-storage-slurm.yaml bash tools/add_ttl_label.sh $${BLUEPRINT} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-rapid-storage.yaml" - secretEnv: ['GCLUSTER_GCS_PATH'] + 
--user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/slurm-rapid-storage.yaml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL' diff --git a/tools/cloud-build/daily-tests/builds/spack-gromacs.yaml b/tools/cloud-build/daily-tests/builds/spack-gromacs.yaml index 9de11f0a43..26346d9cee 100644 --- a/tools/cloud-build/daily-tests/builds/spack-gromacs.yaml +++ b/tools/cloud-build/daily-tests/builds/spack-gromacs.yaml @@ -41,6 +41,7 @@ steps: name: us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/test-runner entrypoint: /bin/bash env: + - "BUILD_ID=$BUILD_ID" - "ANSIBLE_HOST_KEY_CHECKING=false" - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" args: @@ -55,10 +56,22 @@ steps: bash tools/add_ttl_label.sh $${BLUEPRINT} ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ - --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} 
build=$${BUILD_ID_SHORT}" \ - --extra-vars="@tools/cloud-build/daily-tests/tests/spack-gromacs.yml" - secretEnv: ['GCLUSTER_GCS_PATH'] + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT} full_build_id=$BUILD_ID" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/spack-gromacs.yml" \ + --extra-vars="triage_gcs_bucket_override=$$TRIAGE_GCS_BUCKET" \ + --extra-vars="triage_project_number_override=$$TRIAGE_PROJECT_NUMBER" \ + --extra-vars="triage_invoker_sa_override=$$TRIAGE_INVOKER_SA" \ + --extra-vars="triage_cloud_run_url_override=$$TRIAGE_CLOUD_RUN_URL" + secretEnv: ['GCLUSTER_GCS_PATH', 'TRIAGE_GCS_BUCKET', 'TRIAGE_PROJECT_NUMBER', 'TRIAGE_INVOKER_SA', 'TRIAGE_CLOUD_RUN_URL'] availableSecrets: secretManager: - versionName: projects/${PROJECT_ID}/secrets/gcluster-develop-release-bucket/versions/latest env: 'GCLUSTER_GCS_PATH' + - versionName: projects/${PROJECT_ID}/secrets/triage-gcs-bucket/versions/latest + env: 'TRIAGE_GCS_BUCKET' + - versionName: projects/${PROJECT_ID}/secrets/triage-project-number/versions/latest + env: 'TRIAGE_PROJECT_NUMBER' + - versionName: projects/${PROJECT_ID}/secrets/triage-invoker-sa/versions/latest + env: 'TRIAGE_INVOKER_SA' + - versionName: projects/${PROJECT_ID}/secrets/triage-cloud-run-url/versions/latest + env: 'TRIAGE_CLOUD_RUN_URL'