diff --git a/.github/actions/build-mpas/action.yml b/.github/actions/build-mpas/action.yml new file mode 100644 index 0000000000..0db8a88847 --- /dev/null +++ b/.github/actions/build-mpas/action.yml @@ -0,0 +1,141 @@ +name: 'Build MPAS' +description: 'Build MPAS-Atmosphere with specified compiler configuration' + +inputs: + compiler: + description: 'Compiler family (gcc, nvhpc, oneapi, etc.)' + required: true + use-pio: + description: 'Use PIO library (true/false)' + required: false + default: 'false' + openacc: + description: 'Enable OpenACC (true/false)' + required: false + default: 'false' + precision: + description: 'Floating-point precision (single or double)' + required: false + default: 'single' + build-timeout: + description: 'Build timeout in minutes' + required: false + default: '20' + +outputs: + executable: + description: 'Path to built executable' + value: ${{ steps.build.outputs.executable }} + +runs: + using: 'composite' + steps: + - name: Build MPAS-A + id: build + shell: bash + run: | + # Source container environment if available + # This sets NETCDF, PNETCDF, PIO, and other library paths + if [ -f /container/config_env.sh ]; then + echo "Sourcing container environment from /container/config_env.sh" + source /container/config_env.sh + fi + + # MPAS Makefile uses `cpp` for Registry preprocessing. + # Some containers (e.g. openSUSE Leap) don't ship it separately. + if ! command -v cpp &>/dev/null; then + echo "cpp not found, installing..." 
+ if command -v zypper &>/dev/null; then + zypper install -y --no-recommends cpp + elif command -v dnf &>/dev/null; then + dnf install -y cpp + elif command -v apt-get &>/dev/null; then + apt-get update && apt-get install -y cpp + fi + fi + + # Map compiler input to COMPILER_FAMILY if not already set + # The container should set this, but we provide a fallback + if [ -z "${COMPILER_FAMILY}" ]; then + case "${{ inputs.compiler }}" in + gcc|gfortran|gnu) export COMPILER_FAMILY="gcc" ;; + nvhpc|nvfortran) export COMPILER_FAMILY="nvhpc" ;; + oneapi|intel|ifx) export COMPILER_FAMILY="oneapi" ;; + llvm|flang|clang) export COMPILER_FAMILY="clang" ;; + *) export COMPILER_FAMILY="${{ inputs.compiler }}" ;; + esac + fi + + # Set up I/O configuration + if [ "${{ inputs.use-pio }}" = "true" ]; then + export PIO_ROOT=${PIO_ROOT:-/container/pio} + export USE_PIO2=true + else + unset PIO + export USE_PIO2=false + fi + + # Set up accelerator configuration + if [ "${{ inputs.openacc }}" = "true" ]; then + export OPENACC=true + fi + + echo "Build configuration:" + echo " Compiler input: ${{ inputs.compiler }}" + echo " COMPILER_FAMILY: ${COMPILER_FAMILY}" + echo " NETCDF: ${NETCDF:-not set}" + echo " PNETCDF: ${PNETCDF:-not set}" + echo " USE_PIO2: ${USE_PIO2}" + echo " OPENACC: ${OPENACC:-false}" + echo " PRECISION: ${{ inputs.precision }}" + + # Settings from .github/ci-config.env — edit that file to change targets/workarounds + CI_CONFIG="${GITHUB_WORKSPACE}/.github/ci-config.env" + if [ -f "${CI_CONFIG}" ]; then + source "${CI_CONFIG}" + fi + + # Map COMPILER_FAMILY to make target via ci-config.env lookup + VARNAME="MAKE_TARGET_${COMPILER_FAMILY}" + MAKE_TARGET="${!VARNAME}" + if [ -z "${MAKE_TARGET}" ]; then + echo "::error::No make target for COMPILER_FAMILY=${COMPILER_FAMILY}. 
Add MAKE_TARGET_${COMPILER_FAMILY}= to .github/ci-config.env" + exit 1 + fi + + EXTRA_MAKE_FLAGS="" + if [ "${COMPILER_FAMILY}" = "nvhpc" ]; then + ARCH="${NVHPC_TARGET_ARCH:--tp=px}" + sed -i "/^nvhpc:/,/^[a-z]/ s/\"FFLAGS_OPT = /\"FFLAGS_OPT = ${ARCH} /" Makefile + sed -i "/^nvhpc:/,/^[a-z]/ s/\"CFLAGS_OPT = /\"CFLAGS_OPT = ${ARCH} /" Makefile + sed -i "/^nvhpc:/,/^[a-z]/ s/\"CXXFLAGS_OPT = /\"CXXFLAGS_OPT = ${ARCH} /" Makefile + sed -i "/^nvhpc:/,/^[a-z]/ s/\"LDFLAGS_OPT = /\"LDFLAGS_OPT = ${ARCH} /" Makefile + EXTRA_MAKE_FLAGS="${NVHPC_EXTRA_MAKE_FLAGS}" + elif [ "${COMPILER_FAMILY}" = "oneapi" ]; then + EXTRA_MAKE_FLAGS="${ONEAPI_EXTRA_MAKE_FLAGS}" + fi + + echo " Make target: ${MAKE_TARGET}" + echo " Extra make flags: ${EXTRA_MAKE_FLAGS:-}" + echo " Parallel jobs: ${MAKE_J_PROCS:-$(nproc)}" + + PRECISION_FLAG="" + if [ "${{ inputs.precision }}" = "double" ]; then + PRECISION_FLAG="PRECISION=double" + fi + + timeout ${{ inputs.build-timeout }}m \ + make ${MAKE_TARGET} CORE=atmosphere ${EXTRA_MAKE_FLAGS} ${PRECISION_FLAG} --jobs ${MAKE_J_PROCS:-$(nproc)} + + # Set output + echo "executable=$(pwd)/atmosphere_model" >> $GITHUB_OUTPUT + + - name: Verify executable + shell: bash + run: | + if [ ! -f atmosphere_model ]; then + echo "ERROR: atmosphere_model not found!" + exit 1 + fi + ls -la atmosphere_model + file atmosphere_model diff --git a/.github/actions/download-testdata/action.yml b/.github/actions/download-testdata/action.yml new file mode 100644 index 0000000000..86d1457943 --- /dev/null +++ b/.github/actions/download-testdata/action.yml @@ -0,0 +1,97 @@ +name: 'Download Test Data' +description: 'Download and extract an MPAS test case archive from GitHub releases' + +inputs: + resolution: + description: 'Test case resolution (e.g., 240km, 120km). Used to look up RELEASE_TESTDATA_{RES} in ci-config.env.' 
+ required: true + dest-dir: + description: 'Destination directory name for the extracted test case' + required: false + default: '' + +outputs: + case-dir: + description: 'Path to the extracted test case directory' + value: ${{ steps.extract.outputs.case-dir }} + +runs: + using: composite + steps: + - name: Resolve release tag and archive + id: resolve + shell: bash + run: | + RESOLUTION="${{ inputs.resolution }}" + ARCHIVE="${RESOLUTION}.tar.gz" + + CI_CONFIG="${GITHUB_WORKSPACE}/.github/ci-config.env" + if [ ! -f "${CI_CONFIG}" ]; then + echo "::error::ci-config.env not found at ${CI_CONFIG}" + exit 1 + fi + source "${CI_CONFIG}" + + RES_UPPER=$(echo "${RESOLUTION}" | tr '[:lower:]' '[:upper:]' | tr '-' '_') + TAG_VAR="RELEASE_TESTDATA_${RES_UPPER}" + TAG="${!TAG_VAR}" + + if [ -z "${TAG}" ]; then + echo "::error::No release tag for resolution '${RESOLUTION}'. Add ${TAG_VAR}= to ci-config.env." + exit 1 + fi + + REPO="${DATA_REPOSITORY:-${GITHUB_REPOSITORY}}" + URL="https://github.com/${REPO}/releases/download/${TAG}/${ARCHIVE}" + + echo "release-tag=${TAG}" >> $GITHUB_OUTPUT + echo "archive=${ARCHIVE}" >> $GITHUB_OUTPUT + echo "url=${URL}" >> $GITHUB_OUTPUT + echo "Resolved: ${TAG_VAR}=${TAG} → ${URL}" + + - name: Cache test case archive + id: cache + uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5 + with: + path: ${{ steps.resolve.outputs.archive }} + key: testdata-${{ steps.resolve.outputs.release-tag }} + + - name: Download test case archive + if: steps.cache.outputs.cache-hit != 'true' + shell: bash + run: | + URL="${{ steps.resolve.outputs.url }}" + ARCHIVE="${{ steps.resolve.outputs.archive }}" + echo "Downloading ${ARCHIVE} from ${URL}..." 
+ curl -fsSL --retry 5 --retry-delay 5 "${URL}" -o "${ARCHIVE}" + echo "Downloaded $(du -h "${ARCHIVE}" | cut -f1)" + + - name: Extract test case + id: extract + shell: bash + run: | + ARCHIVE="${{ steps.resolve.outputs.archive }}" + DEST="${{ inputs.dest-dir }}" + + echo "Cache hit: ${{ steps.cache.outputs.cache-hit }}" + echo "Archive: ${ARCHIVE} ($(du -h "${ARCHIVE}" | cut -f1))" + + tar xzf "${ARCHIVE}" + + CASE_DIR=$(tar tzf "${ARCHIVE}" 2>/dev/null | head -1 | cut -d/ -f1 || true) + if [ -z "${CASE_DIR}" ]; then + CASE_DIR=$(ls -td */ 2>/dev/null | head -1 | tr -d '/') + fi + + if [ -z "${CASE_DIR}" ] || [ ! -d "${CASE_DIR}" ]; then + echo "::error::Failed to extract test case from ${ARCHIVE}" + exit 1 + fi + + if [ -n "${DEST}" ] && [ "${DEST}" != "${CASE_DIR}" ]; then + mv "${CASE_DIR}" "${DEST}" + CASE_DIR="${DEST}" + fi + + echo "case-dir=${CASE_DIR}" >> $GITHUB_OUTPUT + echo "Extracted test case to: ${CASE_DIR}" diff --git a/.github/actions/ect-summary/action.yml b/.github/actions/ect-summary/action.yml new file mode 100644 index 0000000000..6175f5a33a --- /dev/null +++ b/.github/actions/ect-summary/action.yml @@ -0,0 +1,100 @@ +name: 'ECT Summary' +description: > + Generate a consolidated Ensemble Consistency Test results table from + enriched result files produced by the validate-ect action. Writes a + Markdown table to $GITHUB_STEP_SUMMARY with auto-discovered columns. 
+
+inputs:
+  results-path:
+    description: 'Directory containing downloaded ect-result-* artifact subdirectories'
+    required: true
+
+runs:
+  using: 'composite'
+  steps:
+    - name: Generate summary table
+      shell: bash
+      run: |
+        RESULTS_PATH="${{ inputs.results-path }}"
+
+        # Collect every ect-result-*/ect-result.txt below RESULTS_PATH (-f drops an unmatched glob)
+        RESULT_FILES=()
+        for f in "${RESULTS_PATH}"/ect-result-*/ect-result.txt; do
+          [ -f "$f" ] && RESULT_FILES+=("$f")
+        done
+
+        if [ ${#RESULT_FILES[@]} -eq 0 ]; then
+          echo "::warning::No ECT result files found in ${RESULTS_PATH}"
+          echo "## Ensemble Consistency Test (ECT) Results" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "No results available." >> "$GITHUB_STEP_SUMMARY"
+          exit 0
+        fi
+
+        # Discover column names from the first result file (all keys except "result")
+        COLUMNS=()
+        while IFS='=' read -r key value; do
+          [ -z "$key" ] && continue
+          if [ "$key" != "result" ]; then
+            COLUMNS+=("$key")
+          fi
+        done < "${RESULT_FILES[0]}"
+
+        # Build header row: one capitalised title per discovered key, then a fixed "Result" column
+        HEADER="| "
+        SEPARATOR="| "
+        for col in "${COLUMNS[@]}"; do
+          COL_TITLE="${col^}"
+          HEADER+="${COL_TITLE} | "
+          SEPARATOR+="--- | "
+        done
+        HEADER+="Result |"
+        SEPARATOR+="--- |"
+
+        # Build one table row per result file and tally outcomes by status
+        PASS=0 FAIL=0 ERROR=0 SKIP=0 TOTAL=0
+        ROWS=""
+        for f in "${RESULT_FILES[@]}"; do
+          TOTAL=$((TOTAL + 1))
+
+          # Parse key=value pairs into a fresh associative array for this file
+          declare -A DATA=()
+          while IFS='=' read -r key value; do
+            [ -z "$key" ] && continue
+            DATA["$key"]="$value"
+          done < "$f"
+
+          RESULT="${DATA[result]}"
+          case "$RESULT" in
+            PASSED) ICON="PASSED"; PASS=$((PASS + 1)) ;;
+            FAILED) ICON="**FAILED**"; FAIL=$((FAIL + 1)) ;;
+            SKIPPED) ICON="SKIPPED"; SKIP=$((SKIP + 1)) ;;
+            *) ICON="ERROR"; ERROR=$((ERROR + 1)) ;;
+          esac
+
+          ROW="| "
+          for col in "${COLUMNS[@]}"; do
+            ROW+="${DATA[$col]:-—} | "
+          done
+          ROW+="${ICON} |"
+          ROWS+="${ROW}"$'\n'
+
+          unset DATA
+        done
+
+        SORTED_ROWS=$(printf '%s' "$ROWS" | sort)  # printf, not echo: an extra empty line would sort first and split the table
+
+        {
+          echo "## Ensemble Consistency Test (ECT) Results"
+          echo ""
+          echo "$HEADER"
+          echo "$SEPARATOR"
+          echo "$SORTED_ROWS"
+          echo ""
+          echo "**Total: ${TOTAL}** — ${PASS} passed, ${FAIL} failed, ${ERROR} error, ${SKIP} skipped"
+
+          if [ ${FAIL} -gt 0 ] || [ ${ERROR} -gt 0 ]; then
+            echo ""
+            echo "> One or more ECT validations failed. Check individual ECT Validate job logs for PyCECT details."
+          fi
+        } >> "$GITHUB_STEP_SUMMARY"
diff --git a/.github/actions/mpas-version/action.yml b/.github/actions/mpas-version/action.yml
new file mode 100644
index 0000000000..73c3b3e47a
--- /dev/null
+++ b/.github/actions/mpas-version/action.yml
@@ -0,0 +1,47 @@
+name: 'Get MPAS Version'
+description: >
+  Read the MPAS version string from src/core_atmosphere/Registry.xml.
+  Strict — fails the workflow if the file is missing or the version
+  attribute cannot be parsed (no silent "unknown" fallback).
+
+inputs:
+  registry-path:
+    description: 'Path to Registry.xml relative to GITHUB_WORKSPACE'
+    required: false
+    default: 'src/core_atmosphere/Registry.xml'
+
+outputs:
+  version:
+    description: 'MPAS version string (e.g., 8.4.0)'
+    value: ${{ steps.extract.outputs.version }}
+
+runs:
+  using: 'composite'
+  steps:
+    - id: extract
+      shell: bash
+      run: |
+        python3 - "${{ inputs.registry-path }}" <<'PYEOF'
+        import os
+        import sys
+        import xml.etree.ElementTree as ET
+
+        path = sys.argv[1]
+        if not os.path.isfile(path):
+            print(f"::error::MPAS Registry not found at {path}")
+            sys.exit(1)
+        try:
+            root = ET.parse(path).getroot()
+        except ET.ParseError as e:
+            print(f"::error::Could not parse {path}: {e}")
+            sys.exit(1)
+
+        version = root.attrib.get('version')
+        if not version:
+            print(f"::error::No version attribute on the root element in {path}")
+            sys.exit(1)
+
+        with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
+            f.write(f"version={version}\n")
+        print(f"MPAS version: {version}")
+        PYEOF
diff --git a/.github/actions/print-mpas-logs/action.yml b/.github/actions/print-mpas-logs/action.yml
new file mode 100644
index 0000000000..53ed7e33b1
--- /dev/null
+++ b/.github/actions/print-mpas-logs/action.yml
@@ -0,0 +1,64 @@
+name: 'Print MPAS Logs'
+description: >
+  Print MPAS per-rank log files (log.atmosphere.*.out / .err) to the
+  workflow log inside collapsible ::group:: blocks. Read-only; never fails
+  on its own (use if: always() at the call site).
+
+inputs:
+  log-dir:
+    description: 'Directory to scan for log files'
+    required: true
+  pattern:
+    description: >
+      Glob (relative to log-dir) of files to print. Default catches both
+      .out and .err per rank.
+    required: false
+    default: 'log.atmosphere.*'
+  max-lines:
+    description: >
+      If set, print only the last N lines of each file via tail. Empty (default)
+      prints the whole file.
+    required: false
+    default: ''
+
+runs:
+  using: 'composite'
+  steps:
+    - name: Print logs
+      shell: bash
+      run: |
+        LOG_DIR="${{ inputs.log-dir }}"
+        PATTERN="${{ inputs.pattern }}"
+        MAX_LINES="${{ inputs.max-lines }}"
+
+        if [ ! -d "${LOG_DIR}" ]; then
+          echo "print-mpas-logs: directory '${LOG_DIR}' does not exist; nothing to print."
+          exit 0
+        fi
+
+        cd "${LOG_DIR}"
+        shopt -s nullglob
+
+        # Expand the glob directly: with nullglob no match gives an empty array; matches come back lexically sorted (.err before .out per rank).
+        FILES=( ${PATTERN} )
+
+        if [ ${#FILES[@]} -eq 0 ]; then
+          echo "print-mpas-logs: no files matching '${PATTERN}' in ${LOG_DIR}."
+          exit 0
+        fi
+
+        echo "print-mpas-logs: ${#FILES[@]} file(s) from ${LOG_DIR}"
+        # Close the implicit group GitHub opens for the run: script source so
+        # the per-file groups below render at the step's top level (collapsible
+        # at normal font size) instead of nested inside the script-source group.
+        echo "::endgroup::"
+        for f in "${FILES[@]}"; do
+          echo "::group::${f}"
+          if [ -n "${MAX_LINES}" ]; then
+            echo "(last ${MAX_LINES} lines)"
+            tail -n "${MAX_LINES}" "${f}" || true
+          else
+            cat "${f}" || true
+          fi
+          echo "::endgroup::"
+        done
diff --git a/.github/actions/resolve-container/action.yml b/.github/actions/resolve-container/action.yml
new file mode 100644
index 0000000000..fccafffd76
--- /dev/null
+++ b/.github/actions/resolve-container/action.yml
@@ -0,0 +1,72 @@
+name: 'Resolve Container Image'
+description: 'Assemble a container image name from ci-config.env templates'
+
+inputs:
+  compiler:
+    description: 'Compiler family (gcc, nvhpc, oneapi)'
+    required: true
+  mpi:
+    description: 'MPI implementation (openmpi, mpich)'
+    required: true
+  gpu:
+    description: 'GPU variant (empty string, or "cuda")'
+    required: false
+    default: ''
+
+outputs:
+  image:
+    description: 'Fully qualified container image name'
+    value: ${{ steps.resolve.outputs.image }}
+
+runs:
+  using: 'composite'
+  steps:
+    - name: Resolve container image
+      id: resolve
+      shell: bash
+      run: |
+        CI_CONFIG="${GITHUB_WORKSPACE}/.github/ci-config.env"
+        if [ ! -f "${CI_CONFIG}" ]; then
+          echo "::error::ci-config.env not found at ${CI_CONFIG}"
+          exit 1
+        fi
+        source "${CI_CONFIG}"
+
+        COMPILER="${{ inputs.compiler }}"
+        MPI="${{ inputs.mpi }}"
+        GPU="${{ inputs.gpu }}"
+
+        # Resolve compiler name (fallback to raw value if no mapping)
+        comp_var="CONTAINER_COMPILER_${COMPILER}"
+        COMPILER_NAME="${!comp_var:-$COMPILER}"
+
+        # Resolve MPI name (fallback to raw value if no mapping)
+        mpi_var="CONTAINER_MPI_${MPI}"
+        MPI_NAME="${!mpi_var:-$MPI}"
+
+        # Pick template: GPU vs CPU, then check for per-compiler override
+        if [ -n "${GPU}" ]; then
+          gpu_override="CONTAINER_IMAGE_GPU_${COMPILER}"
+          TEMPLATE="${!gpu_override:-$CONTAINER_IMAGE_GPU}"
+        else
+          cpu_override="CONTAINER_IMAGE_${COMPILER}"
+          TEMPLATE="${!cpu_override:-$CONTAINER_IMAGE}"
+        fi
+
+        # Fail loudly when ci-config.env supplies no template for this variant;
+        # otherwise an empty image name would be emitted as a successful output.
+        if [ -z "${TEMPLATE}" ]; then
+          echo "::error::No container image template resolved for compiler='${COMPILER}' gpu='${GPU:-none}'; check the CONTAINER_IMAGE* settings in ${CI_CONFIG}"
+          exit 1
+        fi
+
+        # Substitute the {compiler} and {mpi} placeholders in the template
+        IMAGE="${TEMPLATE//\{compiler\}/$COMPILER_NAME}"
+        IMAGE="${IMAGE//\{mpi\}/$MPI_NAME}"
+
+        echo "Resolved container image: ${IMAGE}"
+        echo " Compiler: ${COMPILER} → ${COMPILER_NAME}"
+        echo " MPI: ${MPI} → ${MPI_NAME}"
+        echo " GPU: ${GPU:-none}"
+
+        echo "image=${IMAGE}" >> "$GITHUB_OUTPUT"
diff --git a/.github/actions/run-mpas/action.yml b/.github/actions/run-mpas/action.yml
new file mode 100644
index 0000000000..063697a90b
--- /dev/null
+++ b/.github/actions/run-mpas/action.yml
@@ -0,0 +1,195 @@
+name: 'Run MPAS'
+description: 'Run MPAS-Atmosphere test case'
+
+inputs:
+  executable:
+    description: 'Path to atmosphere_model executable'
+    required: false
+    default: './atmosphere_model'
+  num-procs:
+    description: 'Number of MPI processes'
+    required: false
+    default: '1'
+  run-duration:
+    description: 'Run duration (format: D_HH:MM:SS). If empty, uses the namelist default from the test case archive.'
+    required: false
+    default: ''
+  restart-interval:
+    description: 'Restart output interval (format: D_HH:MM:SS). If empty, uses the streams.atmosphere default.'
+ required: false + default: '' + resolution: + description: 'Test case resolution name (e.g., 240km). Used to download the archive and name the working directory.' + required: false + default: '240km' + mpi-impl: + description: 'MPI implementation (openmpi, mpich)' + required: false + default: '' + run-timeout: + description: 'Run timeout in minutes' + required: false + default: '20' + working-dir: + description: 'Working directory name for the run' + required: false + default: '' + strict-exit-check: + description: 'Fail on non-zero exit code (default true). Set false when gfortran IEEE warnings produce non-zero exit but model output is valid.' + required: false + default: 'true' + +outputs: + log-dir: + description: 'Directory containing log files' + value: ${{ steps.run.outputs.log-dir }} + status: + description: 'Run status (success/failed)' + value: ${{ steps.run.outputs.status }} + +runs: + using: 'composite' + steps: + - name: Resolve configuration + id: config + shell: bash + run: | + RESOLUTION="${{ inputs.resolution }}" + WORKDIR="${{ inputs.working-dir }}" + if [ -z "${WORKDIR}" ]; then + WORKDIR="run-${RESOLUTION}" + fi + + echo "workdir=${WORKDIR}" >> $GITHUB_OUTPUT + echo "resolution=${RESOLUTION}" >> $GITHUB_OUTPUT + + echo "=== Run configuration ===" + echo " Resolution: ${RESOLUTION}" + echo " Work dir: ${WORKDIR}" + echo " Timeout: ${{ inputs.run-timeout }}m" + if [ -n "${{ inputs.run-duration }}" ]; then + echo " Duration: ${{ inputs.run-duration }} (override)" + else + echo " Duration: (namelist default)" + fi + + - name: Download test case + uses: ./.github/actions/download-testdata + id: download + with: + resolution: ${{ inputs.resolution }} + dest-dir: ${{ steps.config.outputs.workdir }} + + - name: Link executable + shell: bash + run: | + WORKDIR="${{ steps.config.outputs.workdir }}" + chmod +x ${{ inputs.executable }} + ln -sf $(realpath ${{ inputs.executable }}) "${WORKDIR}/atmosphere_model" + + - name: Configure namelist overrides + 
shell: bash + working-directory: ${{ steps.config.outputs.workdir }} + run: | + if [ -n "${{ inputs.run-duration }}" ]; then + DURATION="${{ inputs.run-duration }}" + sed -i "s/config_run_duration = '[^']*'/config_run_duration = '${DURATION}'/" namelist.atmosphere + echo "Overrode config_run_duration = '${DURATION}'" + fi + + if [ -n "${{ inputs.restart-interval }}" ]; then + RESTART="${{ inputs.restart-interval }}" + sed -i '// s/output_interval="[^"]*"/output_interval="'"${RESTART}"'"/' streams.atmosphere + echo "Overrode restart output_interval = '${RESTART}'" + fi + + echo "=== Namelist ===" + grep config_run_duration namelist.atmosphere + + - name: Run MPAS-A + id: run + shell: bash + working-directory: ${{ steps.config.outputs.workdir }} + run: | + TIMEOUT="${{ inputs.run-timeout }}" + + if [ -f /container/config_env.sh ]; then + source /container/config_env.sh + fi + + # Workaround: some containers omit LD_LIBRARY_PATH from config_env.sh + if [ -z "${LD_LIBRARY_PATH}" ]; then + export LD_LIBRARY_PATH="/usr/lib64:/usr/lib" + fi + + if [ -n "${{ inputs.mpi-impl }}" ]; then + export MPI_IMPL="${{ inputs.mpi-impl }}" + fi + + CI_CONFIG="${GITHUB_WORKSPACE}/.github/ci-config.env" + if [ -f "${CI_CONFIG}" ]; then + source "${CI_CONFIG}" + fi + + MPI_FLAGS="" + if [ "${MPI_IMPL}" = "openmpi" ]; then + MPI_FLAGS="${OPENMPI_RUN_FLAGS:---allow-run-as-root --oversubscribe}" + fi + + ulimit -s unlimited 2>/dev/null || echo "Warning: Could not set unlimited stack size" + + echo "=== Run configuration ===" + echo " Resolution: ${{ steps.config.outputs.resolution }}" + echo " Processors: ${{ inputs.num-procs }}" + echo " MPI_IMPL: ${MPI_IMPL:-auto}" + echo " MPI_FLAGS: ${MPI_FLAGS}" + echo " Stack limit: $(ulimit -s)" + echo " Available CPUs: $(nproc 2>/dev/null || echo unknown)" + echo " Available RAM: $(free -m 2>/dev/null | awk '/^Mem:/{print $2 "MB"}' || echo unknown)" + echo " LD_LIBRARY_PATH: ${LD_LIBRARY_PATH:-not set}" + + set +e + timeout ${TIMEOUT}m mpirun -n ${{ 
inputs.num-procs }} ${MPI_FLAGS} ./atmosphere_model + RUN_STATUS=$? + set -e + + echo "log-dir=$(pwd)" >> $GITHUB_OUTPUT + echo "run-exit-code=${RUN_STATUS}" >> $GITHUB_OUTPUT + if [ $RUN_STATUS -eq 0 ]; then + echo "status=success" >> $GITHUB_OUTPUT + elif [ "${{ inputs.strict-exit-check }}" = "false" ]; then + echo "status=success" >> $GITHUB_OUTPUT + echo "::warning::Model exited with status ${RUN_STATUS} (non-strict mode, treating as success)" + else + echo "status=failed" >> $GITHUB_OUTPUT + echo "::warning::Model run exited with status $RUN_STATUS" + fi + + - name: List output files + shell: bash + if: always() + run: | + WORKDIR="${{ steps.config.outputs.workdir }}" + echo "=== Output files ===" + if [ -d "${WORKDIR}" ]; then + cd "${WORKDIR}" + ls -la log.* 2>/dev/null || echo "No log files found" + ls -la *.nc 2>/dev/null || echo "No NetCDF files found" + else + echo "Working directory ${WORKDIR} does not exist" + fi + + - name: Print MPAS logs + if: always() + uses: ./.github/actions/print-mpas-logs + with: + log-dir: ${{ steps.config.outputs.workdir }} + + - name: Check run status + shell: bash + if: always() + run: | + if [ "${{ steps.run.outputs.status }}" = "failed" ]; then + echo "::error::MPAS model run failed with exit code ${{ steps.run.outputs.run-exit-code }}" + exit 1 + fi diff --git a/.github/actions/run-perturb-mpas/action.yml b/.github/actions/run-perturb-mpas/action.yml new file mode 100644 index 0000000000..9af1660708 --- /dev/null +++ b/.github/actions/run-perturb-mpas/action.yml @@ -0,0 +1,284 @@ +name: 'Run Perturbed MPAS' +description: > + Run one or more perturbed MPAS-A ensemble members for ECT. + Handles IC perturbation, namelist/stream configuration, model execution, + and optional history file trimming. Supports both single-member + (ect-test) and batched (ect-ensemble-gen) use cases. 
+ +inputs: + base-dir: + description: 'Path to the base test case directory (copied per member)' + required: true + executable: + description: 'Path to atmosphere_model executable' + required: true + member-start: + description: 'First ensemble member index' + required: true + member-end: + description: 'Last ensemble member index (same as member-start for single member)' + required: true + run-duration: + description: 'Model run duration per member (format: D_HH:MM:SS)' + required: true + run-timeout: + description: 'Per-member timeout in minutes' + required: false + default: '45' + num-ranks: + description: 'Number of MPI ranks per member' + required: false + default: '1' + mpi-impl: + description: 'MPI implementation (openmpi, mpich)' + required: false + default: 'openmpi' + output-dir: + description: 'Directory to collect trimmed history files' + required: false + default: 'history-output' + trim: + description: 'Trim history files after run (true/false)' + required: false + default: 'true' + restart-file: + description: 'Path to a spun-up restart file. When set, each member starts from this restart (with config_do_restart=.true.) instead of init.nc.' + required: false + default: '' + verbose: + description: 'Print detailed diagnostics for each member (namelist dumps, theta stats)' + required: false + default: 'false' + +outputs: + status: + description: 'Overall status (success/partial/failed)' + value: ${{ steps.run-members.outputs.status }} + members-completed: + description: 'Number of members that produced history files' + value: ${{ steps.run-members.outputs.members-completed }} + +runs: + using: 'composite' + steps: + - name: Run perturbed members + id: run-members + shell: bash + run: | + source /container/config_env.sh + + if command -v conda &>/dev/null; then + eval "$(conda shell.bash hook)" 2>/dev/null + conda activate base 2>/dev/null || true + fi + + if ! 
python3 -c "import netCDF4, numpy" 2>/dev/null; then + if python3 -m pip install --quiet netCDF4 numpy 2>/dev/null; then + echo "Installed netCDF4/numpy via pip" + elif command -v conda &>/dev/null; then + conda install -y -q netCDF4 numpy + echo "Installed netCDF4/numpy via conda" + else + echo "::error::Could not install netCDF4 (no pip, no conda)" + exit 1 + fi + fi + echo "python3: $(which python3) — $(python3 --version)" + + if [ -z "${LD_LIBRARY_PATH}" ]; then + export LD_LIBRARY_PATH="/usr/lib64:/usr/lib" + fi + + CI_CONFIG="${GITHUB_WORKSPACE}/.github/ci-config.env" + if [ -f "${CI_CONFIG}" ]; then + source "${CI_CONFIG}" + fi + ulimit -s unlimited 2>/dev/null || true + + BASEDIR="${{ inputs.base-dir }}" + EXE="${{ inputs.executable }}" + MEMBER_START=${{ inputs.member-start }} + MEMBER_END=${{ inputs.member-end }} + NRANKS=${{ inputs.num-ranks }} + MPI_IMPL="${{ inputs.mpi-impl }}" + OUTDIR="${{ inputs.output-dir }}" + TRIM="${{ inputs.trim }}" + RESTART_FILE="${{ inputs.restart-file }}" + VERBOSE="${{ inputs.verbose }}" + RUN_DURATION="${{ inputs.run-duration }}" + RUN_TIMEOUT="${{ inputs.run-timeout }}" + + MPI_FLAGS="" + if [ "${MPI_IMPL}" = "openmpi" ]; then + MPI_FLAGS="${OPENMPI_RUN_FLAGS:---allow-run-as-root --oversubscribe}" + fi + + EXCLUDE_FILE="" + if [ -n "${ECT_EXCLUDED_VARS}" ]; then + EXCLUDE_FILE="${GITHUB_WORKSPACE}/${ECT_EXCLUDED_VARS}" + fi + + mkdir -p "${OUTDIR}" + + TOTAL=0 + COMPLETED=0 + + for MEMBER in $(seq ${MEMBER_START} ${MEMBER_END}); do + TOTAL=$((TOTAL + 1)) + MEMBER_ID=$(printf "%04d" ${MEMBER}) + RUNDIR="run-ect-${MEMBER_ID}" + echo "" + echo "==========================================" + echo " Ensemble member ${MEMBER_ID}" + echo "==========================================" + + cp -r "${BASEDIR}" "${RUNDIR}" + chmod +x "${EXE}" + ln -sf $(realpath "${EXE}") "${RUNDIR}/atmosphere_model" + + if [ -n "${RESTART_FILE}" ]; then + RESTART_TIME=$(python3 -c " + import netCDF4, sys + ds = netCDF4.Dataset(sys.argv[1]) + 
print(ds.variables['xtime'][0].tobytes().decode().strip()) + ds.close() + " "${RESTART_FILE}") + RESTART_FNAME="restart.$(echo ${RESTART_TIME} | tr ':' '.').nc" + cp "${RESTART_FILE}" "${RUNDIR}/${RESTART_FNAME}" + echo "${RESTART_TIME}" > "${RUNDIR}/restart_timestamp" + PERTURB_FILE="${RUNDIR}/${RESTART_FNAME}" + echo "[$(date +%H:%M:%S)] Restart mode: ${RESTART_FNAME} (time=${RESTART_TIME})" + else + PERTURB_FILE="${RUNDIR}/init.nc" + if [ ! -f "${PERTURB_FILE}" ]; then + PERTURB_FILE=$(ls ${RUNDIR}/*.init*.nc 2>/dev/null | head -1) + fi + fi + + if [ -z "${PERTURB_FILE}" ] || [ ! -f "${PERTURB_FILE}" ]; then + echo "::error::Member ${MEMBER_ID}: could not find file to perturb" + rm -rf "${RUNDIR}" + continue + fi + + echo "[$(date +%H:%M:%S)] Perturbing theta (seed=${MEMBER})..." + python3 ${GITHUB_WORKSPACE}/.github/actions/run-perturb-mpas/perturb_theta.py \ + "${PERTURB_FILE}" --seed ${MEMBER} --magnitude ${ECT_PERTURB_MAGNITUDE} + + if [ "${VERBOSE}" = "true" ]; then + echo " Restart MD5: $(md5sum "${PERTURB_FILE}" | cut -d' ' -f1)" + fi + + cd "${RUNDIR}" + + if [ -n "${RESTART_FILE}" ]; then + sed -i "s/config_do_restart.*/config_do_restart = .true./" namelist.atmosphere + sed -i "s/config_start_time.*/config_start_time = 'file'/" namelist.atmosphere + sed -i '/&restart/a\ config_do_DAcycling = .true.' namelist.atmosphere + fi + + sed -i "s/config_run_duration = '[^']*'/config_run_duration = '${RUN_DURATION}'/" namelist.atmosphere + sed -i '// s/output_interval="[^"]*"/output_interval="none"/' streams.atmosphere + sed -i '// s/output_interval="[^"]*"/output_interval="'"${RUN_DURATION}"'"/' streams.atmosphere + + echo "[$(date +%H:%M:%S)] Running MPAS-A (${RUN_DURATION}, ${NRANKS} ranks)..." + set +e + timeout ${RUN_TIMEOUT}m mpirun -n ${NRANKS} ${MPI_FLAGS} ./atmosphere_model + RUN_STATUS=$? 
+ set -e + echo "[$(date +%H:%M:%S)] Model finished (exit code ${RUN_STATUS})" + + for LOGFILE in log.atmosphere.*.out log.atmosphere.*.err; do + [ -f "${LOGFILE}" ] || continue + EXT="${LOGFILE##*.}" + BASE="${LOGFILE%.*}" + cp "${LOGFILE}" "../${OUTDIR}/${BASE}.member${MEMBER_ID}.${EXT}" + done + + if [ "${VERBOSE}" = "true" ]; then + echo "[$(date +%H:%M:%S)] === Namelist config ===" + grep -E 'config_do_restart|config_start_time|config_run_duration' namelist.atmosphere || true + echo "[$(date +%H:%M:%S)] === Restart stream ===" + grep -A2 'immutable_stream name="restart"' streams.atmosphere || true + echo "[$(date +%H:%M:%S)] === Output stream ===" + grep -A2 'stream name="output"' streams.atmosphere || true + + RESTART_NC=$(ls restart.*.nc 2>/dev/null | head -1) + if [ -n "${RESTART_NC}" ]; then + echo "[$(date +%H:%M:%S)] === Restart theta check ===" + python3 -c " + import netCDF4 as nc, numpy as np, sys + ds = nc.Dataset(sys.argv[1]) + th = ds.variables['theta'][:] + print(f' File: {sys.argv[1]}') + print(f' theta dtype={th.dtype} shape={th.shape} mean={np.mean(th):.15e}') + ds.close() + " "${RESTART_NC}" + fi + fi + + echo "[$(date +%H:%M:%S)] History files produced:" + ls -la history.*.nc 2>/dev/null || echo " (none)" + HIST_FILE=$(ls -t history.*.nc 2>/dev/null | head -1 || true) + if [ -n "${HIST_FILE}" ]; then + if [ "${VERBOSE}" = "true" ]; then + echo "[$(date +%H:%M:%S)] === History theta check ===" + python3 -c " + import netCDF4 as nc, numpy as np, sys + ds = nc.Dataset(sys.argv[1]) + if 'theta' in ds.variables: + th = ds.variables['theta'] + data = th[:] + print(f' theta dtype={data.dtype} shape={data.shape} mean={np.mean(data):.15e} min={np.min(data):.10e} max={np.max(data):.10e}') + else: + print(' theta NOT in history variables') + print(f' Available: {list(ds.variables.keys())[:15]}...') + ds.close() + " "${HIST_FILE}" + fi + TSLICE=$(python3 -c " + import netCDF4, sys + ds = netCDF4.Dataset(sys.argv[1]) + print(ds.dimensions['Time'].size - 
1) + ds.close() + " "${HIST_FILE}") + echo "[$(date +%H:%M:%S)] Using tslice=${TSLICE} (last time slice)" + if [ "${TRIM}" = "true" ] && [ -n "${EXCLUDE_FILE}" ] && [ -f "${EXCLUDE_FILE}" ]; then + python3 ${GITHUB_WORKSPACE}/.github/actions/run-perturb-mpas/trim_history.py \ + "${HIST_FILE}" "../${OUTDIR}/history.${MEMBER_ID}.nc" \ + --tslice ${TSLICE} \ + --exclude-file "${EXCLUDE_FILE}" + echo "[$(date +%H:%M:%S)] Saved trimmed history for member ${MEMBER_ID}" + else + cp "${HIST_FILE}" "../${OUTDIR}/history.${MEMBER_ID}.nc" + echo "[$(date +%H:%M:%S)] Saved history for member ${MEMBER_ID}" + fi + COMPLETED=$((COMPLETED + 1)) + else + echo "::warning::Member ${MEMBER_ID} failed (exit ${RUN_STATUS}, no history file)" + fi + + cd .. + rm -rf "${RUNDIR}" + done + + echo "" + echo "=== Completed ${COMPLETED}/${TOTAL} members ===" + ls -la "${OUTDIR}/" + + echo "members-completed=${COMPLETED}" >> $GITHUB_OUTPUT + if [ ${COMPLETED} -eq ${TOTAL} ]; then + echo "status=success" >> $GITHUB_OUTPUT + elif [ ${COMPLETED} -gt 0 ]; then + echo "status=partial" >> $GITHUB_OUTPUT + else + echo "status=failed" >> $GITHUB_OUTPUT + echo "::error::No ensemble members produced history files" + exit 1 + fi + + - name: Print MPAS logs + if: always() + uses: ./.github/actions/print-mpas-logs + with: + log-dir: ${{ inputs.output-dir }} diff --git a/.github/actions/run-perturb-mpas/perturb_theta.py b/.github/actions/run-perturb-mpas/perturb_theta.py new file mode 100644 index 0000000000..62ab4520f7 --- /dev/null +++ b/.github/actions/run-perturb-mpas/perturb_theta.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +""" +Apply O(10^-14) perturbation to the theta (potential temperature) field +in an MPAS initial conditions file to generate ensemble diversity. + +This is a placeholder until native perturbation support is added to MPAS-A. +Each ensemble member uses a unique seed for reproducible perturbations. 
+""" + +import argparse +import sys + +import numpy as np + +try: + from netCDF4 import Dataset +except ImportError: + print("ERROR: netCDF4 is required. Install with: pip install netCDF4") + sys.exit(1) + + +def perturb_theta(ic_file, seed, magnitude=1e-14): + rng = np.random.default_rng(seed) + + with Dataset(ic_file, "r+") as ds: + if "theta" not in ds.variables: + print(f"ERROR: 'theta' variable not found in {ic_file}") + print(f" Available variables: {list(ds.variables.keys())[:20]}...") + sys.exit(1) + + theta = ds.variables["theta"] + data = theta[:] + original_mean = float(np.mean(data)) + + perturbation = rng.uniform(-magnitude, magnitude, size=data.shape) + theta[:] = data * (1.0 + perturbation) + + actual_max = np.max(np.abs(perturbation)) + print(f"Applied perturbation to theta field:") + print(f" File: {ic_file}") + print(f" Format: {ds.data_model}") + print(f" Seed: {seed}") + print(f" Magnitude: +/- {magnitude:.0e}") + print(f" Max |eps|: {actual_max:.2e}") + print(f" Shape: {data.shape}") + print(f" Var dtype: {theta.dtype}") + print(f" Original mean: {original_mean:.15e}") + + # Read-back verification: reopen and confirm perturbation persisted + with Dataset(ic_file, "r") as ds: + verify = ds.variables["theta"][:] + verify_mean = float(np.mean(verify)) + diff = verify.astype(np.float64) - data.astype(np.float64) + n_changed = int(np.count_nonzero(diff)) + max_diff = float(np.max(np.abs(diff))) + print(f" Verify mean: {verify_mean:.15e}") + print(f" Changed cells: {n_changed}/{diff.size}") + print(f" Max |diff|: {max_diff:.6e}") + if n_changed == 0: + print(f"ERROR: Perturbation did NOT persist in file!") + print(f" On-disk dtype: {ds.variables['theta'].dtype}") + print(f" If dtype is float32, perturbations below ~1.2e-7 will be rounded away.") + sys.exit(1) + + +def main(): + parser = argparse.ArgumentParser( + description="Perturb MPAS theta field for ensemble generation" + ) + parser.add_argument("ic_file", help="Path to MPAS initial conditions 
NetCDF file") + parser.add_argument("--seed", type=int, required=True, + help="Random seed for reproducible perturbation") + parser.add_argument("--magnitude", type=float, default=1e-14, + help="Perturbation magnitude (default: 1e-14)") + args = parser.parse_args() + + perturb_theta(args.ic_file, args.seed, args.magnitude) + + +if __name__ == "__main__": + main() diff --git a/.github/actions/run-perturb-mpas/trim_history.py b/.github/actions/run-perturb-mpas/trim_history.py new file mode 100644 index 0000000000..706b4268ef --- /dev/null +++ b/.github/actions/run-perturb-mpas/trim_history.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +""" +Trim MPAS history files for ECT ensemble processing. + +Extracts a single time slice, removes excluded variables, strips most +static mesh geometry, and applies lossless NetCDF4 deflation (zlib). + +PyCECT requires three area-weighting variables (areaCell, dvEdge, +areaTriangle) to compute global means. These static variables are +preserved even though they lack a Time dimension. + +The ECT exclusion list path is configured via ECT_EXCLUDED_VARS in ci-config.env +and passed via --exclude-file. 
+ +Usage: + python3 trim_history.py input.nc output.nc --tslice -1 --exclude-file excluded_vars.txt +""" + +import argparse +import os +import sys + +import netCDF4 as nc + +# Static variables PyCECT reads for area-weighted global means +# (see pyEnsLib.py generate_global_mean_for_summary_MPAS, lines 745-758) +PYCECT_REQUIRED_STATIC = {'areaCell', 'dvEdge', 'areaTriangle'} + + +def trim_history(infile, outfile, tslice, exclude_vars=None): + exclude = set(exclude_vars or []) + + with nc.Dataset(infile, 'r') as src, nc.Dataset(outfile, 'w', format='NETCDF4') as dst: + ntime = src.dimensions['Time'].size + if tslice < 0: + tslice = ntime + tslice + if tslice < 0 or tslice >= ntime: + print(f"ERROR: tslice={tslice} out of range for {ntime} time slice(s) in {infile}") + sys.exit(1) + dst.setncatts({k: src.getncattr(k) for k in src.ncattrs()}) + + # Copy ALL dimensions — PyCECT checks nCells/nEdges/nVertices + for dname, dim in src.dimensions.items(): + if dname == 'Time': + dst.createDimension(dname, 1) + else: + dst.createDimension(dname, len(dim)) + + # Identify variables to keep: + # 1. Time-varying variables not in the exclude list + # 2. 
Static variables required by PyCECT for area weighting + keep_dynamic = {} + keep_static = {} + for name, var in src.variables.items(): + if name in exclude: + continue + if 'Time' in var.dimensions: + keep_dynamic[name] = var + elif name in PYCECT_REQUIRED_STATIC: + keep_static[name] = var + + kept = 0 + + # Write static variables (no time slicing needed) + for name, var in keep_static.items(): + outvar = dst.createVariable(name, var.dtype, var.dimensions) + outvar.setncatts({k: var.getncattr(k) for k in var.ncattrs()}) + outvar[:] = var[:] + kept += 1 + + # Write time-varying variables (extract single time slice, compress) + for name, var in keep_dynamic.items(): + dims = var.dimensions + use_zlib = var.size > 1000 + outvar = dst.createVariable( + name, var.dtype, dims, + zlib=use_zlib, complevel=1) + outvar.setncatts({k: var.getncattr(k) for k in var.ncattrs()}) + + tidx = dims.index('Time') + slices = [slice(None)] * len(dims) + slices[tidx] = slice(tslice, tslice + 1) + outvar[:] = var[tuple(slices)] + kept += 1 + + skipped = len(src.variables) - kept + + in_size = os.path.getsize(infile) / 1048576 + out_size = os.path.getsize(outfile) / 1048576 + print(f"Kept {kept} variables ({len(keep_dynamic)} dynamic + " + f"{len(keep_static)} static), dropped {skipped}, " + f"tslice={tslice}, {in_size:.0f}MB -> {out_size:.0f}MB") + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='Trim MPAS history files for ECT') + parser.add_argument('input', help='Input history file') + parser.add_argument('output', help='Output trimmed file') + parser.add_argument('--tslice', type=int, default=-1, + help='Time slice index to extract (negative counts from end, e.g. 
-1 = last)') + parser.add_argument('--exclude-file', + help='File listing variable names to exclude') + args = parser.parse_args() + + exclude = [] + if args.exclude_file: + with open(args.exclude_file) as f: + exclude = [line.strip() for line in f + if line.strip() and not line.startswith('#')] + + trim_history(args.input, args.output, args.tslice, exclude) diff --git a/.github/actions/validate-ect/action.yml b/.github/actions/validate-ect/action.yml new file mode 100644 index 0000000000..c6037eaa89 --- /dev/null +++ b/.github/actions/validate-ect/action.yml @@ -0,0 +1,210 @@ +name: 'Validate ECT' +description: > + Run PyCECT against an ensemble summary file to validate history output. + Installs PyCECT at the pinned version from ci-config.env, downloads the + ensemble summary from a GitHub release, runs pyCECT, and writes an + enriched result file with dimension metadata for the summary action. + +inputs: + history-dir: + description: 'Path to directory containing history .nc files to validate' + required: true + label: + description: 'Human-readable label for log annotations (e.g. gcc/mpich3/smiol/4proc)' + required: true + mpas-version: + description: 'MPAS version string (used to build the ect-v{version} release tag)' + required: true + dimensions: + description: 'Multi-line key=value pairs describing this test combination (written into result file)' + required: false + default: '' + +outputs: + result: + description: 'ECT result: PASSED, FAILED, SKIPPED, ERROR, or NO_DATA (summary was available but PyCECT produced no result)' + value: ${{ steps.outcome.outputs.result }} + available: + description: 'Whether the ensemble summary file was found (true/false)' + value: ${{ steps.summary.outputs.available }} + +runs: + using: 'composite' + steps: + - name: Load ECT configuration + id: config + shell: bash + run: | + CI_CONFIG=".github/ci-config.env" + if [ ! 
-f "${CI_CONFIG}" ]; then + echo "::error::ci-config.env not found" + exit 1 + fi + source "${CI_CONFIG}" + + # ECT release tag is derived from the MPAS version (passed in), + # not stored in ci-config.env. See .github/actions/mpas-version. + echo "summary-file=${ECT_SUMMARY_FILE}" >> $GITHUB_OUTPUT + echo "pycect-tag=${PYCECT_TAG}" >> $GITHUB_OUTPUT + echo "pycect-commit=${PYCECT_COMMIT}" >> $GITHUB_OUTPUT + echo "release-tag=ect-v${{ inputs.mpas-version }}" >> $GITHUB_OUTPUT + + - name: Install PyCECT dependencies + shell: bash + run: pip install "numpy<2" scipy netCDF4 + + - name: Clone PyCECT + shell: bash + run: | + TAG="${{ steps.config.outputs.pycect-tag }}" + COMMIT="${{ steps.config.outputs.pycect-commit }}" + echo "Cloning PyCECT at tag ${TAG} (commit ${COMMIT})..." + git clone --branch "${TAG}" https://github.com/NCAR/PyCECT.git pycect + cd pycect + if [ -n "${COMMIT}" ]; then + ACTUAL=$(git rev-parse HEAD) + if [ "${ACTUAL}" != "${COMMIT}" ]; then + echo "::error::PyCECT commit mismatch: expected ${COMMIT}, got ${ACTUAL}" + exit 1 + fi + fi + + - name: Download ensemble summary file + id: summary + shell: bash + run: | + source .github/ci-config.env + SUMMARY="${{ steps.config.outputs.summary-file }}" + TAG="${{ steps.config.outputs.release-tag }}" + REPO="${DATA_REPOSITORY:-${GITHUB_REPOSITORY}}" + URL="https://github.com/${REPO}/releases/download/${TAG}/${SUMMARY}" + + echo "Downloading ${SUMMARY} from release ${TAG}..." 
+ HTTP_CODE=$(curl --retry 5 --retry-delay 5 -sL -w "%{http_code}" \ + "${URL}" -o "${SUMMARY}") + if [ "${HTTP_CODE}" != "200" ]; then + echo "::warning::Ensemble summary not available at ${URL} (HTTP ${HTTP_CODE})" + echo "available=false" >> $GITHUB_OUTPUT + exit 0 + fi + echo "Downloaded summary file: $(du -h ${SUMMARY})" + echo "available=true" >> $GITHUB_OUTPUT + echo "summary-file=${SUMMARY}" >> $GITHUB_OUTPUT + + - name: List test files + if: steps.summary.outputs.available == 'true' + shell: bash + run: | + echo "=== ECT test history files ===" + ls -la ${{ inputs.history-dir }}/ 2>/dev/null || echo "No history files found" + + - name: Run PyCECT + id: pycect + if: steps.summary.outputs.available == 'true' + shell: bash + run: | + LABEL="${{ inputs.label }}" + # --tslice 0 is invariant: history files are pre-trimmed to a single + # time slice by run-perturb-mpas (trim_history.py creates Time=1). + set +e + python pycect/pyCECT.py \ + --sumfile ${{ steps.summary.outputs.summary-file }} \ + --indir ${{ inputs.history-dir }} \ + --tslice 0 \ + --mpas \ + --verbose \ + --printStdMean \ + 2>&1 | tee pycect_output.txt + PYCECT_STATUS=$? 
+ set -e + + if [ $PYCECT_STATUS -ne 0 ]; then + echo "::error::pyCECT crashed with exit code ${PYCECT_STATUS}" + echo "result=ERROR" >> $GITHUB_OUTPUT + exit 1 + fi + + if grep -qF '****PASSED****' pycect_output.txt; then + echo "::notice::ECT (${LABEL}): PASSED" + echo "result=PASSED" >> $GITHUB_OUTPUT + elif grep -qF '****FAILED****' pycect_output.txt; then + echo "::error::ECT (${LABEL}): FAILED" + echo "result=FAILED" >> $GITHUB_OUTPUT + exit 1 + else + echo "::warning::Could not determine ECT result from pyCECT output" + echo "result=ERROR" >> $GITHUB_OUTPUT + exit 1 + fi + + - name: ECT Results + if: steps.pycect.outputs.result != '' + shell: bash + run: | + LABEL="${{ inputs.label }}" + RESULT="${{ steps.pycect.outputs.result }}" + + if [ "${RESULT}" = "PASSED" ]; then + ICON=":white_check_mark:" + else + ICON=":x:" + fi + + { + echo "## ECT Result: ${ICON} ${RESULT}" + echo "" + echo "**Configuration:** \`${LABEL}\`" + echo "" + echo '```' + grep -E '(PASSED|FAILED|global|regional|Overall)' pycect_output.txt || true + echo '```' + if grep -q 'standardized mean' pycect_output.txt; then + echo "" + echo "
<details><summary>Standardized Mean Summary</summary>" + echo "" + echo '```' + grep 'standardized mean' pycect_output.txt || true + echo '```' + echo "</details>
" + fi + } >> "$GITHUB_STEP_SUMMARY" + + echo "================================" + echo " ECT Result: ${RESULT}" + echo " Label: ${LABEL}" + echo "================================" + + - name: Fail if summary unavailable + if: steps.summary.outputs.available != 'true' + shell: bash + run: | + { + echo "## ECT Result: :warning: SKIPPED" + echo "" + echo "Ensemble summary file not found. Run \`ect-ensemble-gen.yml\` first." + } >> "$GITHUB_STEP_SUMMARY" + echo "::error::ECT validation cannot run — ensemble summary file not found. Run ect-ensemble-gen.yml first." + exit 1 + + - name: Write result file + id: outcome + if: always() + shell: bash + run: | + RESULT="${{ steps.pycect.outputs.result }}" + if [ -z "${RESULT}" ]; then + if [ "${{ steps.summary.outputs.available }}" != "true" ]; then + RESULT="SKIPPED" + else + RESULT="NO_DATA" + fi + fi + echo "result=${RESULT}" >> $GITHUB_OUTPUT + + { + echo "result=${RESULT}" + DIMS="${{ inputs.dimensions }}" + if [ -n "${DIMS}" ]; then + echo "${DIMS}" + fi + } > ect-result.txt diff --git a/.github/ci-config.env b/.github/ci-config.env new file mode 100644 index 0000000000..7d50cad77c --- /dev/null +++ b/.github/ci-config.env @@ -0,0 +1,75 @@ +# .github/ci-config.env — Central CI configuration +# +# Sourced by workflows and composite actions. Edit here to change +# containers, compilers, test data versions, or ECT parameters. + + +# ── Container images ────────────────────────────── +# {compiler} and {mpi} are replaced by resolve-container. +# Tags: https://hub.docker.com/r/ncarcisl/hpcdev-x86_64/tags + +CONTAINER_IMAGE="docker.io/ncarcisl/hpcdev-x86_64:almalinux9-{compiler}-{mpi}-26.02" + +# Per-compiler override: CONTAINER_IMAGE_{compiler}="template" + + +# ── Container name mappings ─────────────────────── +# Map CI compiler/MPI names to Docker Hub tag strings when they differ. 
+ +CONTAINER_COMPILER_gcc=gcc14 +# CONTAINER_MPI_mpich=mpich3 + + +# ── Makefile target mapping ─────────────────────── +# MAKE_TARGET_{compiler} → MPAS Makefile target name. + +MAKE_TARGET_gcc=gfortran +MAKE_TARGET_nvhpc=nvhpc +MAKE_TARGET_oneapi=intel +MAKE_TARGET_clang=llvm + + +# ── Compiler workarounds ───────────────────────── + +# Portable target arch (CI builds and runs on different hosts). +NVHPC_TARGET_ARCH=-tp=px + +# Disable MPI Fortran 2008 bindings (not supported by MPICH +# builds in current containers). +NVHPC_EXTRA_MAKE_FLAGS="MPAS_MPI_F08=0" +ONEAPI_EXTRA_MAKE_FLAGS="MPAS_MPI_F08=0" + + +# ── MPI runtime flags ──────────────────────────── +# Needed because CI runs inside containers as root. +# Used in actions/run-mpas and actions/run-perturb-mpas. + +OPENMPI_RUN_FLAGS="--allow-run-as-root --oversubscribe" + + +# ── Test data ───────────────────────────────────── +# Test data and ECT summaries are hosted as GitHub release assets +# on NCAR/MPAS-Model-CI. Workflows use this variable to resolve +# download URLs. + +DATA_REPOSITORY=NCAR/MPAS-Model-CI + +RELEASE_TESTDATA_240KM=testdata-240km-v3 +RELEASE_TESTDATA_120KM=testdata-120km-v2 + + +# ── ECT configuration ──────────────────────────── +# Ensemble Consistency Test (PyCECT) parameters. +# The ECT release tag (ect-v{MPAS_VERSION}) is derived at runtime +# from src/core_atmosphere/Registry.xml by the mpas-version action. 
+ +ECT_RESOLUTION=120km +ECT_ENSEMBLE_SIZE=200 +ECT_PERTURB_MAGNITUDE=1e-14 +ECT_PERTURB_VARIABLE=theta +ECT_SUMMARY_FILE=mpas_ect_summary_120km.nc +ECT_RESTART_FILE=120km-spinup-restart.nc +ECT_EXCLUDED_VARS=.github/data/ect_excluded_vars.txt +PYCECT_TAG=3.3.1 +PYCECT_COMMIT=b3c36a9d72ee211f396d1bc7078f6d5466916b0a + diff --git a/.github/data/ect_excluded_vars.txt b/.github/data/ect_excluded_vars.txt new file mode 100644 index 0000000000..478c6e786a --- /dev/null +++ b/.github/data/ect_excluded_vars.txt @@ -0,0 +1,35 @@ +# Variables excluded from ECT history files to reduce artifact size. +# These are either not analyzed by PyCECT or are expensive research +# diagnostics not needed for consistency testing. +# +# Ertel PV diagnostics (3D cell fields, ~12MB each on 120km mesh) +ertel_pv +u_pv +v_pv +theta_pv +vort_pv +iLev_DT +# +# PV tendency terms (3D cell fields, only present with physics enabled) +depv_dt_lw +depv_dt_sw +depv_dt_bl +depv_dt_cu +depv_dt_mix +dtheta_dt_mp +depv_dt_mp +depv_dt_diab +depv_dt_fric +depv_dt_diab_pv +depv_dt_fric_pv +# +# Edge velocity — PyCECT recommends uReconstructZonal/Meridional instead +u +# +# Integer variables — PyCECT excludes these automatically +i_rainnc +i_rainc +kpbl +# +# Time metadata string +xtime diff --git a/.github/workflows/_test-compiler.yml b/.github/workflows/_test-compiler.yml new file mode 100644 index 0000000000..bd132eb139 --- /dev/null +++ b/.github/workflows/_test-compiler.yml @@ -0,0 +1,258 @@ +# Reusable workflow: build and validate MPAS-A for a single compiler+MPI using ECT. +# Called by per-compiler per-MPI subset workflows (e.g. test-gcc-mpich.yml). +# +# Runs ECT with 4 MPI ranks. Validation uses the Ensemble Consistency Test +# (PyCECT) — perturbed runs compared against a pre-built ensemble summary — +# instead of bit-for-bit log comparison. 
+ +name: _test-compiler + +permissions: + contents: read + +on: + workflow_call: + inputs: + compiler: + description: 'Compiler family (gcc, nvhpc, oneapi)' + required: true + type: string + mpi: + description: 'MPI implementation (mpich, openmpi)' + required: true + type: string + mpas-repository: + description: 'MPAS source repo (e.g. MPAS-Dev/MPAS-Model). Empty = this repo.' + required: false + type: string + default: '' + mpas-ref: + description: 'Git ref in the MPAS source repo (branch, tag, SHA)' + required: false + type: string + default: '' + +jobs: + config: + name: Resolve Config + runs-on: ubuntu-latest + outputs: + image: ${{ steps.container.outputs.image }} + mpas-version: ${{ steps.version.outputs.version }} + steps: + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 + with: + sparse-checkout: | + .github + src/core_atmosphere/Registry.xml + sparse-checkout-cone-mode: false + persist-credentials: false + - uses: ./.github/actions/resolve-container + id: container + with: + compiler: ${{ inputs.compiler }} + mpi: ${{ inputs.mpi }} + - uses: ./.github/actions/mpas-version + id: version + + build: + needs: config + name: Build (${{ inputs.compiler }}, ${{ inputs.mpi }}, smiol) + runs-on: ubuntu-latest + container: + image: ${{ needs.config.outputs.image }} + + steps: + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 + with: + repository: ${{ inputs.mpas-repository || github.repository }} + ref: ${{ inputs.mpas-ref || '' }} + submodules: 'true' + persist-credentials: false + + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 + if: ${{ inputs.mpas-repository != '' }} + with: + path: _ci + sparse-checkout: .github + persist-credentials: false + + - name: Overlay CI infrastructure + if: ${{ inputs.mpas-repository != '' }} + shell: bash + run: | + cp -r _ci/.github . 
&& rm -rf _ci + echo "## Source: ${{ inputs.mpas-repository }}@${{ inputs.mpas-ref }}" >> "$GITHUB_STEP_SUMMARY" + + - name: Build MPAS-A (double precision) + uses: ./.github/actions/build-mpas + with: + compiler: ${{ inputs.compiler }} + use-pio: 'false' + precision: double + + - name: Upload executable + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 + with: + name: exe-${{ inputs.compiler }}-${{ inputs.mpi }}-smiol + path: atmosphere_model + retention-days: 1 + + ect-run: + needs: [config, build] + if: ${{ needs.build.result == 'success' }} + strategy: + fail-fast: false + matrix: + member: [0, 1, 2] + + name: ECT member ${{ matrix.member }} (${{ inputs.compiler }}, ${{ inputs.mpi }}) + runs-on: ubuntu-latest + container: + image: ${{ needs.config.outputs.image }} + + steps: + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 + with: + persist-credentials: false + + - name: Download executable + id: download + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7 + continue-on-error: true + with: + name: exe-${{ inputs.compiler }}-${{ inputs.mpi }}-smiol + + - name: Download test case + if: steps.download.outcome == 'success' + uses: ./.github/actions/download-testdata + with: + resolution: 120km + dest-dir: base-case + + - name: Restore cached restart + if: steps.download.outcome == 'success' + id: cache-restart + uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5 + with: + path: spinup-restart.nc + key: ect-spinup-restart-${{ hashFiles('.github/ci-config.env') }} + + - name: Download spin-up restart + if: steps.download.outcome == 'success' + id: restart + shell: bash + run: | + source .github/ci-config.env + RESTART="${ECT_RESTART_FILE}" + RELEASE_TAG="ect-v${{ needs.config.outputs.mpas-version }}" + + if [ -f "spinup-restart.nc" ]; then + echo "Using cached restart file" + mv spinup-restart.nc "${RESTART}" + echo "available=true" >> $GITHUB_OUTPUT + echo 
"file=${RESTART}" >> $GITHUB_OUTPUT + exit 0 + fi + + DATA_REPO="${DATA_REPOSITORY:-${GITHUB_REPOSITORY}}" + echo "Downloading ${RESTART}.gz from ${DATA_REPO} release ${RELEASE_TAG}..." + HTTP_CODE=$(curl -sL --retry 5 --retry-delay 5 -w "%{http_code}" \ + "https://github.com/${DATA_REPO}/releases/download/${RELEASE_TAG}/${RESTART}.gz" \ + -o "${RESTART}.gz") + if [ "${HTTP_CODE}" = "200" ]; then + gunzip "${RESTART}.gz" + echo "Downloaded restart: $(du -h ${RESTART})" + echo "available=true" >> $GITHUB_OUTPUT + echo "file=${RESTART}" >> $GITHUB_OUTPUT + else + echo "::warning::Spin-up restart not available (HTTP ${HTTP_CODE}), running from cold-start init.nc" + echo "available=false" >> $GITHUB_OUTPUT + fi + + - name: Run perturbed MPAS-A (member ${{ matrix.member }}) + if: steps.download.outcome == 'success' + uses: ./.github/actions/run-perturb-mpas + with: + base-dir: base-case + executable: ./atmosphere_model + member-start: '${{ matrix.member }}' + member-end: '${{ matrix.member }}' + num-ranks: '4' + mpi-impl: ${{ inputs.mpi }} + run-duration: '0_02:36:00' + run-timeout: '45' + restart-file: ${{ steps.restart.outputs.available == 'true' && steps.restart.outputs.file || '' }} + + - name: Upload history file + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 + if: always() && steps.download.outcome == 'success' + with: + name: ect-history-${{ inputs.compiler }}-${{ inputs.mpi }}-member${{ matrix.member }} + path: history-output/history.*.nc + retention-days: 1 + + ect-validate: + needs: [config, ect-run] + if: ${{ needs.ect-run.result == 'success' }} + name: ECT Validate (${{ inputs.compiler }}, ${{ inputs.mpi }}) + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 + with: + sparse-checkout: .github + persist-credentials: false + + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 + with: + python-version: '3.11' + + - name: Download history files + id: 
download + continue-on-error: true + uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7 + with: + pattern: ect-history-${{ inputs.compiler }}-${{ inputs.mpi }}-* + path: ect-test-files + merge-multiple: true + + - name: Run ECT validation + if: steps.download.outcome == 'success' + uses: ./.github/actions/validate-ect + with: + history-dir: ect-test-files + label: ${{ inputs.compiler }}/${{ inputs.mpi }}/smiol + mpas-version: ${{ needs.config.outputs.mpas-version }} + dimensions: | + compiler=${{ inputs.compiler }} + mpi=${{ inputs.mpi }} + io=smiol + + - name: Upload result + if: always() + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6 + with: + name: ect-result-${{ inputs.compiler }}-${{ inputs.mpi }} + path: ect-result.txt + retention-days: 1 + + cleanup: + needs: [ect-run, ect-validate] + if: always() + runs-on: ubuntu-latest + name: Cleanup + permissions: + actions: write + + steps: + - name: Delete temporary artifacts + env: + GH_TOKEN: ${{ github.token }} + shell: bash + run: | + gh api repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/artifacts \ + --paginate --jq '.artifacts[] | select(.name | startswith("exe-") or startswith("ect-history-")) | .id' \ + | while read id; do + gh api -X DELETE repos/${{ github.repository }}/actions/artifacts/$id || true + done || true diff --git a/.github/workflows/test-gcc-mpich.yml b/.github/workflows/test-gcc-mpich.yml new file mode 100644 index 0000000000..92085b509e --- /dev/null +++ b/.github/workflows/test-gcc-mpich.yml @@ -0,0 +1,19 @@ +# Subset CI: GNU + MPICH (CPU) +# Quick ECT validation with 4 MPI ranks. 
+ +name: "GNU+MPICH (CPU)" + +on: + workflow_dispatch: + push: + branches: [master, develop, 'feature/ci-cpu-testing'] + pull_request: + branches: [master, develop, 'feature/ci-cpu-testing'] + +jobs: + test: + uses: ./.github/workflows/_test-compiler.yml + with: + compiler: gcc + mpi: mpich + diff --git a/.github/workflows/test-gcc-openmpi.yml b/.github/workflows/test-gcc-openmpi.yml new file mode 100644 index 0000000000..c8d2749e20 --- /dev/null +++ b/.github/workflows/test-gcc-openmpi.yml @@ -0,0 +1,19 @@ +# Subset CI: GNU + OpenMPI (CPU) +# Quick ECT validation with 4 MPI ranks. + +name: "GNU+OpenMPI (CPU)" + +on: + workflow_dispatch: + push: + branches: [master, develop, 'feature/ci-cpu-testing'] + pull_request: + branches: [master, develop, 'feature/ci-cpu-testing'] + +jobs: + test: + uses: ./.github/workflows/_test-compiler.yml + with: + compiler: gcc + mpi: openmpi + diff --git a/.github/workflows/test-intel-mpich.yml b/.github/workflows/test-intel-mpich.yml new file mode 100644 index 0000000000..d7f0ab62d7 --- /dev/null +++ b/.github/workflows/test-intel-mpich.yml @@ -0,0 +1,19 @@ +# Subset CI: Intel + MPICH (CPU) +# Quick ECT validation with 4 MPI ranks. + +name: "Intel+MPICH (CPU)" + +on: + workflow_dispatch: + push: + branches: [master, develop, 'feature/ci-cpu-testing'] + pull_request: + branches: [master, develop, 'feature/ci-cpu-testing'] + +jobs: + test: + uses: ./.github/workflows/_test-compiler.yml + with: + compiler: oneapi + mpi: mpich + diff --git a/.github/workflows/test-intel-openmpi.yml b/.github/workflows/test-intel-openmpi.yml new file mode 100644 index 0000000000..40f1a010ae --- /dev/null +++ b/.github/workflows/test-intel-openmpi.yml @@ -0,0 +1,19 @@ +# Subset CI: Intel + OpenMPI (CPU) +# Quick ECT validation with 4 MPI ranks. 
+ +name: "Intel+OpenMPI (CPU)" + +on: + workflow_dispatch: + push: + branches: [master, develop, 'feature/ci-cpu-testing'] + pull_request: + branches: [master, develop, 'feature/ci-cpu-testing'] + +jobs: + test: + uses: ./.github/workflows/_test-compiler.yml + with: + compiler: oneapi + mpi: openmpi + diff --git a/.github/workflows/test-nvhpc-mpich.yml b/.github/workflows/test-nvhpc-mpich.yml new file mode 100644 index 0000000000..14e66fbf34 --- /dev/null +++ b/.github/workflows/test-nvhpc-mpich.yml @@ -0,0 +1,19 @@ +# Subset CI: NVHPC + MPICH (CPU) +# Quick ECT validation with 4 MPI ranks. + +name: "NVHPC+MPICH (CPU)" + +on: + workflow_dispatch: + push: + branches: [master, develop, 'feature/ci-cpu-testing'] + pull_request: + branches: [master, develop, 'feature/ci-cpu-testing'] + +jobs: + test: + uses: ./.github/workflows/_test-compiler.yml + with: + compiler: nvhpc + mpi: mpich + diff --git a/.github/workflows/test-nvhpc-openmpi.yml b/.github/workflows/test-nvhpc-openmpi.yml new file mode 100644 index 0000000000..094628d1ae --- /dev/null +++ b/.github/workflows/test-nvhpc-openmpi.yml @@ -0,0 +1,21 @@ +# Subset CI: NVHPC + OpenMPI (CPU) +# Quick ECT validation with 4 MPI ranks. +# Known issue: model exits 134 (SIGABRT) on GA runners with 4 ranks. +# MPICH works. + +name: "NVHPC+OpenMPI (CPU)" + +on: + workflow_dispatch: + push: + branches: [master, develop, 'feature/ci-cpu-testing'] + pull_request: + branches: [master, develop, 'feature/ci-cpu-testing'] + +jobs: + test: + uses: ./.github/workflows/_test-compiler.yml + with: + compiler: nvhpc + mpi: openmpi + diff --git a/README.md b/README.md index 8db0d85898..73d7043712 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,25 @@ MPAS-v8.4.0 ==== +## CI Status + +Each test builds MPAS-Atmosphere in double precision, runs 3 perturbed ensemble +members (4 MPI ranks), and validates with +[PyCECT](https://github.com/NCAR/PyCECT) +([Price-Broncucia et al. 2025](https://doi.org/10.5194/gmd-18-2349-2025)). 
+All testing subsets run automatically on every push and PR to the +master and develop branches. Reminder: Feature PRs should be opened +against `develop`. + +| Compiler | MPI | Status | +|----------|-----|--------| +| GNU | MPICH | [![GNU+MPICH (CPU)](https://github.com/MPAS-Dev/MPAS-Model/actions/workflows/test-gcc-mpich.yml/badge.svg)](https://github.com/MPAS-Dev/MPAS-Model/actions/workflows/test-gcc-mpich.yml) | +| GNU | OpenMPI | [![GNU+OpenMPI (CPU)](https://github.com/MPAS-Dev/MPAS-Model/actions/workflows/test-gcc-openmpi.yml/badge.svg)](https://github.com/MPAS-Dev/MPAS-Model/actions/workflows/test-gcc-openmpi.yml) | +| Intel | MPICH | [![Intel+MPICH (CPU)](https://github.com/MPAS-Dev/MPAS-Model/actions/workflows/test-intel-mpich.yml/badge.svg)](https://github.com/MPAS-Dev/MPAS-Model/actions/workflows/test-intel-mpich.yml) | +| Intel | OpenMPI | [![Intel+OpenMPI (CPU)](https://github.com/MPAS-Dev/MPAS-Model/actions/workflows/test-intel-openmpi.yml/badge.svg)](https://github.com/MPAS-Dev/MPAS-Model/actions/workflows/test-intel-openmpi.yml) | +| NVHPC | MPICH | [![NVHPC+MPICH (CPU)](https://github.com/MPAS-Dev/MPAS-Model/actions/workflows/test-nvhpc-mpich.yml/badge.svg)](https://github.com/MPAS-Dev/MPAS-Model/actions/workflows/test-nvhpc-mpich.yml) | +| NVHPC | OpenMPI | [![NVHPC+OpenMPI (CPU)](https://github.com/MPAS-Dev/MPAS-Model/actions/workflows/test-nvhpc-openmpi.yml/badge.svg)](https://github.com/MPAS-Dev/MPAS-Model/actions/workflows/test-nvhpc-openmpi.yml) | + The Model for Prediction Across Scales (MPAS) is a collaborative project for developing atmosphere, ocean, and other earth-system simulation components for use in climate, regional climate, and weather studies. The primary development