diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index aae379b87..7883c5a7d 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -73,24 +73,50 @@ jobs: if: success() || steps.install.conclusion == 'success' run: | . venv-thetis/bin/activate - : # Run the serial tests + : # Run the serial tests, excluding examples which are tested separately python -m pytest -n 12 --verbose --durations=0 --durations-min=60.0 \ - -m "parallel[1] or not parallel" thetis-repo/test + --ignore=thetis-repo/test/examples \ + -m parallel[match] thetis-repo/test - name: Test Thetis (parallel) if: success() || steps.install.conclusion == 'success' run: | . venv-thetis/bin/activate - : # Run the parallel tests (note that xdist is not valid in parallel) - mpiexec -n 2 python -m pytest --verbose --durations=0 --durations-min=60.0 \ - -m parallel[2] thetis-repo/test + : # Split the parallel tests into multiple mpiexec jobs to utilise all cores + export FIREDRAKE_RUN_SPLIT_TESTS_TIMEOUT=660s + export FIREDRAKE_RUN_SPLIT_TESTS_KILL_AFTER=30s + firedrake-run-split-tests 2 4 \ + --durations=0 --durations-min=60.0 \ + --timeout=600 --timeout-method=thread \ + thetis-repo/test - - name: Test Thetis adjoint + - name: Test Thetis examples (serial) if: success() || steps.install.conclusion == 'success' run: | - firedrake-clean . venv-thetis/bin/activate - python -m pytest -n 8 --verbose --durations=0 thetis-repo/test_adjoint + python -m pytest -n 12 --verbose --durations=0 --durations-min=60.0 \ + -m parallel[match] \ + thetis-repo/test/examples thetis-repo/test_adjoint/examples + + - name: Test Thetis adjoint (serial) + if: success() || steps.install.conclusion == 'success' + run: | + . venv-thetis/bin/activate + python -m pytest -n 2 --verbose --durations=0 --durations-min=60.0 \ + --ignore=thetis-repo/test_adjoint/examples \ + -m parallel[match] thetis-repo/test_adjoint + + - name: Test Thetis adjoint (parallel) + if: success() || steps.install.conclusion == 'success' + run: | + . venv-thetis/bin/activate + : # Split the parallel tests into multiple mpiexec jobs to utilise all cores + export FIREDRAKE_RUN_SPLIT_TESTS_TIMEOUT=660s + export FIREDRAKE_RUN_SPLIT_TESTS_KILL_AFTER=30s + firedrake-run-split-tests 2 1 \ + --durations=0 --durations-min=60.0 \ + --timeout=600 --timeout-method=thread \ + thetis-repo/test_adjoint - name: Post-cleanup if: always() diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index fa4fbcedb..cdf400aa7 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -9,5 +9,5 @@ jobs: uses: ./.github/workflows/core.yml with: branch: ${{ github.event.pull_request.head.sha }} - image: ${{ github.base_ref == 'release' && 'firedrakeproject/firedrake-vanilla-default:latest' || 'firedrakeproject/firedrake-vanilla-default:dev-main' }} + image: ${{ github.base_ref == 'release' && 'firedrakeproject/firedrake-vanilla-default:dev-release' || 'firedrakeproject/firedrake-vanilla-default:dev-main' }} secrets: inherit diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 7364ce8bc..dfe19045b 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -11,5 +11,5 @@ jobs: uses: ./.github/workflows/core.yml with: branch: ${{ github.ref_name }} - image: ${{ github.ref_name == 'release' && 'firedrakeproject/firedrake-vanilla-default:latest' || 'firedrakeproject/firedrake-vanilla-default:dev-main' }} + image: ${{ github.ref_name == 'release' && 'firedrakeproject/firedrake-vanilla-default:dev-release' || 'firedrakeproject/firedrake-vanilla-default:dev-main' }} secrets: inherit diff --git a/.github/workflows/weekly-release.yml b/.github/workflows/weekly-release.yml index b9f1d959d..711acfae1 100644 --- a/.github/workflows/weekly-release.yml +++ b/.github/workflows/weekly-release.yml @@ -12,5 +12,5 @@ jobs: uses: thetisproject/thetis/.github/workflows/core.yml@release with: branch: release - image: firedrakeproject/firedrake-vanilla-default:latest + image: firedrakeproject/firedrake-vanilla-default:dev-release secrets: inherit diff --git a/.gitignore b/.gitignore index b063d17b8..5b587d8c3 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,7 @@ htmlcov/ nosetests.xml coverage.xml *,cover +*.errcode # Translations *.mo diff --git a/examples/tohoku_inversion/inverse_problem.py b/examples/tohoku_inversion/inverse_problem.py index 4115f22fa..5e63885db 100644 --- a/examples/tohoku_inversion/inverse_problem.py +++ b/examples/tohoku_inversion/inverse_problem.py @@ -48,6 +48,9 @@ source = get_source(mesh2d, source_model) if source_model == "okada": source.subfault_variables = args.okada_parameters + if os.getenv('THETIS_REGRESSION_TEST') is not None: + source.num_subfaults_par = 2 + source.num_subfaults_perp = 2 # Setup PDE output_dir = f"{pwd}/outputs_elev-init-optimization_{source_model}" diff --git a/test/examples/test_examples.py b/test/examples/test_examples.py index d73a8a5ef..c3743a4a1 100644 --- a/test/examples/test_examples.py +++ b/test/examples/test_examples.py @@ -7,6 +7,7 @@ import glob import sys import shutil +import runpy # set environment flag # can be used in examples to reduce cpu cost @@ -68,18 +69,65 @@ all_examples = glob.glob(os.path.join(examples_dir, '*/*.py')) all_examples = [f for f in all_examples if f not in exclude_files] +# Examples that should be exercised under MPI in the main parallel CI job. +# Keyed by path relative to `examples_dir` with the requested nprocs. +parallel_examples = { + os.path.join('discrete_turbines', 'tidal_array.py'): 2, +} -@pytest.fixture(params=all_examples) +for relpath, nprocs in parallel_examples.items(): + abspath = os.path.join(examples_dir, relpath) + try: + idx = all_examples.index(abspath) + except ValueError: + continue + all_examples[idx] = pytest.param(abspath, marks=pytest.mark.parallel(nprocs)) + + +@pytest.fixture(params=all_examples, + ids=lambda x: os.path.relpath(x, examples_dir)) def example_file(request): return os.path.abspath(request.param) -def test_examples(example_file, tmpdir, monkeypatch): +def test_examples(example_file, tmp_path, monkeypatch, request): assert os.path.isfile(example_file), 'File not found {:}'.format(example_file) # copy mesh files source = os.path.dirname(example_file) - for f in glob.glob(os.path.join(source, '*.msh')): - shutil.copy(f, str(tmpdir)) - # change workdir to temporary dir - monkeypatch.chdir(tmpdir) - subprocess.check_call([sys.executable, example_file]) + + if request.node.get_closest_marker("parallel") is None: + # Serial example: copy mesh files and run in a subprocess. + for f in glob.glob(os.path.join(source, '*.msh')): + shutil.copy(f, str(tmp_path)) + # change workdir to temporary dir + monkeypatch.chdir(tmp_path) + subprocess.check_call([sys.executable, example_file]) + return + + # Parallel example: run the script under the current MPI communicator. + # In CI this test is selected only in the main parallel job (outer mpiexec -n 2), + # so we must not spawn mpiexec here (no nested MPI). We also coordinate a shared + # working directory across ranks to avoid each rank creating its own tmp_path. + from mpi4py import MPI + comm = MPI.COMM_WORLD + if comm.rank == 0: + workdir = tmp_path + for f in glob.glob(os.path.join(source, '*.msh')): + shutil.copy(f, str(workdir)) + else: + workdir = None + workdir = comm.bcast(str(workdir) if workdir is not None else None, root=0) + comm.barrier() + monkeypatch.chdir(workdir) + # Make local imports like `import turbine_callback` work the same way they do + # when running `python /abs/path/to/example.py` (which prepends the script dir + # to sys.path). + added_to_syspath = False + if source not in sys.path: + sys.path.insert(0, source) + added_to_syspath = True + try: + runpy.run_path(example_file, run_name="__main__") + finally: + if added_to_syspath and sys.path and sys.path[0] == source: + sys.path.pop(0) diff --git a/test_adjoint/conftest.py b/test_adjoint/conftest.py index 3e4f20137..4f437b434 100644 --- a/test_adjoint/conftest.py +++ b/test_adjoint/conftest.py @@ -5,4 +5,7 @@ def pytest_runtest_teardown(item, nextitem): from pyadjoint import get_working_tape # clear the adjoint tape, so subsequent tests don't interfere - get_working_tape().clear_tape() + tape = get_working_tape() + # Some tests run external scripts and never create a tape in the main pytest process. + if tape is not None: + tape.clear_tape() diff --git a/test_adjoint/examples/test_adjoint_examples.py b/test_adjoint/examples/test_adjoint_examples.py new file mode 100644 index 000000000..ebf3502fd --- /dev/null +++ b/test_adjoint/examples/test_adjoint_examples.py @@ -0,0 +1,75 @@ +""" +Runs all adjoint example scripts. Only tests whether examples can be executed. +""" +import pytest +import os +import subprocess +import glob +import sys +import shutil +import runpy + +# set environment flag +# can be used in examples to reduce cpu cost +os.environ['THETIS_REGRESSION_TEST'] = "1" + +# list of all adjoint examples to run +adjoint_files_serial = [ + 'tidalfarm/tidalfarm.py', + 'channel_inversion/inverse_problem.py', + 'headland_inversion/inverse_problem.py', + 'tohoku_inversion/inverse_problem.py', +] + +adjoint_files_parallel = [ + 'discrete_turbines/channel-optimisation.py', +] + +cwd = os.path.abspath(os.path.dirname(__file__)) +examples_dir = os.path.abspath(os.path.join(cwd, '..', '..', 'examples')) + +all_examples = ( + [os.path.join(examples_dir, f) for f in adjoint_files_serial] + + [ + pytest.param(os.path.join(examples_dir, f), marks=pytest.mark.parallel(2)) + for f in adjoint_files_parallel + ] +) + + +@pytest.fixture(params=all_examples, + ids=lambda x: os.path.relpath(x, examples_dir)) +def example_file(request): + return os.path.abspath(request.param) + + +def test_examples(example_file, tmp_path, tmp_path_factory, monkeypatch, request): + assert os.path.isfile(example_file), 'File not found {:}'.format(example_file) + # copy mesh files + source = os.path.dirname(example_file) + + if request.node.get_closest_marker("parallel") is None: + # Serial example: copy mesh files and run in a subprocess. + for f in glob.glob(os.path.join(source, '*.msh')): + shutil.copy(f, str(tmp_path)) + # change workdir to temporary dir + monkeypatch.chdir(tmp_path) + subprocess.check_call([sys.executable, example_file]) + return + + # Parallel example: run the script under the current MPI communicator. + # In CI this test is selected only in the adjoint-parallel job (outer mpiexec -n 2), + # so we must not spawn mpiexec here (no nested MPI). We also coordinate a shared + # working directory across ranks to avoid each rank creating its own tmpdir. + from mpi4py import MPI + comm = MPI.COMM_WORLD + if comm.rank == 0: + workdir = tmp_path_factory.mktemp("thetis-adjoint-example-channel-optimisation") + for f in glob.glob(os.path.join(source, '*.msh')): + shutil.copy(f, str(workdir)) + else: + workdir = None + workdir = comm.bcast(str(workdir) if workdir is not None else None, root=0) + comm.barrier() + monkeypatch.chdir(workdir) + runpy.run_path(example_file, run_name="__main__") diff --git a/test_adjoint/examples/test_examples.py b/test_adjoint/examples/test_examples.py deleted file mode 100644 index bfba102ac..000000000 --- a/test_adjoint/examples/test_examples.py +++ /dev/null @@ -1,46 +0,0 @@ -""" -Runs all adjoint example scripts. Only tests whether examples can be executed. -""" -import pytest -import os -import subprocess -import glob -import sys -import shutil - -# set environment flag -# can be used in examples to reduce cpu cost -os.environ['THETIS_REGRESSION_TEST'] = "1" - -# list of all adjoint examples to run -adjoint_files = [ - 'tidalfarm/tidalfarm.py', - 'channel_inversion/inverse_problem.py', - 'headland_inversion/inverse_problem.py', - 'tohoku_inversion/inverse_problem.py', - 'discrete_turbines/channel-optimisation.py', -] - -cwd = os.path.abspath(os.path.dirname(__file__)) -examples_dir = os.path.abspath(os.path.join(cwd, '..', '..', 'examples')) - -include_files = [os.path.join(examples_dir, f) for f in adjoint_files] - -all_examples = include_files - - -@pytest.fixture(params=all_examples, - ids=lambda x: os.path.basename(x)) -def example_file(request): - return os.path.abspath(request.param) - - -def test_examples(example_file, tmpdir, monkeypatch): - assert os.path.isfile(example_file), 'File not found {:}'.format(example_file) - # copy mesh files - source = os.path.dirname(example_file) - for f in glob.glob(os.path.join(source, '*.msh')): - shutil.copy(f, str(tmpdir)) - # change workdir to temporary dir - monkeypatch.chdir(tmpdir) - subprocess.check_call([sys.executable, example_file])