From 0362ab50a1c3b78f5f25eba0726bab0ff4017ef0 Mon Sep 17 00:00:00 2001 From: "Simms, Andrew" Date: Mon, 8 Jun 2026 08:06:29 -0600 Subject: [PATCH 01/13] Lib: Remove pandas < 3.0 constraint --- environment-dev.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/environment-dev.yml b/environment-dev.yml index 958ff6e2c..b0469e311 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -6,7 +6,7 @@ dependencies: - python>=3.10,<3.13 - pip - numpy>=2.0.0 - - pandas>=2.2.2,<3.0 + - pandas>=2.2.2 - scipy>=1.14.0 - xarray>=2024.6.0 - scikit-learn>=1.5.1 diff --git a/pyproject.toml b/pyproject.toml index d30cf3751..58baf03dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ dependencies = [ "h5py>=3.11.0", "h5pyd>=0.18.0", "numpy>=2.0.0", - "pandas>=2.2.2,<3.0", + "pandas>=2.2.2", "scipy>=1.14.0", "matplotlib>=3.9.1", "pecos>=0.3.0", From a1bebaae9f8fe31a1ed66e700add381142d0dcd2 Mon Sep 17 00:00:00 2001 From: "Simms, Andrew" Date: Mon, 8 Jun 2026 08:13:37 -0600 Subject: [PATCH 02/13] NDBC: Handle pandas 2 and 3 missing values to nan conversion --- mhkit/wave/io/ndbc.py | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/mhkit/wave/io/ndbc.py b/mhkit/wave/io/ndbc.py index 113873e96..bbcc29d89 100644 --- a/mhkit/wave/io/ndbc.py +++ b/mhkit/wave/io/ndbc.py @@ -19,8 +19,40 @@ convert_nested_dict_and_pandas, ) -# Set pandas option to opt-in to future behavior -pd.set_option("future.no_silent_downcasting", True) + +def replace_pandas_missing_values_with_nan(data, missing_values): + """ + Replace missing values with NaN without silently downcasting dtypes. 
+ + Parameters + ------------ + data: pandas DataFrame + Data in which to replace missing values + + missing_values: list of values + List of values that denote missing data + + Returns + --------- + data: pandas DataFrame + Data with missing values replaced by NaN and object columns converted + to their best-fit dtypes + + Notes + ----- + pandas versions above 2.x do not silently downcast in ``replace`` and + dropped the ``future.no_silent_downcasting`` option. On pandas 2.x the same + forward-looking behavior is opted into so the result is identical across + supported pandas versions and no deprecation warning is emitted. + ``infer_objects`` then converts the resulting object columns explicitly. + """ + pandas_major = int(pd.__version__.split(".")[0]) + if pandas_major <= 2: + with pd.option_context("future.no_silent_downcasting", True): + data = data.replace(missing_values, np.nan) + else: + data = data.replace(missing_values, np.nan) + return data.infer_objects() def read_file(file_name, missing_values=["MM", 9999, 999, 99], to_pandas=True): @@ -153,8 +185,7 @@ def read_file(file_name, missing_values=["MM", 9999, 999, 99], to_pandas=True): data.columns = data.columns # Replace indicated missing values with nan - data = data.replace(missing_values, np.nan) - data = data.infer_objects(copy=False) + data = replace_pandas_missing_values_with_nan(data, missing_values) if not to_pandas: data = convert_to_dataset(data) From 8d5e5d4092b28fade09e9a72ac27a3af93b25810 Mon Sep 17 00:00:00 2001 From: "Simms, Andrew" Date: Mon, 8 Jun 2026 08:14:47 -0600 Subject: [PATCH 03/13] Tests: Loosen tolerance on NDBC real world lat/lon checks --- mhkit/tests/wave/io/test_ndbc.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mhkit/tests/wave/io/test_ndbc.py b/mhkit/tests/wave/io/test_ndbc.py index a3c1ee25b..bee861732 100644 --- a/mhkit/tests/wave/io/test_ndbc.py +++ b/mhkit/tests/wave/io/test_ndbc.py @@ -357,8 +357,13 @@ def test_get_buoy_metadata(self): 
metadata["provider"], "Owned and maintained by National Data Buoy Center" ) self.assertEqual(metadata["type"], "3-meter foam buoy w/ seal cage") - self.assertAlmostEqual(float(metadata["lat"]), 36.785) - self.assertAlmostEqual(float(metadata["lon"]), 122.396) + # NDBC 46042 is a deployed buoy that drifts within its watch + # circle, so the NDBC output position can drift over time (observed + # lat 36.785 -> 36.787, lon 122.396 -> 122.408). Use a loose tolerance + # that tracks the buoy's nominal location without failing on real + # movement, while still catching large parsing errors. + self.assertAlmostEqual(float(metadata["lat"]), 36.785, delta=0.05) + self.assertAlmostEqual(float(metadata["lon"]), 122.396, delta=0.05) self.assertEqual(metadata["Site elevation"], "sea level") def test_get_buoy_metadata_invalid_station(self): From b608b5678a0f37097849c298da96cc1c06cd0677 Mon Sep 17 00:00:00 2001 From: "Simms, Andrew" Date: Mon, 8 Jun 2026 08:26:13 -0600 Subject: [PATCH 04/13] Tests: Fix deprecation by using upper case D for day in pandas floor Pandas deprecated lower case d here: https://github.com/pandas-dev/pandas/pull/58998 Upper case d `D` is calendar day: https://pandas.pydata.org/docs/user_guide/timeseries.html#period-aliases --- mhkit/tests/wave/io/test_cdip.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mhkit/tests/wave/io/test_cdip.py b/mhkit/tests/wave/io/test_cdip.py index 17d1bc2ad..fee5e21f4 100644 --- a/mhkit/tests/wave/io/test_cdip.py +++ b/mhkit/tests/wave/io/test_cdip.py @@ -130,18 +130,18 @@ def test_request_parse_workflow_multiyear(self): expected_index_final = datetime(year2, 12, 31) wave1D = data["data"]["wave"] - self.assertEqual(wave1D.index[0].floor("d").to_pydatetime(), expected_index0) + self.assertEqual(wave1D.index[0].floor("D").to_pydatetime(), expected_index0) self.assertEqual( - wave1D.index[-1].floor("d").to_pydatetime(), expected_index_final + wave1D.index[-1].floor("D").to_pydatetime(), 
expected_index_final ) for key, wave2D in data["data"]["wave2D"].items(): self.assertEqual( - wave2D.index[0].floor("d").to_pydatetime(), expected_index0 + wave2D.index[0].floor("D").to_pydatetime(), expected_index0 ) self.assertEqual( - wave2D.index[-1].floor("d").to_pydatetime(), expected_index_final + wave2D.index[-1].floor("D").to_pydatetime(), expected_index_final ) def test_plot_boxplot(self): From 518b775908d1ff5e982add695f76a3752672c682 Mon Sep 17 00:00:00 2001 From: "Simms, Andrew" Date: Mon, 8 Jun 2026 08:38:42 -0600 Subject: [PATCH 05/13] Dev: Lint --- mhkit/tests/wave/io/test_cdip.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mhkit/tests/wave/io/test_cdip.py b/mhkit/tests/wave/io/test_cdip.py index fee5e21f4..dd5a798a5 100644 --- a/mhkit/tests/wave/io/test_cdip.py +++ b/mhkit/tests/wave/io/test_cdip.py @@ -7,7 +7,6 @@ import pytz import os - testdir = dirname(abspath(__file__)) datadir = normpath(join(testdir, "..", "..", "..", "..", "examples", "data", "wave")) From f12f4c77b7ccf598f34c9e5ff852bffd852669d1 Mon Sep 17 00:00:00 2001 From: "Simms, Andrew" Date: Mon, 8 Jun 2026 13:28:16 -0600 Subject: [PATCH 06/13] Lib: Add pyarrow dependency pyarrow provides native string dtype support for mhkit. While technically optional, mhkit benefits from one standardized backend for representing columnar data. At this time pyarrow is that standard. 
--- environment-dev.yml | 2 ++ pyproject.toml | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/environment-dev.yml b/environment-dev.yml index b0469e311..15f1fd677 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -7,6 +7,8 @@ dependencies: - pip - numpy>=2.0.0 - pandas>=2.2.2 + # Provides pyarrow storage for the default string dtype in pandas 3.0+ + - pyarrow>=16.0.0 - scipy>=1.14.0 - xarray>=2024.6.0 - scikit-learn>=1.5.1 diff --git a/pyproject.toml b/pyproject.toml index 58baf03dd..f80034513 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,11 @@ dependencies = [ "h5pyd>=0.18.0", "numpy>=2.0.0", "pandas>=2.2.2", + # pandas 3.0+ uses a new string dtype by default and stores it using pyarrow + # when pyarrow is installed, otherwise a more limited numpy fallback. pandas + # keeps pyarrow optional, so we require it here to always get the reliable + # pyarrow storage. pyarrow 16.0.0 is the first version that supports numpy 2. + "pyarrow>=16.0.0", "scipy>=1.14.0", "matplotlib>=3.9.1", "pecos>=0.3.0", From 97d0d4d81d3fdb1aeb7a172552b574762c09fb43 Mon Sep 17 00:00:00 2001 From: "Simms, Andrew" Date: Mon, 8 Jun 2026 17:22:09 -0600 Subject: [PATCH 07/13] Lib: Loosen hdf5 range for pyarrow The narrow hdf5 range causes the solve when pyarrow is included to be slow. Increasing the allowed range improves the pip solver speed. 
--- environment-dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment-dev.yml b/environment-dev.yml index 15f1fd677..3f845390b 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -15,7 +15,7 @@ dependencies: - h5py>=3.11.0 - h5pyd>=0.18.0 - netCDF4>=1.6.5 - - hdf5>=1.14.3,<1.14.5.0a0 + - hdf5>=1.14.3 - statsmodels>=0.14.2 - requests - beautifulsoup4 From f1714f8b3aaba43fd21f3195b8985aed728c4b5e Mon Sep 17 00:00:00 2001 From: "Simms, Andrew" Date: Mon, 8 Jun 2026 17:23:30 -0600 Subject: [PATCH 08/13] Lib: Match conda netcdf version in pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f80034513..85e36866d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ dependencies = [ # scenes. xarray, netcdf4, and h5 dependencies should all be synced to avoid # compatibility issues with individual modules. "xarray>=2024.6.0", - "netCDF4>=1.7.1.post1", + "netCDF4>=1.6.5", "h5py>=3.11.0", "h5pyd>=0.18.0", "numpy>=2.0.0", From 6f06e6bff8d2d081f041754dd3d78f87af95b818 Mon Sep 17 00:00:00 2001 From: "Simms, Andrew" Date: Mon, 8 Jun 2026 17:23:56 -0600 Subject: [PATCH 09/13] Actions: Remove nc and h5 binary installs These should no longer be necessary with the changes to dependencies --- .github/workflows/main.yml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 48c6f83ec..5977b8423 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -291,25 +291,12 @@ jobs: name: data path: ~/.cache/mhkit - - name: Install system dependencies - if: runner.os == 'Linux' - run: | - # Update apt-get cache - sudo apt-get update - sudo apt-get install -y libhdf5-dev libnetcdf-dev - - name: Update and install packages shell: bash run: | python -m pip install --upgrade pip wheel pip install -e ".[all,dev]" - - name: Reinstall h5py and netCDF4 with system libraries 
- if: runner.os == 'Linux' - shell: bash - run: | - pip install --force-reinstall --no-binary=:all: h5py netCDF4 - - name: Install setuptools for Python 3.12 if: matrix.python-version == '3.12' shell: bash From 6d33a0eacf8d04a4aff128a95f437343e1a96ff0 Mon Sep 17 00:00:00 2001 From: "Simms, Andrew" Date: Mon, 8 Jun 2026 19:15:47 -0600 Subject: [PATCH 10/13] Actions: Test pandas 3 changes on all operating systems --- .github/workflows/main.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5977b8423..6eba238d1 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -36,7 +36,8 @@ jobs: steps: - id: set-matrix run: | - if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.base_ref }}" == "develop" ]]; then + # Force the full OS matrix for the pandas 3.0 PR + if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.base_ref }}" == "develop" && "${{ github.head_ref }}" != "feat_add_pandas_3.0_support" ]]; then echo "matrix_os=[ \"ubuntu-latest\"]" >> $GITHUB_OUTPUT else echo "matrix_os=[\"windows-latest\", \"ubuntu-latest\", \"macos-latest\"]" >> $GITHUB_OUTPUT From 3326455210c4eaa547c871dfc871014f24472387 Mon Sep 17 00:00:00 2001 From: "Simms, Andrew" Date: Mon, 8 Jun 2026 20:29:12 -0600 Subject: [PATCH 11/13] Actions: Remove explicit coveralls installations This removes any explicit coveralls installs in favor of the coverallsapp/github-action@v2 which is self-contained and contains all binaries necessary for coveralls to run. 
--- .github/workflows/main.yml | 12 ++++++------ pyproject.toml | 1 - 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6eba238d1..0ca01c07d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -94,7 +94,7 @@ jobs: - name: Install testing dependencies shell: bash -l {0} run: | - conda install -y pytest coverage coveralls + conda install -y pytest coverage - name: Install mhkit shell: bash -l {0} @@ -139,7 +139,7 @@ jobs: - name: Install testing dependencies shell: bash -l {0} run: | - conda install -y pytest coverage coveralls + conda install -y pytest coverage - name: Install mhkit shell: bash -l {0} @@ -182,7 +182,7 @@ jobs: - name: Install testing dependencies shell: bash -l {0} run: | - conda install -y pytest coverage coveralls + conda install -y pytest coverage - name: Install mhkit shell: bash -l {0} @@ -230,7 +230,7 @@ jobs: - name: Install testing dependencies shell: bash -l {0} run: | - conda install -y pytest coverage coveralls + conda install -y pytest coverage - name: Install mhkit shell: bash -l {0} @@ -356,7 +356,7 @@ jobs: - name: Install testing dependencies shell: bash -l {0} run: | - conda install -y pytest coverage coveralls + conda install -y pytest coverage - name: Install mhkit shell: bash -l {0} @@ -575,7 +575,7 @@ jobs: - name: Install notebook testing dependencies shell: bash -l {0} run: | - conda install -y pytest coverage coveralls nbval jupyter utm folium + conda install -y pytest coverage nbval jupyter utm folium - name: Install mhkit shell: bash -l {0} diff --git a/pyproject.toml b/pyproject.toml index 85e36866d..c9805d480 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,7 +96,6 @@ dev = [ "pytest-cov", "pre-commit", "coverage", - "coveralls", "black", ] From ae5ee20ca59a80cbbeb22792678020c0ce86fd99 Mon Sep 17 00:00:00 2001 From: "Simms, Andrew" Date: Tue, 9 Jun 2026 08:14:25 -0600 Subject: [PATCH 12/13] Tests: Add test 
configuration and set mpl backend to Agg Agg is a non-interactive matplotlib backend and lets the tests run "headless". Without this specification tests can choose their backend which can sometimes lead to failing tests on headless machines (GH Actions). --- mhkit/tests/conftest.py | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 mhkit/tests/conftest.py diff --git a/mhkit/tests/conftest.py b/mhkit/tests/conftest.py new file mode 100644 index 000000000..1f49e55a4 --- /dev/null +++ b/mhkit/tests/conftest.py @@ -0,0 +1,7 @@ +"""Shared pytest configuration for the MHKiT test suite.""" + +import matplotlib + +# Use the non-interactive Agg backend so figure/animation tests run headlessly +# on every OS (Windows otherwise picks TkAgg and fails without Tcl/Tk). +matplotlib.use("Agg") From e64abe71702ea5dc1ab9c6c80f96866cad286607 Mon Sep 17 00:00:00 2001 From: "Simms, Andrew" Date: Tue, 9 Jun 2026 12:12:48 -0600 Subject: [PATCH 13/13] Actions: Remove this branch from test matrix --- .github/workflows/main.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0ca01c07d..a58f39277 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -36,8 +36,7 @@ jobs: steps: - id: set-matrix run: | - # Force the full OS matrix for the pandas 3.0 PR - if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.base_ref }}" == "develop" && "${{ github.head_ref }}" != "feat_add_pandas_3.0_support" ]]; then + if [[ "${{ github.event_name }}" == "pull_request" && "${{ github.base_ref }}" == "develop" ]]; then echo "matrix_os=[ \"ubuntu-latest\"]" >> $GITHUB_OUTPUT else echo "matrix_os=[\"windows-latest\", \"ubuntu-latest\", \"macos-latest\"]" >> $GITHUB_OUTPUT