diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 48c6f83e..a58f3927 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -93,7 +93,7 @@ jobs: - name: Install testing dependencies shell: bash -l {0} run: | - conda install -y pytest coverage coveralls + conda install -y pytest coverage - name: Install mhkit shell: bash -l {0} @@ -138,7 +138,7 @@ jobs: - name: Install testing dependencies shell: bash -l {0} run: | - conda install -y pytest coverage coveralls + conda install -y pytest coverage - name: Install mhkit shell: bash -l {0} @@ -181,7 +181,7 @@ jobs: - name: Install testing dependencies shell: bash -l {0} run: | - conda install -y pytest coverage coveralls + conda install -y pytest coverage - name: Install mhkit shell: bash -l {0} @@ -229,7 +229,7 @@ jobs: - name: Install testing dependencies shell: bash -l {0} run: | - conda install -y pytest coverage coveralls + conda install -y pytest coverage - name: Install mhkit shell: bash -l {0} @@ -291,25 +291,12 @@ jobs: name: data path: ~/.cache/mhkit - - name: Install system dependencies - if: runner.os == 'Linux' - run: | - # Update apt-get cache - sudo apt-get update - sudo apt-get install -y libhdf5-dev libnetcdf-dev - - name: Update and install packages shell: bash run: | python -m pip install --upgrade pip wheel pip install -e ".[all,dev]" - - name: Reinstall h5py and netCDF4 with system libraries - if: runner.os == 'Linux' - shell: bash - run: | - pip install --force-reinstall --no-binary=:all: h5py netCDF4 - - name: Install setuptools for Python 3.12 if: matrix.python-version == '3.12' shell: bash @@ -368,7 +355,7 @@ jobs: - name: Install testing dependencies shell: bash -l {0} run: | - conda install -y pytest coverage coveralls + conda install -y pytest coverage - name: Install mhkit shell: bash -l {0} @@ -587,7 +574,7 @@ jobs: - name: Install notebook testing dependencies shell: bash -l {0} run: | - conda install -y pytest coverage coveralls nbval jupyter utm folium + conda install -y pytest coverage nbval jupyter utm folium - name: Install mhkit shell: bash -l {0} diff --git a/environment-dev.yml b/environment-dev.yml index 958ff6e2..3f845390 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -6,14 +6,16 @@ dependencies: - python>=3.10,<3.13 - pip - numpy>=2.0.0 - - pandas>=2.2.2,<3.0 + - pandas>=2.2.2 + # Provides pyarrow storage for the default string dtype in pandas 3.0+ + - pyarrow>=16.0.0 - scipy>=1.14.0 - xarray>=2024.6.0 - scikit-learn>=1.5.1 - h5py>=3.11.0 - h5pyd>=0.18.0 - netCDF4>=1.6.5 - - hdf5>=1.14.3,<1.14.5.0a0 + - hdf5>=1.14.3 - statsmodels>=0.14.2 - requests - beautifulsoup4 diff --git a/mhkit/tests/conftest.py b/mhkit/tests/conftest.py new file mode 100644 index 00000000..1f49e55a --- /dev/null +++ b/mhkit/tests/conftest.py @@ -0,0 +1,7 @@ +"""Shared pytest configuration for the MHKiT test suite.""" + +import matplotlib + +# Use the non-interactive Agg backend so figure/animation tests run headlessly +# on every OS (Windows otherwise picks TkAgg and fails without Tcl/Tk). +matplotlib.use("Agg") diff --git a/mhkit/tests/wave/io/test_cdip.py b/mhkit/tests/wave/io/test_cdip.py index 17d1bc2a..dd5a798a 100644 --- a/mhkit/tests/wave/io/test_cdip.py +++ b/mhkit/tests/wave/io/test_cdip.py @@ -7,7 +7,6 @@ import pytz import os - testdir = dirname(abspath(__file__)) datadir = normpath(join(testdir, "..", "..", "..", "..", "examples", "data", "wave")) @@ -130,18 +129,18 @@ def test_request_parse_workflow_multiyear(self): expected_index_final = datetime(year2, 12, 31) wave1D = data["data"]["wave"] - self.assertEqual(wave1D.index[0].floor("d").to_pydatetime(), expected_index0) + self.assertEqual(wave1D.index[0].floor("D").to_pydatetime(), expected_index0) self.assertEqual( - wave1D.index[-1].floor("d").to_pydatetime(), expected_index_final + wave1D.index[-1].floor("D").to_pydatetime(), expected_index_final ) for key, wave2D in data["data"]["wave2D"].items(): self.assertEqual( - wave2D.index[0].floor("d").to_pydatetime(), expected_index0 + wave2D.index[0].floor("D").to_pydatetime(), expected_index0 ) self.assertEqual( - wave2D.index[-1].floor("d").to_pydatetime(), expected_index_final + wave2D.index[-1].floor("D").to_pydatetime(), expected_index_final ) def test_plot_boxplot(self): diff --git a/mhkit/tests/wave/io/test_ndbc.py b/mhkit/tests/wave/io/test_ndbc.py index a3c1ee25..bee86173 100644 --- a/mhkit/tests/wave/io/test_ndbc.py +++ b/mhkit/tests/wave/io/test_ndbc.py @@ -357,8 +357,13 @@ def test_get_buoy_metadata(self): metadata["provider"], "Owned and maintained by National Data Buoy Center" ) self.assertEqual(metadata["type"], "3-meter foam buoy w/ seal cage") - self.assertAlmostEqual(float(metadata["lat"]), 36.785) - self.assertAlmostEqual(float(metadata["lon"]), 122.396) + # NDBC 46042 is a deployed buoy that drifts within its watch + # circle, so the NDBC output position can drift over time (observed + # lat 36.785 -> 36.787, lon 122.396 -> 122.408). Use a loose tolerance + # that tracks the buoy's nominal location without failing on real + # movement, while still catching large parsing errors. + self.assertAlmostEqual(float(metadata["lat"]), 36.785, delta=0.05) + self.assertAlmostEqual(float(metadata["lon"]), 122.396, delta=0.05) self.assertEqual(metadata["Site elevation"], "sea level") def test_get_buoy_metadata_invalid_station(self): diff --git a/mhkit/wave/io/ndbc.py b/mhkit/wave/io/ndbc.py index 113873e9..bbcc29d8 100644 --- a/mhkit/wave/io/ndbc.py +++ b/mhkit/wave/io/ndbc.py @@ -19,8 +19,40 @@ convert_nested_dict_and_pandas, ) -# Set pandas option to opt-in to future behavior -pd.set_option("future.no_silent_downcasting", True) + +def replace_pandas_missing_values_with_nan(data, missing_values): + """ + Replace missing values with NaN without silently downcasting dtypes. + + Parameters + ------------ + data: pandas DataFrame + Data in which to replace missing values + + missing_values: list of values + List of values that denote missing data + + Returns + --------- + data: pandas DataFrame + Data with missing values replaced by NaN and object columns converted + to their best-fit dtypes + + Notes + ----- + pandas versions above 2.x do not silently downcast in ``replace`` and + dropped the ``future.no_silent_downcasting`` option. On pandas 2.x the same + forward-looking behavior is opted into so the result is identical across + supported pandas versions and no deprecation warning is emitted. + ``infer_objects`` then converts the resulting object columns explicitly. + """ + pandas_major = int(pd.__version__.split(".")[0]) + if pandas_major <= 2: + with pd.option_context("future.no_silent_downcasting", True): + data = data.replace(missing_values, np.nan) + else: + data = data.replace(missing_values, np.nan) + return data.infer_objects() def read_file(file_name, missing_values=["MM", 9999, 999, 99], to_pandas=True): @@ -153,8 +185,7 @@ def read_file(file_name, missing_values=["MM", 9999, 999, 99], to_pandas=True): data.columns = data.columns # Replace indicated missing values with nan - data = data.replace(missing_values, np.nan) - data = data.infer_objects(copy=False) + data = replace_pandas_missing_values_with_nan(data, missing_values) if not to_pandas: data = convert_to_dataset(data) diff --git a/pyproject.toml b/pyproject.toml index d30cf375..c9805d48 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,11 +25,16 @@ dependencies = [ # scenes. xarray, netcdf4, and h5 dependencies should all be synced to avoid # compatibility issues with individual modules. "xarray>=2024.6.0", - "netCDF4>=1.7.1.post1", + "netCDF4>=1.6.5", "h5py>=3.11.0", "h5pyd>=0.18.0", "numpy>=2.0.0", - "pandas>=2.2.2,<3.0", + "pandas>=2.2.2", + # pandas 3.0+ uses a new string dtype by default and stores it using pyarrow + # when pyarrow is installed, otherwise a more limited numpy fallback. pandas + # keeps pyarrow optional, so we require it here to always get the reliable + # pyarrow storage. pyarrow 16.0.0 is the first version that supports numpy 2. + "pyarrow>=16.0.0", "scipy>=1.14.0", "matplotlib>=3.9.1", "pecos>=0.3.0", @@ -91,7 +96,6 @@ dev = [ "pytest-cov", "pre-commit", "coverage", - "coveralls", "black", ]