Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions src/access_moppy/derivations/calc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ def calculate_monthly_minimum(
- The function uses xarray's resample method with 'M' frequency (end of month)
- Cell methods attribute is updated to reflect the temporal aggregation
- Time coordinate is set to each month's midpoint (centre of time_bnds)
- CMIP fill values (1e20) and other sentinel values are masked before aggregation
"""
if time_dim not in da.dims:
raise ValueError(
Expand All @@ -254,6 +255,18 @@ def calculate_monthly_minimum(
"calendar"
)

# Mask CMIP fill values (1e20) and other sentinel values before aggregation
fill_value = da.attrs.get("_FillValue") or da.encoding.get("_FillValue")
missing_value = da.attrs.get("missing_value") or da.encoding.get("missing_value")
fill_val = fill_value if fill_value is not None else missing_value

if fill_val is not None:
try:
fill_val = float(fill_val)
da = da.where(da != fill_val)
except (TypeError, ValueError):
pass

# Perform monthly resampling using minimum (lazy operation)
if (
not np.issubdtype(da[time_dim].dtype, np.datetime64)
Expand Down Expand Up @@ -335,6 +348,7 @@ def calculate_monthly_maximum(
- The function uses xarray's resample method with 'M' frequency (end of month)
- Cell methods attribute is updated to reflect the temporal aggregation
- Time coordinate is set to each month's midpoint (centre of time_bnds)
- CMIP fill values (1e20) and other sentinel values are masked before aggregation
"""
if time_dim not in da.dims:
raise ValueError(
Expand All @@ -354,6 +368,18 @@ def calculate_monthly_maximum(
"calendar"
)

# Mask CMIP fill values (1e20) and other sentinel values before aggregation
fill_value = da.attrs.get("_FillValue") or da.encoding.get("_FillValue")
missing_value = da.attrs.get("missing_value") or da.encoding.get("missing_value")
fill_val = fill_value if fill_value is not None else missing_value

if fill_val is not None:
try:
fill_val = float(fill_val)
da = da.where(da != fill_val)
except (TypeError, ValueError):
pass

# Perform monthly resampling using maximum (lazy operation)
if (
not np.issubdtype(da[time_dim].dtype, np.datetime64)
Expand Down
26 changes: 26 additions & 0 deletions tests/unit/test_derivations_calc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,32 @@ def test_max_is_gte_min(self):
result_max = calculate_monthly_maximum(da)
assert (result_max.values >= result_min.values).all()

@pytest.mark.unit
def test_masks_fill_values_in_attrs(self):
    """A sentinel declared via ``attrs['_FillValue']`` must not win the monthly maximum.

    Thirty daily samples of 10.0 are built inside a single month and one
    of them is overwritten with the 1e20 sentinel. If the sentinel were
    not masked, the maximum for that month would be 1e20 instead of 10.0.
    """
    sentinel = 1e20
    valid_value = 10.0

    samples = np.full(30, valid_value)
    samples[5] = sentinel  # one contaminated day
    time_axis = xr.date_range("2000-01-01", periods=30, freq="D")

    da = xr.DataArray(
        samples,
        dims=["time"],
        coords={"time": time_axis},
        attrs={"_FillValue": sentinel},
    )

    monthly_max = calculate_monthly_maximum(da)

    # Only the valid samples may contribute to the aggregate.
    first_month = float(monthly_max.values[0])
    assert first_month == pytest.approx(10.0)

@pytest.mark.unit
def test_masks_fill_values_in_encoding(self):
    """Test that fill values declared in ``encoding`` are masked.

    The sentinel is deliberately *smaller* than every valid sample.
    With a large sentinel such as 1e20 the monthly minimum would be
    5.0 whether or not masking happened, so the test could never fail.
    With a sentinel below the data, an unmasked run returns the
    sentinel itself and the assertion catches the regression.
    """
    fill = -1e20
    times = xr.date_range("2000-01-01", periods=30, freq="D")
    data = np.full(30, 5.0)
    data[10] = fill  # Insert fill value below the valid range
    da = xr.DataArray(data, dims=["time"], coords={"time": times})
    da.encoding["_FillValue"] = fill

    result = calculate_monthly_minimum(da)
    # Minimum should be 5.0; without masking it would be the -1e20 sentinel
    assert float(result.values[0]) == pytest.approx(5.0)

@pytest.mark.unit
def test_resample_failure_raises_runtime_error(self):
"""An exception raised inside the resample block is wrapped as RuntimeError."""
Expand Down