diff --git a/echopype/commongrid/__init__.py b/echopype/commongrid/__init__.py index 63172d2bd..71d149be4 100644 --- a/echopype/commongrid/__init__.py +++ b/echopype/commongrid/__init__.py @@ -1,7 +1,8 @@ -from .api import compute_MVBS, compute_MVBS_index_binning, compute_NASC +from .api import compute_MVBS, compute_MVBS_index_binning, compute_NASC, resample_to_geometry __all__ = [ "compute_MVBS", "compute_NASC", "compute_MVBS_index_binning", + "resample_to_geometry", ] diff --git a/echopype/commongrid/api.py b/echopype/commongrid/api.py index c5fcc4ddc..f3a3ff7df 100644 --- a/echopype/commongrid/api.py +++ b/echopype/commongrid/api.py @@ -3,6 +3,7 @@ """ import logging +import warnings from typing import Literal import numpy as np @@ -10,14 +11,18 @@ import xarray as xr from ..consolidate.api import POSITION_VARIABLES +from ..qc.api import coerce_increasing_time, exist_reversed_time from ..utils.prov import add_processing_level, echopype_prov_attrs, insert_input_processing_level from .utils import ( _convert_bins_to_interval_index, _get_reduced_positions, + _lin2log, + _log2lin, _parse_x_bin, _set_MVBS_attrs, _set_var_attrs, _setup_and_validate, + _weighted_mean_kernel, compute_raw_MVBS, compute_raw_NASC, get_distance_from_latlon, @@ -416,5 +421,151 @@ def compute_NASC( return ds_NASC -def regrid(): - return 1 +def resample_to_geometry( + ds_Sv, target_variable: str, target_channel: str | None = None, target_grid: xr.DataArray = None +) -> xr.Dataset: + """ + Regrids all channels in the EchoData object to match the geometry + of the specified target channel index. + + Parameters + ---------- + ds_Sv : xr.Dataset + Input Dataset containing Sv data + target_channel : str, optional + Channel used as reference grid. Must be provided if target_grid is None. + + target_grid : xr.DataArray, optional + Custom grid. Must be provided if target_channel is None. + Data array must have dimension ('ping_time', 'range_sample'). + Returns + ------- + xr.Dataset + A new Dataset where all channels share the same `ping_time`, + `range_sample`, and `echo_range` as the target. + """ + + LOG_VARIABLES = {"Sv", "Sp", "TS"} + + if target_variable != "Sv": + warnings.warn( + f"Resampling '{target_variable}' with overlap-weighted averaging. " + "This function is primarily intended and validated for Sv. " + "Variables such as Sp and TS are resampled in the linear domain, " + "but interpretation should be done with caution. " + "Angle variables are resampled geometrically and may not be " + "physically equivalent to recomputing angles from complex data.", + UserWarning, + stacklevel=2, + ) + + if (target_channel is not None) == (target_grid is not None): + raise ValueError("Provide only one of target_channel or target_grid, not both.") + + if (target_channel is None) == (target_grid is None): + raise ValueError("Provide exactly one of target_channel or target_grid.") + + if exist_reversed_time(ds_Sv, "ping_time"): + coerce_increasing_time(ds_Sv) + + channels = ds_Sv.channel.values + ds_var_names = ds_Sv.keys() + + if target_variable not in ds_var_names: + raise ValueError(f"{target_variable} is not part of the variable names in : {ds_var_names}") + + expected_dims = {"channel", "ping_time", "range_sample"} + actual_dims = set(ds_Sv[target_variable].dims) + if actual_dims != expected_dims: + raise ValueError( + f"Target variable '{target_variable}' must have exactly the dimensions " + f"('channel', 'ping_time', 'range_sample'). Found: {ds_Sv[target_variable].dims}" + ) + + if target_channel and target_channel not in channels: + raise ValueError(f"{target_channel} is not part of the channel names in : {channels}") + + if target_grid is not None and target_grid.dims != ("ping_time", "range_sample"): + raise ValueError("target_grid dimensions do not match expected dimensions.") + da_var = ds_Sv[target_variable] + + if target_channel: + ds_target = ds_Sv.sel(channel=target_channel).copy() + target_range_da = ds_target["echo_range"] + # Target grid is given + else: + target_range_da = target_grid + + # List to hold the aligned DataArrays + aligned_arrays = [] + + for channel in channels: + + ds_source = da_var.sel(channel=channel) + + if target_variable in LOG_VARIABLES: + source_linear = _log2lin(ds_source) + else: + source_linear = ds_source + source_range_da = ds_Sv["echo_range"].sel(channel=channel) + + # Apply weighted mean resapling as Ufunc + + result_linear = xr.apply_ufunc( + _weighted_mean_kernel, + target_range_da, + source_range_da, + source_linear, + input_core_dims=[["range_sample"], ["range_sample"], ["range_sample"]], + output_core_dims=[["range_sample"]], + vectorize=True, + dask="parallelized", + output_dtypes=[np.float64], + ) + + # Convert back to log domain + result_linear = result_linear.where(result_linear > 0) + if target_variable in LOG_VARIABLES: + resample_variable = _lin2log(result_linear) + else: + resample_variable = result_linear + + resample_variable.name = target_variable + resample_variable = resample_variable.assign_coords(channel=channel) + + aligned_arrays.append(resample_variable) + + ds_combined = xr.concat(aligned_arrays, dim="channel") + echo_range_aligned = target_range_da.broadcast_like(ds_combined) + + new_ds = xr.Dataset( + data_vars={ + target_variable: ds_combined, + "echo_range": echo_range_aligned, + "frequency_nominal": ds_Sv["frequency_nominal"], + } + ) + if "water_level" in ds_Sv: + new_ds["water_level"] = ds_Sv["water_level"] + + # Attach attributes + new_ds[target_variable].attrs = ds_Sv[target_variable].attrs + range_var_attrs = np.round(target_range_da.diff("range_sample").mean().values, decimals=4) + if target_channel: + new_ds[target_variable].attrs["resampling_mode"] = "target_channel" + new_ds[target_variable].attrs["target_channel"] = target_channel + new_ds[target_variable].attrs["grid_spacing"] = str(range_var_attrs) + "m" + else: + new_ds[target_variable].attrs["resampling_mode"] = "target_grid" + new_ds[target_variable].attrs["target_grid"] = target_grid + new_ds[target_variable].attrs["grid_spacing"] = str(range_var_attrs) + "m" + + prov_dict = echopype_prov_attrs(process_type="processing") + prov_dict["processing_function"] = "commongrid.resample_to_geometry" + new_ds = new_ds.assign_attrs(prov_dict) + new_ds = insert_input_processing_level(new_ds, ds_Sv) + + if "echo_range" in ds_Sv: + new_ds["echo_range"].attrs = ds_Sv["echo_range"].attrs + + return new_ds diff --git a/echopype/commongrid/utils.py b/echopype/commongrid/utils.py index a6ea8df02..9ace0e4e0 100644 --- a/echopype/commongrid/utils.py +++ b/echopype/commongrid/utils.py @@ -696,3 +696,170 @@ def ping_time_bin_parsing_and_conversion(ping_time_bin: str): ) ping_time_bin_resunit_label = timedelta_units[ping_time_bin_resunit]["unitstr"] return ping_time_bin_resvalue, ping_time_bin_resunit_label + + +def get_valid_max_depth_ping_index( + ds: xr.Dataset, target_channel: str = None, target_grid: xr.DataArray = None +) -> int: + """ + Finds the integer indices of the last non-NaN value in a DataArray. + + Parameters + ---------- + ds : xr.Dataset + The xarray Dataset containing the echo_range variable. + target_channel : string, optional + The label of the channel from which to find the deepest ping. + target_grid : xr.DataArray, optional + A data array specifying the target grid from which to find the deeepst ping. + Data array must have dimension ('ping_time', 'range_sample'). + Returns + ------- + deepest_ping: int + Index of the ping containing the most amount of data before trailing NaN values. + """ + da = xr.DataArray() + if target_channel is not None: + da = ds.sel(channel=target_channel)["echo_range"].values + if target_grid is not None: + da = target_grid.values + + deepest_ping = 0 + max_range_sample = -np.inf + + for i in range(da.shape[0]): + if np.isnan(da[i, :]).all(): + continue + all_nan = np.isnan(da[i, :]) + row_indices = np.where(~all_nan)[0] + max_range_sample = max(max_range_sample, row_indices.max()) + if max_range_sample == row_indices.max(): + deepest_ping = i + return deepest_ping + + +def _exact_integration_kernel(target_edges, source_edge, source_value): + """ + Helper Kernel: Performs exact interval intersection to determine energy distribution. + """ + target_edges = np.asanyarray(target_edges) + source_edge = np.asanyarray(source_edge) + source_value = np.asanyarray(source_value) + + target_min, target_max = target_edges[0], target_edges[-1] + + idx_start = np.searchsorted(source_edge, target_min, side="right") - 1 + idx_end = np.searchsorted(source_edge, target_max, side="left") + 1 + + idx_start = max(0, idx_start) + idx_end = min(len(source_edge), idx_end) + + source_edge_sub = source_edge[idx_start:idx_end] + source_val_sub = source_value[idx_start : max(idx_start, idx_end - 1)] + + if len(source_edge_sub) < 2: + return np.full(len(target_edges) - 1, np.nan) + + all_edges = np.concatenate([target_edges, source_edge_sub]) + union_edges = np.unique(all_edges) + + union_centers = 0.5 * (union_edges[:-1] + union_edges[1:]) + union_widths = np.diff(union_edges) + + source_indices = np.searchsorted(source_edge_sub, union_centers, side="right") - 1 + + valid_source = (source_indices >= 0) & (source_indices < len(source_val_sub)) + + atomic_energies = np.zeros_like(union_widths) + atomic_energies[valid_source] = ( + source_val_sub[source_indices[valid_source]] * union_widths[valid_source] + ) + + target_indices = np.searchsorted(target_edges, union_centers, side="right") - 1 + + n_targets = len(target_edges) - 1 + + valid_target = (target_indices >= 0) & (target_indices < n_targets) + + total_energy_per_target = np.bincount( + target_indices[valid_target], weights=atomic_energies[valid_target], minlength=n_targets + ) + + target_widths = target_edges[1:] - target_edges[:-1] + + output_values = np.full(n_targets, np.nan) + + valid_width = target_widths > 0 + output_values[valid_width] = total_energy_per_target[valid_width] / target_widths[valid_width] + + return output_values + + +def _weighted_mean_kernel(target_ranges, source_ranges, source_values): + """ + The Numba/Numpy kernel. + Resamples a single ping (1D array) from source geometry to target geometry. + + Updated: Uses exact interval integration for high precision. + + Parameters + ---------- + target_ranges: np.ndarray + 1D array of target bin centers to which the source data will be resampled. + source_ranges: np.ndarray + 1D array of source bin centers representing the original data geometry. + source_values: np.ndarray + 1D array of values corresponding to each source bin. + + Returns + ------- + output: np.ndarray + 1D array of resampled values at the target bin centers. + """ + + valid_source_range_mask = ~np.isnan(source_ranges) + valid_target_range_mask = ~np.isnan(target_ranges) + + if not np.any(valid_source_range_mask) or not np.any(valid_target_range_mask): + return np.full_like(target_ranges, np.nan) + + source_range = source_ranges[valid_source_range_mask].astype(np.float64) + target_range_valid = target_ranges[valid_target_range_mask].astype(np.float64) + + source_value = source_values[valid_source_range_mask].astype(np.float64) + + # Define edges source + # Estimate bin edges from sample centers + if len(source_range) > 1: + source_mid = 0.5 * (source_range[:-1] + source_range[1:]) + source_edge = np.concatenate( + ( + [source_range[0] - (source_range[1] - source_range[0]) / 2], + source_mid, + [source_range[-1] + (source_range[-1] - source_range[-2]) / 2], + ) + ) + else: + source_edge = np.array([source_range[0] - 0.5, source_range[0] + 0.5]) + + # Define edges target + if len(target_range_valid) > 1: + target_mid = 0.5 * (target_range_valid[:-1] + target_range_valid[1:]) + target_edges = np.concatenate( + ( + [target_range_valid[0] - (target_range_valid[1] - target_range_valid[0]) / 2], + target_mid, + [target_range_valid[-1] + (target_range_valid[-1] - target_range_valid[-2]) / 2], + ) + ) + else: + target_edges = np.array([target_range_valid[0] - 0.5, target_range_valid[0] + 0.5]) + + source_value = np.nan_to_num(source_value, nan=0.0) + + target_mean_power = _exact_integration_kernel(target_edges, source_edge, source_value) + + output = np.full_like(target_ranges, np.nan, dtype=np.float64) + output[valid_target_range_mask] = target_mean_power + + return output diff --git a/echopype/tests/commongrid/test_commongrid_api.py b/echopype/tests/commongrid/test_commongrid_api.py index 63ce2ebc2..2a0d889b9 100644 --- a/echopype/tests/commongrid/test_commongrid_api.py +++ b/echopype/tests/commongrid/test_commongrid_api.py @@ -10,10 +10,75 @@ _parse_x_bin, _groupby_x_along_channels, get_distance_from_latlon, - compute_raw_NASC + compute_raw_NASC, + _weighted_mean_kernel, ) from echopype.tests.commongrid.conftest import get_NASC_echoview +@pytest.fixture +def ek80_path(test_path): + return test_path["EK80"] + +@pytest.fixture +def calculate_total_energy(): + """ + Returns a function that calculates the total integrated energy + (Linear Sv * Thickness) per ping for a specific channel. + """ + def _calc(ds, channel): + # Select Data + ds_channel = ds.sel(channel=channel) + sv = ds_channel['Sv'].values + echo_range = ds_channel['echo_range'].values + + n_pings = sv.shape[0] + total_energy = np.zeros(n_pings) + + for i in range(n_pings): + # Extract row + range_row = echo_range[i, :] + sv_row = sv[i, :] + + # Mask + valid_range = ~np.isnan(range_row) + + if not np.any(valid_range): + total_energy[i] = 0.0 + continue + + # Get valid geometry + range_valid = range_row[valid_range] + sv_valid = sv_row[valid_range] + + # Calculate Thickness using Midpoints + if len(range_valid) > 1: + midpoints = 0.5 * (range_valid[:-1] + range_valid[1:]) + + d_start = range_valid[1] - range_valid[0] + d_end = range_valid[-1] - range_valid[-2] + + edges = np.concatenate([ + [range_valid[0] - d_start/2], + midpoints, + [range_valid[-1] + d_end/2] + ]) + + thickness = np.diff(edges) + else: + thickness = np.array([1.0]) + + linear_sv = 10 ** (sv_valid / 10.0) + + linear_sv = np.nan_to_num(linear_sv, nan=0.0) + + total_energy[i] = np.sum(linear_sv * thickness) + + return total_energy + + return _calc + + + # Utilities Tests @pytest.mark.unit @@ -89,8 +154,7 @@ def test__groupby_x_along_channels(request, range_var, lat_lon): # Check that the range_var is in the dimension assert f"{range_var}_bins" in sv_mean.dims - - + # NASC Tests @pytest.mark.integration @pytest.mark.parametrize("compute_mvbs", [True, False]) @@ -605,3 +669,457 @@ def test_compute_reindex_non_NaN_not_map_reduce(request): for reindex in [True, False]: with pytest.raises(ValueError, match=f"Passing in reindex={reindex} is only allowed when method='map_reduce'."): # noqa: E501 ep.commongrid.compute_MVBS(ds_Sv, method=method, reindex=reindex) + +# Tests for resampling to geometry + +@pytest.mark.unit +@pytest.mark.parametrize( + ["scenario", "source_params", "target_params", "expected_value"], + [ + # Downsampling) + ("downsample_const", {"start": 0, "stop": 1000, "step": 0.1, "val": 5.0}, + {"start": 0, "stop": 1000, "step": 2.0}, 5.0), + + # Upsampling + ("upsample_const", {"start": 0, "stop": 1000, "step": 0.2, "val": 10.0}, + {"start": 0, "stop": 1000, "step": 0.1}, 10.0), + + # No Change + ("identity", {"start": 0, "stop": 1000, "step": 1.0, "val": 42.0}, + {"start": 0, "stop": 1000, "step": 1.0}, 42.0), + ], +) +def test__weighted_mean_kernel(scenario, source_params, target_params, expected_value): + """ + Tests the Numba/Numpy regridding kernel for energy/magnitude conservation. + """ + + source_ranges = np.arange(source_params["start"], source_params["stop"], source_params["step"]) + source_values = np.full_like(source_ranges, source_params["val"]) + + target_ranges = np.arange(target_params["start"], target_params["stop"], target_params["step"]) + + output = _weighted_mean_kernel(target_ranges, source_ranges, source_values) + + + assert output.shape == target_ranges.shape + # Energy Conservation Check + source_width = source_params["step"] + target_width = target_params["step"] + + + source_energy = np.sum(source_values) * source_width + target_energy = np.nansum(output) * target_width + + assert np.isclose(source_energy, target_energy, rtol=0.05), \ + f"Scenario '{scenario}' failed energy conservation." + +@pytest.mark.unit +@pytest.mark.parametrize( + ("er_type"), + [ + ("regular"), + ], +) +def test_resample_target_channel_same(request, er_type): + """Testing that the resampling function preserves the target channel""" + if er_type == "regular": + ds_Sv = request.getfixturevalue("ds_Sv_echo_range_regular") + else: + ds_Sv = request.getfixturevalue("ds_Sv_echo_range_irregular") + + channel = ds_Sv["channel"].values[0] + + ds_regridded = ep.commongrid.resample_to_geometry(ds_Sv.chunk({"channel": 1, "ping_time": 1000, "range_sample": -1}), target_variable="Sv", target_channel=channel) + + original_channel = ds_Sv["Sv"].sel(channel=channel) + reggrided_channel = ds_regridded["Sv"].sel(channel=channel) + + xr.testing.assert_allclose( + reggrided_channel, + original_channel, + rtol=1e-12, + atol=1e-12) + + +@pytest.mark.integration +@pytest.mark.parametrize( + ("er_type"), + [ + ("regular"), + ], +) +def test_resample_with_channel(request, er_type, calculate_total_energy): + """Testing the resample_to_geometry by evaluating energy loss after resampling using a target channel""" + if er_type == "regular": + ds_Sv = request.getfixturevalue("ds_Sv_echo_range_regular") + else: + ds_Sv = request.getfixturevalue("ds_Sv_echo_range_irregular") + channel = ds_Sv["channel"].values[0] + + ds_regridded = ep.commongrid.resample_to_geometry(ds_Sv.chunk({"channel": 1, "ping_time": 1000, "range_sample": -1}), target_variable="Sv", target_channel=channel) + + channels = ds_Sv["channel"].values + total_energy_original = [calculate_total_energy(ds_Sv, ch) for ch in channels] + total_energy_regridded = [calculate_total_energy(ds_regridded, ch) for ch in channels] + + np.testing.assert_allclose( + total_energy_original, + total_energy_regridded, + rtol=1e-3, + err_msg="Total energy was not conserved during regridding!" + ) + +@pytest.mark.integration +@pytest.mark.parametrize( + ("er_type"), + [ + ("regular"), + ], +) +def test_resample_with_grid(request, er_type, calculate_total_energy): + """Testing the resample_to_geometry by evaluating energy loss after resampling using a target grid""" + if er_type == "regular": + ds_Sv = request.getfixturevalue("ds_Sv_echo_range_regular") + else: + ds_Sv = request.getfixturevalue("ds_Sv_echo_range_irregular") + + channel = ds_Sv["channel"].values[0] + + ds_regridded = ep.commongrid.resample_to_geometry( + ds_Sv.chunk({"channel": 1, "ping_time": 1000, "range_sample": -1}), + target_variable="Sv", + target_grid=ds_Sv.chunk({"channel": 1, "ping_time": 500, "range_sample": -1})["echo_range"].sel(channel=channel) + ) + + channels = ds_Sv["channel"].values + + # We can now call the factory function returned by the fixture + total_energy_original = [calculate_total_energy(ds_Sv, ch) for ch in channels] + total_energy_regridded = [calculate_total_energy(ds_regridded, ch) for ch in channels] + + np.testing.assert_allclose( + total_energy_original, + total_energy_regridded, + rtol=1e-3, + err_msg="Total energy was not conserved during regridding!" + ) + +# Integration test with EK80 data + +@pytest.mark.integration +def test_range_spacing(ek80_path): + """Testing the rsampling interval being accurate after using regrid function""" + + ek80_raw_path = str( + ek80_path.joinpath('ar2.0-D20201210-T000409.raw') + ) # CW complex + echodata = ep.open_raw(ek80_raw_path, sonar_model='EK80') + ds_Sv = ep.calibrate.compute_Sv( + echodata, waveform_mode='CW', encode_mode='complex' + ) + + channel = ds_Sv["channel"].values[0] + + ds_regridded = ep.commongrid.resample_to_geometry(ds_Sv, target_variable="Sv", target_channel=channel) + + c = float(ds_Sv["sound_speed"].values) + dt = float(echodata["Sonar/Beam_group1"]["sample_interval"].sel(channel=channel).median("ping_time").values) + + delta_expected = c * dt / 2.0 + + for ch in ds_regridded.channel.values: + r = ds_regridded["echo_range"].sel(channel=ch).isel(ping_time=0).values + + idx = np.where(np.isfinite(r))[0][:2] + + assert len(idx) == 2, f"Not enough finite echo_range values to compute delta for channel {ch}" + + delta_actual = float(r[idx[1]] - r[idx[0]]) + + np.testing.assert_allclose( + delta_actual, + delta_expected, + rtol=1e-4, + err_msg=f"Resolution mismatch on channel {ch}. Expected {delta_expected}, got {delta_actual}." + ) + +@pytest.mark.unit +def test_resample_log_variable_sp(ds_Sv_echo_range_regular): + """ + Test that Sp variables are resampled through the same + linear-domain averaging pathway as Sv, with a warning. + """ + + ds = ds_Sv_echo_range_regular.copy() + ds["Sp"] = ds["Sv"].copy() + + channel = ds["channel"].values[0] + + with pytest.warns(UserWarning, match="primarily intended and validated for Sv"): + ds_regridded = ep.commongrid.resample_to_geometry( + ds.chunk({"channel": 1, "ping_time": 1000, "range_sample": -1}), + target_variable="Sp", + target_channel=channel, + ) + + xr.testing.assert_allclose( + ds_regridded["Sp"].sel(channel=channel), + ds["Sp"].sel(channel=channel), + rtol=1e-12, + atol=1e-12, + ) + +@pytest.mark.unit +def test_resample_log_variable_ts(ds_Sv_echo_range_regular): + """ + Test that TS variables are resampled through the same + linear-domain averaging pathway as Sv, with a warning. + """ + + ds = ds_Sv_echo_range_regular.copy() + ds["TS"] = ds["Sv"].copy() + + channel = ds["channel"].values[0] + + with pytest.warns(UserWarning, match="primarily intended and validated for Sv"): + ds_regridded = ep.commongrid.resample_to_geometry( + ds.chunk({"channel": 1, "ping_time": 1000, "range_sample": -1}), + target_variable="TS", + target_channel=channel, + ) + + xr.testing.assert_allclose( + ds_regridded["TS"].sel(channel=channel), + ds["TS"].sel(channel=channel), + rtol=1e-12, + atol=1e-12, + ) + +@pytest.mark.unit +def test_resample_requires_exactly_one_target(ds_Sv_echo_range_regular): + """ + Ensure that users provide exactly one target geometry. + """ + + channel = ds_Sv_echo_range_regular["channel"].values[0] + + target_grid = ( + ds_Sv_echo_range_regular["echo_range"] + .isel(channel=0) + ) + + with pytest.raises(ValueError): + ep.commongrid.resample_to_geometry( + ds_Sv_echo_range_regular, + target_variable="Sv", + ) + + with pytest.raises(ValueError): + ep.commongrid.resample_to_geometry( + ds_Sv_echo_range_regular, + target_variable="Sv", + target_channel=channel, + target_grid=target_grid, + ) + +@pytest.mark.unit +def test_resample_angle_variable_warns(ds_Sv_echo_range_regular): + """ + Angle variables are currently resampled geometrically. + A warning should be emitted. + """ + + ds = ds_Sv_echo_range_regular.copy() + ds["angle_alongship"] = xr.zeros_like(ds["Sv"]) + + channel = ds["channel"].values[0] + + with pytest.warns(UserWarning, match="Angle variables are resampled geometrically"): + ep.commongrid.resample_to_geometry( + ds.chunk({"channel": 1, "ping_time": 1000, "range_sample": -1}), + target_variable="angle_alongship", + target_channel=channel, + ) + +@pytest.mark.integration +def test_resample_matches_echoview_match_geometry(test_path): + """ + Compare resample_to_geometry against Echoview Match Geometry output. + + The comparison excludes the near-transducer region where Echoview and + echopype handle the first samples differently. + """ + + raw_path = ( + test_path["EK80"] + / "ncei-wcsd" + / "SH2306" + / "Hake-D20230811-T165727.raw" + ) + + reference_dir = test_path["RESAMPLE_GEOMETRY"] + + echoview_files = { + 18_000: reference_dir / "18kHz_regridded.sv.csv", + 120_000: reference_dir / "120kHz_regridded.sv.csv", + 200_000: reference_dir / "200kHz.sv.csv", + } + + ed = ep.open_raw(raw_path, sonar_model="EK80") + + ds_Sv = ep.calibrate.compute_Sv( + echodata=ed, + waveform_mode="CW", + encode_mode="power", + ) + + ds_Sv = ep.consolidate.add_depth(ds_Sv) + + target_channel = ds_Sv.channel.sel( + channel=ds_Sv.frequency_nominal == 200_000 + ).item() + + ds_regridded = ep.commongrid.resample_to_geometry( + ds_Sv.chunk({"channel": 1, "ping_time": 1000, "range_sample": -1}), + target_variable="Sv", + target_channel=target_channel, + ) + + echoview_arrays = [] + + for freq, path in echoview_files.items(): + channel = ds_regridded.channel.sel( + channel=ds_regridded.frequency_nominal == freq + ).item() + + arr = pd.read_csv(path, header=None).to_numpy(dtype=float) + template = ds_regridded["Sv"].sel(channel=channel) + + if ( + arr.shape[0] != template.sizes["ping_time"] + and arr.shape[1] == template.sizes["ping_time"] + ): + arr = arr.T + + assert arr.shape[0] == template.sizes["ping_time"] + + template = template.isel(range_sample=slice(0, arr.shape[1])) + + da = xr.DataArray( + arr, + dims=("ping_time", "range_sample"), + coords={ + "ping_time": template["ping_time"], + "range_sample": template["range_sample"], + }, + name="Sv", + ).expand_dims(channel=[channel]) + + echoview_arrays.append(da) + + echoview_Sv = xr.concat(echoview_arrays, dim="channel") + + ds_echoview = xr.Dataset( + data_vars={ + "Sv": echoview_Sv, + "echo_range": ds_regridded["echo_range"] + .sel(channel=echoview_Sv.channel) + .isel(range_sample=slice(0, echoview_Sv.sizes["range_sample"])), + "frequency_nominal": ds_regridded["frequency_nominal"].sel( + channel=echoview_Sv.channel + ), + } + ) + + ds_regridded = ( + ds_regridded + .sel(channel=ds_echoview.channel) + .isel(range_sample=slice(0, ds_echoview.sizes["range_sample"])) + ) + + xr.testing.assert_allclose( + ds_regridded["echo_range"], + ds_echoview["echo_range"], + rtol=0, + atol=0, + ) + + # Echoview and echopype differ in how the first samples near the + # transducer are represented after regridding. Exclude this region + # and compare only the overlapping valid portion of the water column. + min_range_m = 2.0 + + diff = ( + ds_regridded["Sv"] + .where(ds_echoview["echo_range"] > min_range_m) + - ds_echoview["Sv"].where(ds_echoview["echo_range"] > min_range_m) + ).compute() + + # Compare only finite values because the masking above introduces NaNs + # outside the comparison region. + finite_diff = diff.values[np.isfinite(diff.values)] + assert finite_diff.size > 0 + + np.testing.assert_allclose( + finite_diff, + np.zeros_like(finite_diff), + atol=0.003, + rtol=0, + ) + +@pytest.mark.integration +def test_resample_shared_depth_and_range_geometry(test_path): + """ + Test that channels share the same echo_range geometry after resampling, + and the same depth geometry after applying add_depth(). + """ + + raw_path = ( + test_path["EK80"] + / "ncei-wcsd" + / "SH2306" + / "Hake-D20230811-T165727.raw" + ) + + ed = ep.open_raw(raw_path, sonar_model="EK80") + + ds_Sv = ep.calibrate.compute_Sv( + echodata=ed, + waveform_mode="CW", + encode_mode="power", + ) + + ds_Sv = ep.consolidate.add_depth(ds_Sv) + + target_channel = ds_Sv.channel.sel( + channel=ds_Sv.frequency_nominal == 200_000 + ).item() + + ds_regridded = ep.commongrid.resample_to_geometry( + ds_Sv.chunk({"channel": 1, "ping_time": 1000, "range_sample": -1}), + target_variable="Sv", + target_channel=target_channel, + ) + + # Verify add_depth() remains compatible with the resampled output + ds_regridded = ep.consolidate.add_depth(ds_regridded) + + ref_echo_range = ds_regridded["echo_range"].sel(channel=target_channel) + ref_depth = ds_regridded["depth"].sel(channel=target_channel) + + for ch in ds_regridded.channel.values: + + np.testing.assert_allclose( + ds_regridded["echo_range"].sel(channel=ch).values, + ref_echo_range.values, + atol=0, + rtol=0, + ) + + np.testing.assert_allclose( + ds_regridded["depth"].sel(channel=ch).values, + ref_depth.values, + atol=0, + rtol=0, + ) \ No newline at end of file diff --git a/echopype/tests/conftest.py b/echopype/tests/conftest.py index 4dd4fed52..6ab4a58c6 100644 --- a/echopype/tests/conftest.py +++ b/echopype/tests/conftest.py @@ -62,7 +62,7 @@ "es70.zip": "sha256:a6b4f27f33f09bace26264de6984fdb4111a3a0337bc350c3c1d25c8b3effc7c", "es80.zip": "sha256:b37ee01462f46efe055702c20be67d2b8c6b786844b183b16ffc249c7c5ec704", "legacy_datatree.zip": "sha256:820cd252047dbf35fa5fb04a9aafee7f7659e0fe4f7d421d69901c57deb6c9d5", # noqa: E501 - "resample_to_geometry_example_data.zip": "sha256:7e4bca9a7e87ff70a7685eb4d9dfa52e7ac16e1acbeeb83601a79be5a6ac182c", + "resample_to_geometry_example_data.zip": "sha256:1a45e3ac31ef16d742b16155dc4b7f62511abd8d25547f55fcd9146446f60d07", } EP = pooch.create(