From edcf8e86aec098e52d9bcf8f1ac56e2925c5e5dc Mon Sep 17 00:00:00 2001 From: Karl Tarbet Date: Tue, 2 Jun 2026 07:41:39 -0700 Subject: [PATCH 1/3] Using new /peaks endpoint (first iteration) --- Readme.md | 125 +++++------------- src/main/java/org/opendcs/Demo.java | 20 +++ .../usgs/waterdata/InstantaneousValue.java | 28 +++- .../usgs/waterdata/UsgsWaterDataApi.java | 41 ++++-- .../opendcs/usgs/waterdata/PeaksCsvTest.java | 42 ++++++ .../usgs/waterdata/UsgsWaterDataApiTest.java | 76 ++++++++--- 6 files changed, 213 insertions(+), 119 deletions(-) create mode 100644 src/test/java/org/opendcs/usgs/waterdata/PeaksCsvTest.java diff --git a/Readme.md b/Readme.md index 14ab63b..0ecddf3 100644 --- a/Readme.md +++ b/Readme.md @@ -1,6 +1,6 @@ # usgs-water-api -A Java library for retrieving hydrologic data (daily values, peaks, continous (15-minute), time-series metadata, monitoring locations) from the USGS Water Data API. +A Java library for retrieving hydrologic data (daily values, peaks, continuous (~15-minute), time-series metadata, monitoring locations) from the USGS Water Data API. 
```gradle @@ -63,6 +63,26 @@ implementation("org.opendcs:usgs-waterdata-api:0.3.*") eastfenderTS.printToConsole(5); + // Read annual peak flow and stage (one value per water year) + System.out.println("\nRead Annual Peaks, Boise River near Featherville"); + var peakMetadata = UsgsWaterDataApi.getTimeSeriesMetadata("USGS-13186000"); + + // Peaks metadata is marked with computationIdentifier "Max At Event Time" + var flowMeta = TimeSeriesMetadata.filter(peakMetadata) + .parameterCode(Parameter.DISCHARGE).computation("Max At Event Time") + .findFirst().orElseThrow(); + var stageMeta = TimeSeriesMetadata.filter(peakMetadata) + .parameterCode(Parameter.STAGE).computation("Max At Event Time") + .findFirst().orElseThrow(); + + TimeSeries peakFlow = UsgsWaterDataApi.getAnnualPeaks(flowMeta); + TimeSeries peakStage = UsgsWaterDataApi.getAnnualPeaks(stageMeta); + + System.out.println("Peak " + peakFlow.getParameterName() + " (" + peakFlow.getUnitOfMeasure() + "):"); + peakFlow.printToConsole(5); + System.out.println("Peak " + peakStage.getParameterName() + " (" + peakStage.getUnitOfMeasure() + "):"); + peakStage.printToConsole(5); + ```output.txt Read Daily Mean Discharge, Boise River at Parma Station: USGS-13213000 @@ -108,6 +128,20 @@ Reading East Fender time-series... (current data) 2026-04-10T15:15:00Z = 19.0 2026-04-10T15:30:00Z = 19.0 2026-04-10T15:45:00Z = 19.0 + +Read Annual Peaks, Boise River near Featherville +Peak Discharge (ft^3/s): + 1945-05-05T00:00:00Z = 2930.0 + 1946-04-27T00:00:00Z = 4210.0 + 1947-05-09T00:00:00Z = 4300.0 + 1948-05-29T00:00:00Z = 4750.0 + 1949-05-17T00:00:00Z = 3880.0 +Peak Gage height (ft): + 1945-05-05T00:00:00Z = 5.43 + 1946-04-27T00:00:00Z = 6.66 + 1947-05-09T00:00:00Z = 6.75 + 1948-05-29T00:00:00Z = 6.87 + 1949-05-17T00:00:00Z = 6.15 ``` @@ -139,91 +173,4 @@ Files are named from the response `Content-Disposition` header. 
Duplicate filena # TODO - - Allow user to specify what paramters they want such as 'Flow' - - Use the /combined-metadata endpoint instead (For list of sites with daily data) and filter on data_type. You'll get one row per data collection rather than one row per site - - keep querys below MAX size.. - - document -Djava.net.useSystemProxies=true - - USGS to do: /ogcapi/v0/collections/peaks and /stac/v0/collections/ratings - -Station Name=HYDER AK -Stream Name=SALMON R -Station ID=15008000 -Version Name=USGS -Latitude=56.0259971857126 -Longitude=-130.06687006967235 -Elevation=286 -Coord Datum=NAD27 -PARAMETERS=ALL -PARAMETERS=Flow,Stage (new feature) - - - put Site-types in config somewhere: - - Internally use MonitoringLocation instead of legacy UsgsStation - - get period of record from api (start with Daily) - - get more site types. - - when reading regular time series , check dates for ordering () - -gs-w_waterdata_support@usgs.gov - - -Reference and Examples - - - -USGS 13037500 SNAKE RIVER NR HEISE ID - -Parameter types -00060 Discharge cfs 1Day -00065 Gage Height ft - -https://api.waterdata.usgs.gov/ogcapi/v0/collections/parameter-codes/items - - -monitoring locations - -curl -X 'GET' \ - 'https://api.waterdata.usgs.gov/ogcapi/v0/collections/monitoring-locations/items?f=csv&lang=en-US&limit=10000&skipGeometry=true&offset=0&agency_code=USGS&state_code=06&site_type_code=ST' \ - -H 'accept: application/geo+json' - -https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items -Streams ST: -Canal: ST-Canal -Tidal Stream: ST-TS -Lake, Reservoir: LK -https://api.waterdata.usgs.gov/ogcapi/v0/collections/monitoring-locations/items?f=csv&lang=en-US&limit=10000&skipGeometry=false&offset=0&agency_code=USGS&state_code=06&site_type_code=ST - - - -Time series metadata - -https://api.waterdata.usgs.gov/ogcapi/v0/collections/time-series-metadata/items?limit=2000&state_name=Utah - - - - -daily data -old: 
https://waterservices.usgs.gov/nwis/dv?sites=10059500&startDT=2025-02-26&endDT=2026-02-28&format=rdb - - -https://api.waterdata.usgs.gov/ogcapi/v0/collections/daily/items?f=csv&lang=en-US&limit=1000&properties=time,value,unit_of_measure&skipGeometry=true&sortby=time&offset=0&monitoring_location_id=USGS-11463500&parameter_code=00060%2C00065&statistic_id=00003&time=2018-02-12T00%3A00%3A00Z%2F2018-03-18T12%3A31%3A12Z - - - - https://api.waterdata.usgs.gov/ogcapi/v0/collections/time-series-metadata/items?f=csv&lang=en-US&limit=10&skipGeometry=false&offset=0&monitoring_location_id=USGS-13037500 - - - -15 Minute data... - -/collections/continuous/items - - - -States - -https://api.waterdata.usgs.gov/ogcapi/v0/collections/states/items?f=csv&lang=en-US&limit=10000&skipGeometry=false&offset=0 - - - -curl -X 'GET' \ - 'https://api.waterdata.usgs.gov/ogcapi/v0/collections/time-series-metadata/items?f=csv&lang=en-US&limit=10&skipGeometry=true&offset=0&parameter_code=00060%2C00065&statistic_id=00030&state_name=Idaho' \ - -H 'accept: application/geo+json' + - /ratings \ No newline at end of file diff --git a/src/main/java/org/opendcs/Demo.java b/src/main/java/org/opendcs/Demo.java index 539c323..94dbfc7 100644 --- a/src/main/java/org/opendcs/Demo.java +++ b/src/main/java/org/opendcs/Demo.java @@ -58,5 +58,25 @@ public static void main(String[] args) throws Exception { start, end); eastfenderTS.printToConsole(5); + + // Read annual peak flow and stage (one value per water year) + System.out.println("\nRead Annual Peaks, Boise River near Featherville"); + var peakMetadata = UsgsWaterDataApi.getTimeSeriesMetadata("USGS-13186000"); + + // Peaks metadata is marked with computationIdentifier "Max At Event Time" + var flowMeta = TimeSeriesMetadata.filter(peakMetadata) + .parameterCode(Parameter.DISCHARGE).computation("Max At Event Time") + .findFirst().orElseThrow(); + var stageMeta = TimeSeriesMetadata.filter(peakMetadata) + .parameterCode(Parameter.STAGE).computation("Max At Event Time") + 
.findFirst().orElseThrow(); + + TimeSeries peakFlow = UsgsWaterDataApi.getAnnualPeaks(flowMeta); + TimeSeries peakStage = UsgsWaterDataApi.getAnnualPeaks(stageMeta); + + System.out.println("Peak " + peakFlow.getParameterName() + " (" + peakFlow.getUnitOfMeasure() + "):"); + peakFlow.printToConsole(5); + System.out.println("Peak " + peakStage.getParameterName() + " (" + peakStage.getUnitOfMeasure() + "):"); + peakStage.printToConsole(5); } } \ No newline at end of file diff --git a/src/main/java/org/opendcs/usgs/waterdata/InstantaneousValue.java b/src/main/java/org/opendcs/usgs/waterdata/InstantaneousValue.java index 7d64978..43d923e 100644 --- a/src/main/java/org/opendcs/usgs/waterdata/InstantaneousValue.java +++ b/src/main/java/org/opendcs/usgs/waterdata/InstantaneousValue.java @@ -1,7 +1,11 @@ package org.opendcs.usgs.waterdata; import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalTime; +import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; public class InstantaneousValue { public Instant time; @@ -23,11 +27,33 @@ private static Instant parse(String timeStr) { static InstantaneousValue fromRow(DataTable table, int row) { InstantaneousValue v = new InstantaneousValue(); - v.time = parse(table.get(row, "time")); + v.time = parse(table.get(row, "time")); v.value = table.getDouble(row, "value", UsgsWaterDataApi.UNDEFINED_DOUBLE); return v; } + /** + * Builds a value from a peaks-collection row, combining time and time_of_day. 
+ */ + static InstantaneousValue fromPeakRow(DataTable table, int row) { + InstantaneousValue v = new InstantaneousValue(); + v.time = parsePeakTime(table.get(row, "time"), table.get(row, "time_of_day")); + v.value = table.getDouble(row, "value", UsgsWaterDataApi.UNDEFINED_DOUBLE); + return v; + } + + static Instant parsePeakTime(String dateStr, String timeStr) { + LocalDate date = LocalDate.parse(dateStr); + if (timeStr != null && !timeStr.isBlank()) { + try { + return date.atTime(LocalTime.parse(timeStr.trim())).toInstant(ZoneOffset.UTC); + } catch (DateTimeParseException e) { + // fall through to date-only + } + } + return date.atStartOfDay().toInstant(ZoneOffset.UTC); + } + @Override public String toString() { return time + " = " + value; diff --git a/src/main/java/org/opendcs/usgs/waterdata/UsgsWaterDataApi.java b/src/main/java/org/opendcs/usgs/waterdata/UsgsWaterDataApi.java index f6d54c4..feef683 100644 --- a/src/main/java/org/opendcs/usgs/waterdata/UsgsWaterDataApi.java +++ b/src/main/java/org/opendcs/usgs/waterdata/UsgsWaterDataApi.java @@ -41,6 +41,9 @@ public class UsgsWaterDataApi { static final String TIME_SERIES_QUERY_ID = "items?f=csv&lang=en-US&limit=50000&properties=time,value&skipGeometry=true&sortby=time&offset=0&time_series_id=%s&time=%s/%s"; static final String DAILY_URL_ID = ROOT_URL + "daily/" + TIME_SERIES_QUERY_ID; static final String CONTINUOUS_URL_ID = ROOT_URL + "continuous/" + TIME_SERIES_QUERY_ID; + static final String PEAKS_QUERY = "peaks/items?f=csv&lang=en-US&limit=50000&properties=value,time,time_of_day&skipGeometry=true&sortby=time&offset=0&time_series_id=%s"; + static final String PEAKS_URL = ROOT_URL + PEAKS_QUERY; + static final String PEAKS_URL_RANGE = ROOT_URL + PEAKS_QUERY + "&time=%s/%s"; static final String TIME_SERIES_METADATA_PROPERTIES = 
"id,unit_of_measure,parameter_name,parameter_code,statistic_id,hydrologic_unit_code,state_name,last_modified,begin,end,begin_utc,end_utc,computation_period_identifier,computation_identifier,thresholds,sublocation_identifier,primary,monitoring_location_id,web_description,parameter_description,parent_time_series_id"; static final String TIME_SERIES_METADATA_URL = ROOT_URL + "time-series-metadata/items?f=csv&lang=en-US&limit=50000&properties=" + TIME_SERIES_METADATA_PROPERTIES + "&skipGeometry=false&offset=0&monitoring_location_id=%s"; static final String TIME_SERIES_METADATA_POST_URL = ROOT_URL + "time-series-metadata/items?f=csv&lang=en-US&limit=50000&properties=" + TIME_SERIES_METADATA_PROPERTIES + "&skipGeometry=false&offset=0"; @@ -253,21 +256,39 @@ public static Map> getTimeSeriesMetadata(String } /** - * Retrieves annual peak streamflow and gage height from the legacy NWIS peak-flow service. - * @see PeakFlowService + * Retrieves annual peaks for the time series identified by the given metadata. + * @see #getAnnualPeaks(TimeSeriesMetadata, String, String) */ - public static List> getAnnualPeaks( - List siteMetadata) throws Exception { - return PeakFlowService.getAnnualPeaks(siteMetadata); + public static TimeSeries getAnnualPeaks(TimeSeriesMetadata metadata) throws Exception { + return getAnnualPeaks(metadata, null, null); } /** - * Retrieves annual peak streamflow and gage height within a date range. - * @see PeakFlowService + * Retrieves annual peaks within a date range for the time series identified by the given metadata. 
+ * + * @param metadata identifies the peak time series (computationIdentifier "Max At Event Time") + * @param startDate start of date range (yyyy-MM-dd), or null for no lower bound + * @param endDate end of date range (yyyy-MM-dd), or null for no upper bound */ - public static List> getAnnualPeaks( - List siteMetadata, String startDate, String endDate) throws Exception { - return PeakFlowService.getAnnualPeaks(siteMetadata, startDate, endDate); + public static TimeSeries getAnnualPeaks(TimeSeriesMetadata metadata, + String startDate, String endDate) throws Exception { + return new TimeSeries<>(metadata, fetchPeaks(metadata.id, startDate, endDate)); + } + + /** + * Fetches annual peaks for one time series from the peaks collection. + */ + private static List fetchPeaks(String timeSeriesId, + String startDate, String endDate) throws Exception { + String url; + if (startDate != null && endDate != null) { + url = String.format(PEAKS_URL_RANGE, timeSeriesId, startDate, endDate); + } else { + url = String.format(PEAKS_URL, timeSeriesId); + } + String csv = WebUtility.getPage(url); + if (csv == null || csv.isBlank()) return Collections.emptyList(); + return CsvFile.fromString(csv).mapRows(InstantaneousValue::fromPeakRow); } } diff --git a/src/test/java/org/opendcs/usgs/waterdata/PeaksCsvTest.java b/src/test/java/org/opendcs/usgs/waterdata/PeaksCsvTest.java new file mode 100644 index 0000000..55c36da --- /dev/null +++ b/src/test/java/org/opendcs/usgs/waterdata/PeaksCsvTest.java @@ -0,0 +1,42 @@ +package org.opendcs.usgs.waterdata; + +import org.junit.jupiter.api.Test; + +import java.time.Instant; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Unit tests for parsing the Peak Csv format. 
+ */ +class PeaksCsvTest { + + private static final String PEAKS_CSV = + "value,time,time_of_day\n" + + "12300,1922-07-12,\n" + // no time of day -> start of day + "45600,2006-03-13,15:30\n" + // HH:mm time of day + ",2010-05-01,\n"; // missing value -> UNDEFINED_DOUBLE + + private List parse() throws Exception { + return CsvFile.fromString(PEAKS_CSV).mapRows(InstantaneousValue::fromPeakRow); + } + + @Test + void emptyTimeOfDay_returnsStartOfDay() throws Exception { + assertEquals(Instant.parse("1922-07-12T00:00:00Z"), parse().get(0).time); + } + + @Test + void timeOfDayWithHHmm_parsesCorrectly() throws Exception { + assertEquals(Instant.parse("2006-03-13T15:30:00Z"), parse().get(1).time); + } + + @Test + void valuesParsedWithMissingAsUndefined() throws Exception { + List values = parse(); + assertEquals(12300.0, values.get(0).value); + assertEquals(45600.0, values.get(1).value); + assertEquals(UsgsWaterDataApi.UNDEFINED_DOUBLE, values.get(2).value, "missing value -> undefined"); + } +} diff --git a/src/test/java/org/opendcs/usgs/waterdata/UsgsWaterDataApiTest.java b/src/test/java/org/opendcs/usgs/waterdata/UsgsWaterDataApiTest.java index 0a468ff..9a47e71 100644 --- a/src/test/java/org/opendcs/usgs/waterdata/UsgsWaterDataApiTest.java +++ b/src/test/java/org/opendcs/usgs/waterdata/UsgsWaterDataApiTest.java @@ -257,22 +257,26 @@ void getContinuousTimeSeries_twoYears_noDuplicateTimes() throws Exception { } } + /** Finds the peak time series (computationIdentifier "Max At Event Time") for a parameter. */ + private TimeSeriesMetadata peakSeries(List metadata, String parameterCode) { + return TimeSeriesMetadata.filter(metadata) + .parameterCode(parameterCode).computation("Max At Event Time") + .findFirst().orElseThrow(() -> new AssertionError("No peak series for parameter " + parameterCode)); + } + /** - * Tests retrieving annual peak streamflow from the legacy NWIS RDB service. + * Tests retrieving annual peak streamflow and stage for the Fox River. 
* * ./gradlew integrationTest --tests "org.opendcs.usgs.waterdata.UsgsWaterDataApiTest.getAnnualPeaks_FoxRiver" -PusgsDebug=true */ @Test @Tag("integration") void getAnnualPeaks_FoxRiver() throws Exception { - // Fox River at Wayland, MO — site 05495000 (same site from the RDB example) + // Fox River at Wayland, MO - site 05495000 var siteMetadata = UsgsWaterDataApi.getTimeSeriesMetadata("USGS-05495000"); - List> peakSeries = UsgsWaterDataApi.getAnnualPeaks(siteMetadata); - assertEquals(2, peakSeries.size(), "Expected discharge and stage series"); - - TimeSeries flowPeaks = peakSeries.get(0); - TimeSeries stagePeaks = peakSeries.get(1); + TimeSeries flowPeaks = UsgsWaterDataApi.getAnnualPeaks(peakSeries(siteMetadata, Parameter.DISCHARGE)); + TimeSeries stagePeaks = UsgsWaterDataApi.getAnnualPeaks(peakSeries(siteMetadata, Parameter.STAGE)); assertFalse(flowPeaks.isEmpty(), "Expected annual peak flow values"); assertFalse(stagePeaks.isEmpty(), "Expected annual peak stage values"); @@ -280,14 +284,12 @@ void getAnnualPeaks_FoxRiver() throws Exception { assertEquals(Parameter.DISCHARGE, flowPeaks.metadata.parameterCode); assertEquals(Parameter.STAGE, stagePeaks.metadata.parameterCode); - assertEquals("Water Year", flowPeaks.metadata.computationPeriodIdentifier); assertEquals("Max At Event Time", flowPeaks.metadata.computationIdentifier); - // Verify units come from metadata, not hardcoded + // Units come from metadata, not hardcoded assertNotNull(flowPeaks.metadata.unitOfMeasure, "Flow units should come from metadata"); assertNotNull(stagePeaks.metadata.unitOfMeasure, "Stage units should come from metadata"); - logger.info("Flow units: " + flowPeaks.metadata.unitOfMeasure); - logger.info("Stage units: " + stagePeaks.metadata.unitOfMeasure); + logger.info("Flow units: " + flowPeaks.getUnitOfMeasure() + ", Stage units: " + stagePeaks.getUnitOfMeasure()); // First peak should be from 1922 InstantaneousValue first = flowPeaks.get(0); @@ -295,9 +297,7 @@ void 
getAnnualPeaks_FoxRiver() throws Exception { assertEquals(1922, LocalDate.ofInstant(first.time, java.time.ZoneOffset.UTC).getYear()); for (int i = 0; i < Math.min(5, flowPeaks.size()); i++) { - InstantaneousValue flow = flowPeaks.get(i); - InstantaneousValue stage = stagePeaks.get(i); - logger.info(" " + flow.time + " flow=" + flow.value + " stage=" + stage.value); + logger.info(" " + flowPeaks.get(i).time + " flow=" + flowPeaks.get(i).value); } } @@ -306,11 +306,8 @@ void getAnnualPeaks_FoxRiver() throws Exception { void getAnnualPeaks_withDateRange() throws Exception { var siteMetadata = UsgsWaterDataApi.getTimeSeriesMetadata("USGS-05495000"); - List> peakSeries = - UsgsWaterDataApi.getAnnualPeaks(siteMetadata, "2000-01-01", "2010-12-31"); - assertFalse(peakSeries.isEmpty()); - - TimeSeries flowPeaks = peakSeries.get(0); + TimeSeries flowPeaks = UsgsWaterDataApi.getAnnualPeaks( + peakSeries(siteMetadata, Parameter.DISCHARGE), "2000-01-01", "2010-12-31"); assertFalse(flowPeaks.isEmpty(), "Expected peaks in 2000-2010 range"); logger.info("Peaks in range: " + flowPeaks.size()); @@ -320,4 +317,45 @@ void getAnnualPeaks_withDateRange() throws Exception { } } + /** + * Demonstrates the metadata-to-peaks scenario: pull site metadata, filter precisely to the + * peak flow and peak stage time series, then query each one. 
+ * + * ./gradlew integrationTest --tests "org.opendcs.usgs.waterdata.UsgsWaterDataApiTest.annualPeaks_metadataScenario" -PusgsDebug=true + */ + @Test + @Tag("integration") + void annualPeaks_metadataScenario() throws Exception { + String locationId = "USGS-13186000"; // Boise River near Featherville + + List metadata = UsgsWaterDataApi.getTimeSeriesMetadata(locationId); + assertFalse(metadata.isEmpty(), "Expected time-series metadata for " + locationId); + logger.info(locationId + " metadata count: " + metadata.size()); + + TimeSeriesMetadata flowMeta = TimeSeriesMetadata.filter(metadata) + .parameterCode(Parameter.DISCHARGE).computation("Max At Event Time") + .findFirst().orElseThrow(() -> new AssertionError("Expected a peak discharge series")); + TimeSeriesMetadata stageMeta = TimeSeriesMetadata.filter(metadata) + .parameterCode(Parameter.STAGE).computation("Max At Event Time") + .findFirst().orElseThrow(() -> new AssertionError("Expected a peak stage series")); + + assertEquals("Water Year", flowMeta.computationPeriodIdentifier); + assertEquals("ft^3/s", flowMeta.unitOfMeasure); + assertEquals("ft", stageMeta.unitOfMeasure); + + TimeSeries flowPeaks = UsgsWaterDataApi.getAnnualPeaks(flowMeta); + TimeSeries stagePeaks = UsgsWaterDataApi.getAnnualPeaks(stageMeta); + + assertEquals(Parameter.DISCHARGE, flowPeaks.metadata.parameterCode); + assertEquals(Parameter.STAGE, stagePeaks.metadata.parameterCode); + assertFalse(flowPeaks.isEmpty(), "Expected annual peak flow values"); + assertFalse(stagePeaks.isEmpty(), "Expected annual peak stage values"); + + logger.info("Flow peaks: " + flowPeaks.size() + " [" + flowPeaks.getUnitOfMeasure() + "]"); + logger.info("Stage peaks: " + stagePeaks.size() + " [" + stagePeaks.getUnitOfMeasure() + "]"); + for (int i = 0; i < Math.min(5, flowPeaks.size()); i++) { + logger.info(" " + flowPeaks.get(i).time + " flow=" + flowPeaks.get(i).value); + } + } + } From 5532930410eb91bea6a5768b4b5312fa9cf939c2 Mon Sep 17 00:00:00 2001 From: 
Karl Tarbet Date: Tue, 2 Jun 2026 07:56:03 -0700 Subject: [PATCH 2/3] Add test with 5-minute data --- .../usgs/waterdata/UsgsWaterDataApiTest.java | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/test/java/org/opendcs/usgs/waterdata/UsgsWaterDataApiTest.java b/src/test/java/org/opendcs/usgs/waterdata/UsgsWaterDataApiTest.java index 9a47e71..ccaf094 100644 --- a/src/test/java/org/opendcs/usgs/waterdata/UsgsWaterDataApiTest.java +++ b/src/test/java/org/opendcs/usgs/waterdata/UsgsWaterDataApiTest.java @@ -257,6 +257,36 @@ void getContinuousTimeSeries_twoYears_noDuplicateTimes() throws Exception { } } + /** + * Seven years of 5-minute continuous data at USGS-01477800 (Shellpot Creek), exercising + * chunked queries at a higher resolution than the 15-minute case. The 1999-2006 range far + * exceeds the API's 1100-day limit, so it must be chunked by time. + * + * ./gradlew integrationTest --tests "org.opendcs.usgs.waterdata.UsgsWaterDataApiTest.getContinuousTimeSeries_sevenYears_fiveMinute" -PusgsDebug=true + */ + @Test + @Tag("integration") + void getContinuousTimeSeries_sevenYears_fiveMinute() throws Exception { + String location_id = "USGS-01477800"; + String t1 = "1999-06-01T00:00:00Z"; + String t2 = "2006-06-01T00:00:00Z"; + + TimeSeriesMetadata discharge = TimeSeriesMetadata.filter(UsgsWaterDataApi.getTimeSeriesMetadata(location_id)) + .parameterCode(Parameter.DISCHARGE).statisticId(Statistic.INSTANTANEOUS) + .hasDateRange() + .findFirst().orElseThrow(() -> new AssertionError("Expected an instantaneous discharge series")); + + logger.info("Metadata: " + discharge.parameterCode + " " + discharge.parameterName + + " [" + discharge.unitOfMeasure + "] " + discharge.begin + " to " + discharge.end); + + TimeSeries continuous = UsgsWaterDataApi.getContinuousTimeSeries(discharge, t1, t2); + logger.info(discharge.parameterName + ": " + continuous.size() + " values from " + + continuous.get(0).time + " to " + continuous.get(continuous.size() - 
1).time); + + // Seven years of 5-minute discharge, chunked by time across the API's 1100-day limit. + assertEquals(644478, continuous.size(), "Expected 644,478 five-minute discharge values for 1999-2006"); + } + /** Finds the peak time series (computationIdentifier "Max At Event Time") for a parameter. */ private TimeSeriesMetadata peakSeries(List metadata, String parameterCode) { return TimeSeriesMetadata.filter(metadata) From 371d4905b87462f43af91ac415e42984d6cc3dcb Mon Sep 17 00:00:00 2001 From: Karl Tarbet Date: Tue, 2 Jun 2026 10:31:08 -0700 Subject: [PATCH 3/3] Use Consistent UTC for USGS data import. Fix progress bar for USGS imports --- Readme.md | 34 +++- src/main/java/org/opendcs/Demo.java | 5 + .../usgs/waterdata/PeakFlowService.java | 124 ------------ .../org/opendcs/usgs/waterdata/RdbFile.java | 72 ------- .../usgs/waterdata/UsgsWaterDataApi.java | 74 +++++--- .../opendcs/usgs/waterdata/WebUtility.java | 42 ++--- .../usgs/waterdata/PeakFlowServiceTest.java | 60 ------ .../opendcs/usgs/waterdata/RdbFileTest.java | 70 ------- .../usgs/waterdata/UsgsWaterDataApiTest.java | 16 ++ src/test/resources/peaks-05495000.rdb | 178 ------------------ src/test/resources/peaks.rdb | 15 -- 11 files changed, 118 insertions(+), 572 deletions(-) delete mode 100644 src/main/java/org/opendcs/usgs/waterdata/PeakFlowService.java delete mode 100644 src/main/java/org/opendcs/usgs/waterdata/RdbFile.java delete mode 100644 src/test/java/org/opendcs/usgs/waterdata/PeakFlowServiceTest.java delete mode 100644 src/test/java/org/opendcs/usgs/waterdata/RdbFileTest.java delete mode 100644 src/test/resources/peaks-05495000.rdb delete mode 100644 src/test/resources/peaks.rdb diff --git a/Readme.md b/Readme.md index 0ecddf3..06d7dfc 100644 --- a/Readme.md +++ b/Readme.md @@ -83,6 +83,11 @@ implementation("org.opendcs:usgs-waterdata-api:0.3.*") System.out.println("Peak " + peakStage.getParameterName() + " (" + peakStage.getUnitOfMeasure() + "):"); peakStage.printToConsole(5); + // Read 
the stage-discharge rating curve (raw RDB text) + System.out.println("\nRead Rating Curve, Snake River near Moran, WY"); + String ratings = UsgsWaterDataApi.getRatings("USGS-13011000"); + ratings.lines().limit(20).forEach(System.out::println); + ```output.txt Read Daily Mean Discharge, Boise River at Parma Station: USGS-13213000 @@ -142,6 +147,28 @@ Peak Gage height (ft): 1947-05-09T00:00:00Z = 6.75 1948-05-29T00:00:00Z = 6.87 1949-05-17T00:00:00Z = 6.15 + +Read Rating Curve, Snake River near Moran, WY +# //UNITED STATES GEOLOGICAL SURVEY http://water.usgs.gov/ +# //NATIONAL WATER INFORMATION SYSTEM http://water.usgs.gov/data.html +# //DATA ARE PROVISIONAL AND SUBJECT TO CHANGE UNTIL PUBLISHED BY USGS +# //RETRIEVED: 2026-05-08 22:20:02 +# //WARNING +# //WARNING The stage-discharge rating provided in this file should be +# //WARNING considered provisional and subject to change. Stage-discharge +# //WARNING ratings change over time as the channel features that control +# //WARNING the relation between stage and discharge vary. Users are +# //WARNING cautioned to consider carefully the applicability of this +# //WARNING rating before using it for decisions that concern personal or +# //WARNING public safety or operational consequences. +# //FILE TYPE="NWIS RATING" +# //DATABASE NUMBER=01 DESCRIPTION=" Standard data base for this site." +# //STATION AGENCY="USGS " NUMBER="13011000 " TIME_ZONE="MST" DST_FLAG=Y +# //STATION NAME="SNAKE RIVER NEAR MORAN, WY" +# //LABEL="Discharge (ft^3/s)" +# //PARAMETER CODE="00060" +# //RATING SHIFTED="20260508222002 MST" +# //RATING ID="1.0" TYPE="STGQ" NAME="stage-discharge" AGING=???? ``` @@ -168,9 +195,4 @@ To save API responses to `~/usgs.waterdata/` for inspection, add the JVM flag: -Dusgs.debug=true ``` -Files are named from the response `Content-Disposition` header. Duplicate filenames get an incrementing suffix (e.g. `daily.csv`, `daily1.csv`, `daily2.csv`). 
- - -# TODO - - - /ratings \ No newline at end of file +Files are named `<collection>_<id>` after the request, e.g. `daily_4993ed5a....csv`, `time-series-metadata_USGS-13037500.csv`, or `ratings_13011000.rdb`. Duplicate filenames get an incrementing suffix (e.g. `daily_....csv`, `daily_...1.csv`). diff --git a/src/main/java/org/opendcs/Demo.java b/src/main/java/org/opendcs/Demo.java index 94dbfc7..5ab79f0 100644 --- a/src/main/java/org/opendcs/Demo.java +++ b/src/main/java/org/opendcs/Demo.java @@ -78,5 +78,10 @@ public static void main(String[] args) throws Exception { peakFlow.printToConsole(5); System.out.println("Peak " + peakStage.getParameterName() + " (" + peakStage.getUnitOfMeasure() + "):"); peakStage.printToConsole(5); + + // Read the stage-discharge rating curve (raw RDB text) + System.out.println("\nRead Rating Curve, Snake River near Moran, WY"); + String ratings = UsgsWaterDataApi.getRatings("USGS-13011000"); + ratings.lines().limit(20).forEach(System.out::println); } } \ No newline at end of file diff --git a/src/main/java/org/opendcs/usgs/waterdata/PeakFlowService.java b/src/main/java/org/opendcs/usgs/waterdata/PeakFlowService.java deleted file mode 100644 index da9d4eb..0000000 --- a/src/main/java/org/opendcs/usgs/waterdata/PeakFlowService.java +++ /dev/null @@ -1,124 +0,0 @@ -package org.opendcs.usgs.waterdata; - -import java.time.Instant; -import java.time.LocalDate; -import java.time.LocalTime; -import java.time.ZoneOffset; -import java.time.format.DateTimeFormatter; -import java.time.format.DateTimeParseException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -/** - * Retrieves annual peak streamflow and gage height from the legacy NWIS - * peak-flow service (tab-delimited RDB format). 
- * - * @see NWIS Peak Streamflow - */ -class PeakFlowService { - - static final String LEGACY_PEAK_URL = - "https://nwis.waterdata.usgs.gov/nwis/peak?format=rdb&date_format=MM%%2FDD%%2FYYYY&site_no=%s"; - static final String LEGACY_PEAK_URL_RANGE = LEGACY_PEAK_URL + "&begin_date=%s&end_date=%s"; - - private static final DateTimeFormatter PEAK_DATE_FMT = DateTimeFormatter.ofPattern("MM/dd/yyyy"); - - private PeakFlowService() {} - - /** - * Retrieves annual peak streamflow and gage height. - * - *

Returns two {@link TimeSeries}: one for peak discharge ({@code peak_va}) and - * one for gage height ({@code gage_ht}). Both share the same timestamps derived - * from {@code peak_dt} (and {@code peak_tm} when available). - * - * @param siteMetadata the full list of {@link TimeSeriesMetadata} for the site, - * used to look up parameter names and units - * @return list of TimeSeries (discharge and stage), one entry per water year - * @throws Exception on network or parsing errors - */ - static List> getAnnualPeaks( - List siteMetadata) throws Exception { - return getAnnualPeaks(siteMetadata, null, null); - } - - /** - * Retrieves annual peak streamflow and gage height within a date range. - * - * @param siteMetadata the full list of {@link TimeSeriesMetadata} for the site - * @param startDate start of date range (yyyy-MM-dd), or null for no lower bound - * @param endDate end of date range (yyyy-MM-dd), or null for no upper bound - * @return list of TimeSeries (discharge and stage), one entry per water year - * @throws Exception on network or parsing errors - */ - static List> getAnnualPeaks( - List siteMetadata, String startDate, String endDate) throws Exception { - if (siteMetadata.isEmpty()) return Collections.emptyList(); - - String monLocId = siteMetadata.get(0).monitoringLocationId; - String siteNo = monLocId.startsWith("USGS-") ? 
monLocId.substring(5) : monLocId; - - String url; - if (startDate != null && endDate != null) { - url = String.format(LEGACY_PEAK_URL_RANGE, siteNo, startDate, endDate); - } else { - url = String.format(LEGACY_PEAK_URL, siteNo); - } - String rdb = WebUtility.getPage(url); - if (rdb == null || rdb.isBlank()) return Collections.emptyList(); - - DataTable table = RdbFile.fromString(rdb); - List flows = new ArrayList<>(); - List stages = new ArrayList<>(); - - for (int row = 0; row < table.getRowCount(); row++) { - Instant time = parsePeakTime(table, row); - flows.add(new InstantaneousValue(time, table.getDouble(row, "peak_va", UsgsWaterDataApi.UNDEFINED_DOUBLE))); - stages.add(new InstantaneousValue(time, table.getDouble(row, "gage_ht", UsgsWaterDataApi.UNDEFINED_DOUBLE))); - } - - List> result = new ArrayList<>(); - result.add(new TimeSeries<>(peakMetadata(siteMetadata, Parameter.DISCHARGE), flows)); - result.add(new TimeSeries<>(peakMetadata(siteMetadata, Parameter.STAGE), stages)); - return result; - } - - static Instant parsePeakTime(DataTable table, int row) { - String dateStr = table.get(row, "peak_dt"); - LocalDate date = LocalDate.parse(dateStr, PEAK_DATE_FMT); - String timeStr = table.get(row, "peak_tm"); - if (timeStr != null && !timeStr.isBlank()) { - try { - LocalTime lt = LocalTime.parse(timeStr, DateTimeFormatter.ofPattern("HH:mm")); - return date.atTime(lt).toInstant(ZoneOffset.UTC); - } catch (DateTimeParseException e) { - // fall through to date-only - } - } - return date.atStartOfDay().toInstant(ZoneOffset.UTC); - } - - private static TimeSeriesMetadata peakMetadata(List siteMetadata, String parameterCode) { - TimeSeriesMetadata pm = siteMetadata.stream() - .filter(ts -> parameterCode.equals(ts.parameterCode)) - .findFirst() - .map(source -> { - TimeSeriesMetadata m = new TimeSeriesMetadata(); - m.monitoringLocationId = source.monitoringLocationId; - m.parameterCode = source.parameterCode; - m.parameterName = source.parameterName; - m.unitOfMeasure = 
source.unitOfMeasure; - return m; - }) - .orElseGet(() -> { - TimeSeriesMetadata m = new TimeSeriesMetadata(); - m.monitoringLocationId = siteMetadata.get(0).monitoringLocationId; - m.parameterCode = parameterCode; - return m; - }); - pm.computationPeriodIdentifier = "Water Year"; - pm.computationIdentifier = "Max At Event Time"; - return pm; - } -} diff --git a/src/main/java/org/opendcs/usgs/waterdata/RdbFile.java b/src/main/java/org/opendcs/usgs/waterdata/RdbFile.java deleted file mode 100644 index ee60104..0000000 --- a/src/main/java/org/opendcs/usgs/waterdata/RdbFile.java +++ /dev/null @@ -1,72 +0,0 @@ -package org.opendcs.usgs.waterdata; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.StringReader; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.List; - -/** - * Reads USGS RDB (tab-delimited) files into a {@link DataTable}. - * - *

RDB format rules: - *

    - *
  • Lines starting with {@code #} are comments and are skipped.
  • - *
  • The first non-comment line is the column header row (tab-separated).
  • - *
  • The second non-comment line describes column widths/types (e.g. {@code 10d}, {@code 8s}) and is skipped.
  • - *
  • Remaining lines are tab-separated data rows.
  • - *
- */ -class RdbFile extends DataTable { - - public RdbFile(String filename) throws IOException { - this(Path.of(filename)); - } - - public RdbFile(Path path) throws IOException { - this(Files.newBufferedReader(path)); - } - - public RdbFile(BufferedReader reader) throws IOException { - super(parse(reader)); - } - - public static RdbFile fromString(String rdbContent) throws IOException { - return new RdbFile(new BufferedReader(new StringReader(rdbContent))); - } - - private static ParseResult parse(BufferedReader reader) throws IOException { - try (reader) { - String line; - - // Skip comment lines starting with # - while ((line = reader.readLine()) != null) { - if (!line.startsWith("#")) { - break; - } - } - - if (line == null) { - return new ParseResult(new String[0], new ArrayList<>()); - } - - // First non-comment line is the column headers - String[] columnNames = line.split("\t", -1); - - // Second non-comment line is the type/width descriptor — skip it - reader.readLine(); - - // Remaining lines are data - List rows = new ArrayList<>(); - while ((line = reader.readLine()) != null) { - if (line.startsWith("#") || line.isBlank()) { - continue; - } - rows.add(line.split("\t", -1)); - } - return new ParseResult(columnNames, rows); - } - } -} diff --git a/src/main/java/org/opendcs/usgs/waterdata/UsgsWaterDataApi.java b/src/main/java/org/opendcs/usgs/waterdata/UsgsWaterDataApi.java index feef683..5fc6f12 100644 --- a/src/main/java/org/opendcs/usgs/waterdata/UsgsWaterDataApi.java +++ b/src/main/java/org/opendcs/usgs/waterdata/UsgsWaterDataApi.java @@ -37,6 +37,7 @@ public class UsgsWaterDataApi { static final long MAX_WINDOW_DAYS = 1000; static final String ROOT_URL = "https://api.waterdata.usgs.gov/ogcapi/v0/collections/"; + static final String RATINGS_URL = "https://api.waterdata.usgs.gov/stac-files/ratings/USGS.%s.exsa.rdb"; static final String LOCATIONS_URL = ROOT_URL + 
"monitoring-locations/items?f=csv&lang=en-US&limit=50000&offset=0&agency_code=USGS&state_code=%s&site_type_code=%s"; static final String TIME_SERIES_QUERY_ID = "items?f=csv&lang=en-US&limit=50000&properties=time,value&skipGeometry=true&sortby=time&offset=0&time_series_id=%s&time=%s/%s"; static final String DAILY_URL_ID = ROOT_URL + "daily/" + TIME_SERIES_QUERY_ID; @@ -90,20 +91,33 @@ public static String getApiKey() { } - private static List fetchDailyValues(String timeSeriesId, String startDate, String endDate) throws Exception { - if (TestSite.isTestSeriesId(timeSeriesId)) + private static List fetchDailyValues(TimeSeriesMetadata metadata, String startDate, String endDate) throws Exception { + if (TestSite.isTestSeriesId(metadata.id)) return TestSite.generateDailyValues(startDate, endDate); - String url = String.format(DAILY_URL_ID, timeSeriesId, startDate, endDate); - String csv = WebUtility.getPage(url); + String url = String.format(DAILY_URL_ID, metadata.id, startDate, endDate); + String csv = WebUtility.getPage(url, debugName("daily", metadata)); if (csv == null || csv.isBlank()) return Collections.emptyList(); return DailyValue.ensureContinuous(CsvFile.fromString(csv).mapRows(DailyValue::fromRow)); } + /** Builds a readable debug filename like {@code continuous_USGS-11447650_Temperature_water_Instantaneous_BGC_PROJECT}. */ + private static String debugName(String collection, TimeSeriesMetadata m) { + StringBuilder name = new StringBuilder(collection).append("_").append(m.monitoringLocationId); + appendIfPresent(name, m.parameterName); + appendIfPresent(name, m.computationIdentifier); + appendIfPresent(name, m.sublocationIdentifier); + return name.toString(); + } + + private static void appendIfPresent(StringBuilder name, String value) { + if (value != null && !value.isEmpty()) name.append("_").append(value); + } + /** * Fetches continuous values for a date range, paging in chunks when necessary. 
*/ - private static List fetchContinuousValues(String timeSeriesId, String startDate, String endDate) throws Exception { - if (TestSite.isTestSeriesId(timeSeriesId)) + private static List fetchContinuousValues(TimeSeriesMetadata metadata, String startDate, String endDate) throws Exception { + if (TestSite.isTestSeriesId(metadata.id)) return TestSite.generateContinuousValues(startDate, endDate); Instant rangeEnd = Instant.parse(normalizeToInstant(endDate)); @@ -115,8 +129,8 @@ private static List fetchContinuousValues(String timeSeriesI Instant windowEnd = chunkStart.plus(MAX_WINDOW_DAYS, ChronoUnit.DAYS); if (windowEnd.isAfter(rangeEnd)) windowEnd = rangeEnd; - List page = fetchContinuousPage(timeSeriesId, chunkStart, windowEnd); - lastSeen = appendDeduplicated(all, page, lastSeen, timeSeriesId); + List page = fetchContinuousPage(metadata, chunkStart, windowEnd); + lastSeen = appendDeduplicated(all, page, lastSeen, metadata.id); if (page.size() >= PAGE_LIMIT && lastSeen != null && lastSeen.isAfter(chunkStart)) { // Response was truncated at the page limit; resume from its last point. @@ -132,9 +146,9 @@ private static List fetchContinuousValues(String timeSeriesI /** * Fetches a single page of continuous values for one time window. 
*/ - private static List fetchContinuousPage(String timeSeriesId, Instant start, Instant end) throws Exception { - String url = String.format(CONTINUOUS_URL_ID, timeSeriesId, start.toString(), end.toString()); - String csv = WebUtility.getPage(url); + private static List fetchContinuousPage(TimeSeriesMetadata metadata, Instant start, Instant end) throws Exception { + String url = String.format(CONTINUOUS_URL_ID, metadata.id, start.toString(), end.toString()); + String csv = WebUtility.getPage(url, debugName("continuous", metadata)); if (csv == null || csv.isBlank()) return Collections.emptyList(); return CsvFile.fromString(csv).mapRows(InstantaneousValue::fromRow); } @@ -180,7 +194,7 @@ static String normalizeToInstant(String dateOrDateTime) { */ public static TimeSeries getDailyTimeSeries(TimeSeriesMetadata metadata, String startDate, String endDate) throws Exception { - return new TimeSeries<>(metadata, fetchDailyValues(metadata.id, startDate, endDate)); + return new TimeSeries<>(metadata, fetchDailyValues(metadata, startDate, endDate)); } /** @@ -193,12 +207,12 @@ public static TimeSeries getDailyTimeSeries(TimeSeriesMetadata metad */ public static TimeSeries getContinuousTimeSeries(TimeSeriesMetadata metadata, String startDate, String endDate) throws Exception { - return new TimeSeries<>(metadata, fetchContinuousValues(metadata.id, startDate, endDate)); + return new TimeSeries<>(metadata, fetchContinuousValues(metadata, startDate, endDate)); } public static List getLocations(String stateCode, String siteTypeCode) throws Exception { String url = String.format(LOCATIONS_URL, stateCode, siteTypeCode); - String csv = WebUtility.getPage(url); + String csv = WebUtility.getPage(url, "monitoring-locations"); if (csv == null || csv.isBlank()) return Collections.emptyList(); return CsvFile.fromString(csv).mapRows(MonitoringLocation::fromRow); } @@ -207,7 +221,7 @@ public static List getTimeSeriesMetadata(String monitoringLo if (TestSite.isTestSite(monitoringLocationId)) 
return TestSite.generateMetadata(); String url = String.format(TIME_SERIES_METADATA_URL, monitoringLocationId); - String csv = WebUtility.getPage(url); + String csv = WebUtility.getPage(url, "time-series-metadata_" + monitoringLocationId); if (csv == null || csv.isBlank()) return Collections.emptyList(); return CsvFile.fromString(csv).mapRows(TimeSeriesMetadata::fromRow); } @@ -215,7 +229,7 @@ public static List getTimeSeriesMetadata(String monitoringLo public static String postPage(String url, String propertyName, String[] items) throws Exception { String json = buildCqlInFilter(propertyName, items); String cacheKey = url + "|" + propertyName + "|" + String.join(",", items); - return WebUtility.postPage(url, "application/query-cql-json", json, cacheKey); + return WebUtility.postPage(url, "application/query-cql-json", json, cacheKey, "time-series-metadata"); } private static String buildCqlInFilter(String propertyName, String[] items) { @@ -272,23 +286,41 @@ public static TimeSeries getAnnualPeaks(TimeSeriesMetadata m */ public static TimeSeries getAnnualPeaks(TimeSeriesMetadata metadata, String startDate, String endDate) throws Exception { - return new TimeSeries<>(metadata, fetchPeaks(metadata.id, startDate, endDate)); + return new TimeSeries<>(metadata, fetchPeaks(metadata, startDate, endDate)); } /** * Fetches annual peaks for one time series from the peaks collection. 
*/ - private static List fetchPeaks(String timeSeriesId, + private static List fetchPeaks(TimeSeriesMetadata metadata, String startDate, String endDate) throws Exception { String url; if (startDate != null && endDate != null) { - url = String.format(PEAKS_URL_RANGE, timeSeriesId, startDate, endDate); + url = String.format(PEAKS_URL_RANGE, metadata.id, startDate, endDate); } else { - url = String.format(PEAKS_URL, timeSeriesId); + url = String.format(PEAKS_URL, metadata.id); } - String csv = WebUtility.getPage(url); + String csv = WebUtility.getPage(url, debugName("peaks", metadata)); if (csv == null || csv.isBlank()) return Collections.emptyList(); return CsvFile.fromString(csv).mapRows(InstantaneousValue::fromPeakRow); } + /** + * Retrieves the expanded, shift-adjusted (exsa) stage-discharge rating for a site + * as raw RDB text. + * @param siteId site number ("13011000") or monitoring-location id ("USGS-13011000") + */ + public static String getRatings(String siteId) throws Exception { + String siteNo = siteNumber(siteId); + String url = String.format(RATINGS_URL, siteNo); + return WebUtility.getPage(url, "ratings_" + siteNo); + } + + /** + * Extracts the bare USGS site number from a "USGS-" or "USGS." prefixed id. + */ + private static String siteNumber(String siteId) { + return siteId.replaceFirst("(?i)^USGS[-.]", ""); + } + } diff --git a/src/main/java/org/opendcs/usgs/waterdata/WebUtility.java b/src/main/java/org/opendcs/usgs/waterdata/WebUtility.java index fd1c584..77ff2c0 100644 --- a/src/main/java/org/opendcs/usgs/waterdata/WebUtility.java +++ b/src/main/java/org/opendcs/usgs/waterdata/WebUtility.java @@ -10,8 +10,6 @@ import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; import java.util.logging.Logger; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** * HTTP utility for fetching web pages with caching. 
@@ -29,8 +27,6 @@ class WebUtility { private static final int PAGE_CACHE_MAX_SIZE = 100; private static final ConcurrentHashMap PAGE_CACHE = new ConcurrentHashMap<>(); private static boolean apiKeyLogged = false; - private static final Pattern COLLECTION_NAME_PATTERN = - Pattern.compile("/collections/(?[a-z][a-z0-9\\-]*)/items\\b"); private static class CacheEntry { final String body; @@ -46,19 +42,19 @@ private WebUtility() { // Prevent instantiation } - public static String getPage(String url) throws Exception { + public static String getPage(String url, String debugName) throws Exception { logger.info("Requesting: " + url); HttpRequest request = buildRequest(url).GET().build(); - return fetchPage(url, request); + return fetchPage(url, request, debugName); } - public static String postPage(String url, String contentType, String body, String cacheKey) throws Exception { + public static String postPage(String url, String contentType, String body, String cacheKey, String debugName) throws Exception { logger.info("POST: " + url); HttpRequest request = buildRequest(url) .header("Content-Type", contentType) .POST(HttpRequest.BodyPublishers.ofString(body)) .build(); - return fetchPage(cacheKey, request); + return fetchPage(cacheKey, request, debugName); } static final String GITHUB_URL = "https://github.com/opendcs/usgs-waterdata-api"; @@ -90,7 +86,7 @@ static String maskKey(String key) { return key.substring(0, show) + "*".repeat(key.length() - show); } - static String fetchPage(String cacheKey, HttpRequest request) throws Exception { + static String fetchPage(String cacheKey, HttpRequest request, String debugName) throws Exception { long now = System.currentTimeMillis(); CacheEntry cached = PAGE_CACHE.get(cacheKey); if (cached != null) { @@ -136,8 +132,7 @@ static String fetchPage(String cacheKey, HttpRequest request) throws Exception { if (Boolean.getBoolean("usgs.debug")) { - String prefix = filenamePrefixFromUrl(response.uri().toString()); - 
saveForDebugging(response, responseBody, prefix); + saveForDebugging(response, responseBody, debugName); } if (PAGE_CACHE.size() >= PAGE_CACHE_MAX_SIZE) { @@ -147,15 +142,13 @@ static String fetchPage(String cacheKey, HttpRequest request) throws Exception { return responseBody; } - static String filenamePrefixFromUrl(String url) { - Matcher m = COLLECTION_NAME_PATTERN.matcher(url); - if (m.find()) { - return m.group("collection"); - } - return ""; + /** Collapses any run of non-filename-safe characters to a single underscore so the name is usable on disk. */ + private static String sanitize(String name) { + return name.replaceAll("[^A-Za-z0-9._-]+", "_"); } - private static void saveForDebugging(HttpResponse response, String body, String filenamePrefix) { + private static void saveForDebugging(HttpResponse response, String body, String debugName) { + String filenamePrefix = (debugName == null || debugName.isBlank()) ? "response" : sanitize(debugName); try { Optional disposition = response.headers().firstValue("Content-Disposition"); String filename = null; @@ -168,17 +161,14 @@ private static void saveForDebugging(HttpResponse response, String body, } } if (filename == null || filename.isEmpty()) { - // Use the prefix (e.g. 
"continuous") as the base name - if (!filenamePrefix.isEmpty()) { - filename = filenamePrefix; - } else { - filename = "response"; - } - // Determine extension from f= query parameter + filename = filenamePrefix; + // Determine extension from the f= query parameter, or the URL path for stac files String query = response.uri().getQuery(); String ext = ".txt"; if (query != null && query.contains("f=csv")) { ext = ".csv"; + } else if (response.uri().getPath().endsWith(".rdb")) { + ext = ".rdb"; } filename += ext; } @@ -195,7 +185,7 @@ private static void saveForDebugging(HttpResponse response, String body, } int n = 1; while (Files.exists(file)) { - file = dir.resolve(name + n + ext); + file = dir.resolve(name + "_" + n + ext); n++; } } diff --git a/src/test/java/org/opendcs/usgs/waterdata/PeakFlowServiceTest.java b/src/test/java/org/opendcs/usgs/waterdata/PeakFlowServiceTest.java deleted file mode 100644 index 80af37f..0000000 --- a/src/test/java/org/opendcs/usgs/waterdata/PeakFlowServiceTest.java +++ /dev/null @@ -1,60 +0,0 @@ -package org.opendcs.usgs.waterdata; - -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.nio.file.Path; -import java.time.Instant; - -import static org.junit.jupiter.api.Assertions.*; - -class PeakFlowServiceTest { - - static DataTable table; - - @BeforeAll - static void loadPeaks() throws Exception { - Path path = Path.of(PeakFlowServiceTest.class.getResource("/peaks-05495000.rdb").toURI()); - table = new RdbFile(path); - } - - @Test - void emptyPeakTime_returnsStartOfDay() { - // row 0: USGS 05495000 07/12/1922 (no time) - Instant time = PeakFlowService.parsePeakTime(table, 0); - assertEquals(Instant.parse("1922-07-12T00:00:00Z"), time); - } - - @Test - void peakTimeWithHHmm_parsesCorrectly() { - // row 84: USGS 05495000 03/13/2006 15:30 - int row = findRowByDate("03/13/2006"); - Instant time = PeakFlowService.parsePeakTime(table, row); - assertEquals(Instant.parse("2006-03-13T15:30:00Z"), time); - } 
- - @Test - void stageAndFlowCountMatchRows() { - // each row produces exactly one flow value and one stage value - int expectedCount = 104; - assertEquals(expectedCount, table.getRowCount()); - - int flowCount = 0; - int stageCount = 0; - for (int row = 0; row < table.getRowCount(); row++) { - double flow = table.getDouble(row, "peak_va", UsgsWaterDataApi.UNDEFINED_DOUBLE); - if (flow != UsgsWaterDataApi.UNDEFINED_DOUBLE) flowCount++; - double stage = table.getDouble(row, "gage_ht", UsgsWaterDataApi.UNDEFINED_DOUBLE); - if (stage != UsgsWaterDataApi.UNDEFINED_DOUBLE) stageCount++; - } - assertEquals(104, flowCount, "non-missing flow values"); - assertEquals(104, stageCount, "non-missing stage values"); - } - - private int findRowByDate(String date) { - for (int i = 0; i < table.getRowCount(); i++) { - if (date.equals(table.get(i, "peak_dt"))) return i; - } - throw new AssertionError("Row not found for date: " + date); - } -} diff --git a/src/test/java/org/opendcs/usgs/waterdata/RdbFileTest.java b/src/test/java/org/opendcs/usgs/waterdata/RdbFileTest.java deleted file mode 100644 index d2ed745..0000000 --- a/src/test/java/org/opendcs/usgs/waterdata/RdbFileTest.java +++ /dev/null @@ -1,70 +0,0 @@ -package org.opendcs.usgs.waterdata; - -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.nio.file.Path; - -import static org.junit.jupiter.api.Assertions.*; - -class RdbFileTest { - - static DataTable rdb; - - @BeforeAll - static void loadRdb() throws Exception { - Path rdbPath = Path.of(RdbFileTest.class.getResource("/peaks.rdb").toURI()); - rdb = new RdbFile(rdbPath); - } - - @Test - void columnNames() { - String[] cols = rdb.getColumnNames(); - assertEquals("agency_cd", cols[0]); - assertEquals("site_no", cols[1]); - assertEquals("peak_dt", cols[2]); - assertEquals("peak_tm", cols[3]); - assertEquals("peak_va", cols[4]); - } - - @Test - void rowCount() { - assertEquals(3, rdb.getRowCount()); - } - - @Test - void dataValues() { - 
assertEquals("USGS", rdb.get(0, "agency_cd")); - assertEquals("05495000", rdb.get(0, "site_no")); - assertEquals("07/12/1922", rdb.get(0, "peak_dt")); - assertEquals(2400.0, rdb.getDouble(0, "peak_va", -1), 0.0); - assertEquals(11.90, rdb.getDouble(0, "gage_ht", -1), 0.001); - } - - @Test - void secondRow() { - assertEquals("03/16/1923", rdb.get(1, "peak_dt")); - assertEquals(1980.0, rdb.getDouble(1, "peak_va", -1), 0.0); - } - - @Test - void thirdRow() { - assertEquals("08/06/1924", rdb.get(2, "peak_dt")); - assertEquals(3250.0, rdb.getDouble(2, "peak_va", -1), 0.0); - } - - @Test - void fromString_parsesCorrectly() throws Exception { - String content = "# comment line\nname\tvalue\n5s\t8s\nAlice\t42\nBob\t99\n"; - DataTable table = RdbFile.fromString(content); - assertEquals(2, table.getRowCount()); - assertEquals("Alice", table.get(0, "name")); - assertEquals(99.0, table.getDouble(1, "value", -1), 0.0); - } - - @Test - void emptyTimeMeansBlank() { - // peak_tm is empty for all rows in our test data - assertEquals("", rdb.get(0, "peak_tm")); - } -} diff --git a/src/test/java/org/opendcs/usgs/waterdata/UsgsWaterDataApiTest.java b/src/test/java/org/opendcs/usgs/waterdata/UsgsWaterDataApiTest.java index ccaf094..a7a3126 100644 --- a/src/test/java/org/opendcs/usgs/waterdata/UsgsWaterDataApiTest.java +++ b/src/test/java/org/opendcs/usgs/waterdata/UsgsWaterDataApiTest.java @@ -388,4 +388,20 @@ void annualPeaks_metadataScenario() throws Exception { } } + /** + * Retrieves the raw RDB stage-discharge rating for a site. 
+ * + * ./gradlew integrationTest --tests "org.opendcs.usgs.waterdata.UsgsWaterDataApiTest.getRatings" -PusgsDebug=true + */ + @Test + @Tag("integration") + void getRatings() throws Exception { + String ratings = UsgsWaterDataApi.getRatings("USGS-13011000"); + assertNotNull(ratings, "Expected rating text"); + assertFalse(ratings.isBlank(), "Expected non-empty rating text"); + // RDB files begin with comment lines prefixed by '#' + assertTrue(ratings.startsWith("#"), "Expected RDB content to start with a comment line"); + logger.info("Rating text length: " + ratings.length()); + } + } diff --git a/src/test/resources/peaks-05495000.rdb b/src/test/resources/peaks-05495000.rdb deleted file mode 100644 index 94846fd..0000000 --- a/src/test/resources/peaks-05495000.rdb +++ /dev/null @@ -1,178 +0,0 @@ -# -# U.S. Geological Survey -# National Water Information System -# Retrieved: 2026-03-24 16:11:02 EDT -# -# ---------------------------------- WARNING ---------------------------------------- -# Some of the data that you have obtained from this U.S. Geological Survey database -# may not have received Director's approval. Any such data values are qualified -# as provisional and are subject to revision. Provisional data are released on the -# condition that neither the USGS nor the United States Government may be held liable -# for any damages resulting from its use. -# -# More data may be available offline. -# For more information on these data, contact USGS Water Data Inquiries. -# This file contains the annual peak streamflow data. 
-# -# This information includes the following fields: -# -# agency_cd Agency Code -# site_no USGS station number -# peak_dt Date of peak streamflow (format MM/DD/YYYY) -# peak_tm Time of peak streamflow (24 hour format, 00:00 - 23:59) -# peak_va Annual peak streamflow value in cfs -# peak_cd Peak Discharge-Qualification codes (see explanation below) -# gage_ht Gage height for the associated peak streamflow in feet -# gage_ht_cd Gage height qualification codes -# year_last_pk Peak streamflow reported is the highest since this year -# ag_dt Date of maximum gage-height for water year (if not concurrent with peak) -# ag_tm Time of maximum gage-height for water year (if not concurrent with peak -# ag_gage_ht maximum Gage height for water year in feet (if not concurrent with peak -# ag_gage_ht_cd maximum Gage height code -# -# Sites in this file include: -# USGS 05495000 Fox River at Wayland, MO -# -# Peak Streamflow-Qualification Codes(peak_cd): -# 1 ... Discharge is a Maximum Daily Average -# 2 ... Discharge is an Estimate -# 3 ... Discharge affected by Dam Failure -# 4 ... Discharge less than indicated value, -# which is Minimum Recordable Discharge at this site -# 5 ... Discharge affected to unknown degree by -# Regulation or Diversion -# 6 ... Discharge affected by Regulation or Diversion -# 7 ... Discharge is an Historic Peak -# 8 ... Discharge actually greater than indicated value -# 9 ... Discharge due to Snowmelt, Hurricane, -# Ice-Jam or Debris Dam breakup -# A ... Year of occurrence is unknown or not exact -# Bd ... Day of occurrence is unknown or not exact -# Bm ... Month of occurrence is unknown or not exact -# C ... All or part of the record affected by Urbanization, -# Mining, Agricultural changes, Channelization, or other -# F ... Peak supplied by another agency -# O ... Opportunistic value not from systematic data collection -# R ... Revised -# -# Gage height qualification codes(gage_ht_cd,ag_gage_ht_cd): -# 1 ... 
Gage height affected by backwater -# 2 ... Gage height not the maximum for the year -# 3 ... Gage height at different site and(or) datum -# 4 ... Gage height below minimum recordable elevation -# 5 ... Gage height is an estimate -# 6 ... Gage datum changed during this year -# 7 ... Debris, mud, or hyper-concentrated flow -# 8 ... Gage height tidally affected -# Bd ... Day of occurrence is unknown or not exact -# Bm ... Month of occurrence is unknown or not exact -# F ... Peak supplied by another agency -# R ... Revised -# -# -agency_cd site_no peak_dt peak_tm peak_va peak_cd gage_ht gage_ht_cd year_last_pk ag_dt ag_tm ag_gage_ht ag_gage_ht_cd -5s 15s 10d 6s 8s 33s 8s 27s 4s 10d 6s 8s 27s -USGS 05495000 07/12/1922 2400 11.90 5 -USGS 05495000 03/16/1923 1980 11.00 5 -USGS 05495000 08/06/1924 3250 13.60 5 -USGS 05495000 04/26/1925 3760 13.80 5 -USGS 05495000 09/17/1926 6570 14.90 5 -USGS 05495000 04/20/1927 7300 15.50 5 -USGS 05495000 10/01/1927 8100 19.10 3 -USGS 05495000 11/18/1928 16100 20.00 3 -USGS 05495000 06/16/1930 3460 14.16 -USGS 05495000 06/07/1931 9940 18.35 -USGS 05495000 11/24/1931 6440 16.85 -USGS 05495000 06/29/1933 25000 21.53 -USGS 05495000 04/05/1934 1780 10.92 -USGS 05495000 06/02/1935 13300 19.38 -USGS 05495000 02/26/1936 8060 17.65 -USGS 05495000 03/05/1937 3540 13.72 2 1937-02-22 18.52 1 -USGS 05495000 04/06/1938 4070 14.88 -USGS 05495000 03/13/1939 9260 18.22 -USGS 05495000 04/24/1940 1640 9.08 -USGS 05495000 06/11/1941 3080 12.75 -USGS 05495000 10/11/1941 4510 15.80 -USGS 05495000 05/17/1943 5290 16.45 -USGS 05495000 04/24/1944 10200 18.50 -USGS 05495000 06/17/1945 6810 17.34 -USGS 05495000 06/19/1946 19900 20.66 -USGS 05495000 06/07/1947 12200 19.12 -USGS 05495000 03/20/1948 11900 18.20 -USGS 05495000 04/01/1949 3350 12.90 2 1949-02-20 15.50 1 -USGS 05495000 06/16/1950 9560 17.79 -USGS 05495000 05/12/1951 5250 15.27 2 1951-02-20 15.40 1 -USGS 05495000 06/23/1952 6400 16.30 -USGS 05495000 04/01/1953 7960 17.20 -USGS 05495000 04/21/1954 4050 
13.60 -USGS 05495000 01/06/1955 6000 15.98 -USGS 05495000 08/09/1956 1030 4 6.98 -USGS 05495000 06/11/1957 6130 16.35 -USGS 05495000 07/31/1958 4750 15.51 -USGS 05495000 08/08/1959 9840 18.33 -USGS 05495000 03/30/1960 13400 20.17 -USGS 05495000 09/14/1961 4290 14.69 -USGS 05495000 03/12/1962 6480 16.82 -USGS 05495000 03/05/1963 5760 16.27 -USGS 05495000 04/21/1964 6180 16.79 -USGS 05495000 04/06/1965 5370 15.97 -USGS 05495000 06/13/1966 2730 11.99 -USGS 05495000 04/02/1967 5100 15.73 -USGS 05495000 10/31/1967 3330 13.22 -USGS 05495000 01/17/1969 5400 16.04 -USGS 05495000 09/23/1970 12800 19.84 -USGS 05495000 10/11/1970 3150 13.79 2 1971-02-19 18.02 1 -USGS 05495000 12/17/1971 4140 14.92 -USGS 05495000 04/22/1973 26400 21.71 -USGS 05495000 05/30/1974 5630 16.53 -USGS 05495000 04/24/1975 3390 13.75 -USGS 05495000 04/25/1976 13400 19.96 -USGS 05495000 09/24/1977 4430 12.68 -USGS 05495000 03/19/1978 8820 16.75 -USGS 05495000 03/30/1979 10200 17.50 -USGS 05495000 06/04/1980 14700 19.23 -USGS 05495000 07/06/1981 12300 18.53 -USGS 05495000 07/19/1982 11500 18.24 -USGS 05495000 04/03/1983 14800 19.25 -USGS 05495000 06/10/1984 6370 14.75 -USGS 05495000 02/22/1985 11500 18.24 -USGS 05495000 05/19/1986 13200 18.80 -USGS 05495000 10/03/1986 9300 16.92 -USGS 05495000 02/20/1988 1860 8.23 -USGS 05495000 09/10/1989 946 6.12 -USGS 05495000 06/21/1990 9040 16.61 -USGS 05495000 05/05/1991 4700 12.13 -USGS 05495000 04/20/1992 7980 15.81 -USGS 05495000 07/12/1993 11900 18.22 -USGS 05495000 04/22/1994 6410 14.77 -USGS 05495000 07/06/1995 11300 18.00 -USGS 05495000 05/27/1996 14300 19.05 -USGS 05495000 02/21/1997 5990 14.77 -USGS 05495000 06/30/1998 7270 15.78 -USGS 05495000 10/18/1998 08:49 6800 15.44 -USGS 05495000 07/11/2000 18:00 2990 10.24 -USGS 05495000 05/15/2001 01:30 9800 17.78 -USGS 05495000 05/13/2002 10900 18.35 5 -USGS 05495000 07/11/2003 8350 16.85 -USGS 05495000 08/28/2004 10900 18.37 -USGS 05495000 02/14/2005 04:30 5290 13.62 -USGS 05495000 03/13/2006 15:30 3450 10.67 
-USGS 05495000 02/25/2007 7090 15.41 -USGS 05495000 09/15/2008 18600 20.61 -USGS 05495000 05/16/2009 9320 17.36 2 2009-03-10 17.60 -USGS 05495000 07/21/2010 13800 19.39 -USGS 05495000 06/15/2011 06:00 26200 23.07 -USGS 05495000 05/05/2012 05:30 11200 18.54 -USGS 05495000 04/19/2013 11900 18.90 -USGS 05495000 02/21/2014 8070 16.26 -USGS 05495000 06/26/2015 17:45 12400 19.15 -USGS 05495000 12/14/2015 14:45 6230 14.44 -USGS 05495000 04/06/2017 03:51 4840 12.68 -USGS 05495000 09/07/2018 03:52 7550 15.79 -USGS 05495000 05/30/2019 12:48 15800 20.38 -USGS 05495000 07/01/2020 7610 15.85 -USGS 05495000 07/12/2021 08:45 8390 16.53 -USGS 05495000 05/27/2022 06:45 3720 10.96 -USGS 05495000 08/05/2023 12:45 9680 17.56 -USGS 05495000 04/28/2024 23:00 9190 17.18 -USGS 05495000 07/27/2025 11:45 8500 16.63 diff --git a/src/test/resources/peaks.rdb b/src/test/resources/peaks.rdb deleted file mode 100644 index b57e5bd..0000000 --- a/src/test/resources/peaks.rdb +++ /dev/null @@ -1,15 +0,0 @@ -# -# U.S. Geological Survey -# National Water Information System -# Retrieved: 2026-03-23 -# -# This file contains the annual peak streamflow data. -# -# Data for the following 1 site(s) are contained in this file -# USGS 05495000 Fox River at Wayland, MO -# -agency_cd site_no peak_dt peak_tm peak_va peak_cd gage_ht gage_ht_cd year_last_pk ag_dt ag_tm ag_gage_ht ag_gage_ht_cd -5s 15s 10d 6s 8s 33s 8s 27s 4s 10d 6s 8s 27s -USGS 05495000 07/12/1922 2400 11.90 5 -USGS 05495000 03/16/1923 1980 11.00 5 -USGS 05495000 08/06/1924 3250 13.60 5