diff --git a/server/finders/dbfinder/stop.go b/server/finders/dbfinder/stop.go index e874b670..d5376062 100644 --- a/server/finders/dbfinder/stop.go +++ b/server/finders/dbfinder/stop.go @@ -312,27 +312,34 @@ func stopSelect(limit *int, after *model.Cursor, ids []int, useActive *UseActive if where.OnestopID != nil { where.OnestopIds = append(where.OnestopIds, *where.OnestopID) } - if len(where.OnestopIds) > 0 && where.AllowPreviousOnestopIds != nil && *where.AllowPreviousOnestopIds { - // Use CTE for stop lookup optimization - sub := sq.StatementBuilder. - Select( - "feed_version_stop_onestop_ids.onestop_id", - "feed_version_stop_onestop_ids.entity_id", - "feed_versions.feed_id", - ). - Distinct().Options("on (feed_version_stop_onestop_ids.onestop_id, feed_version_stop_onestop_ids.entity_id, feed_versions.feed_id)"). - From("feed_version_stop_onestop_ids"). - Join("feed_versions on feed_versions.id = feed_version_stop_onestop_ids.feed_version_id"). - Where(In("feed_version_stop_onestop_ids.onestop_id", where.OnestopIds)). - OrderBy("feed_version_stop_onestop_ids.onestop_id, feed_version_stop_onestop_ids.entity_id, feed_versions.feed_id, feed_versions.id DESC") - stopLookupCte := sq.CTE{ - Materialized: true, - Alias: "feed_version_stop_onestop_ids", - Expression: sub, + searchCte, searchOk := stopOnestopSearchCTE(where.OnestopIds) + if searchOk && where.AllowPreviousOnestopIds != nil && *where.AllowPreviousOnestopIds { + // Resolve (possibly historical) Onestop IDs as search keys: decode + // each to a point + name and match against current stops by location + // and name similarity, rather than a stored per-version association. + // See interline-io/tlv2#354. If no Onestop ID parses as a search key + // (searchOk is false), we fall through to the exact-match branch + // below, which returns empty for unknown ids. + // + // Source for the stop's current onestop_id, used to compare name + // components. On the materialized active path it is a column on the + // materialized table; otherwise it comes from a join to + // feed_version_stop_onestop_ids (matching the select expression). + curOsid := "feed_version_stop_onestop_ids.onestop_id" + if useActive.Active() && useActive.materialized { + curOsid = "gtfs_stops.onestop_id" + } else { + q = q.JoinClause(`LEFT JOIN feed_version_stop_onestop_ids ON feed_version_stop_onestop_ids.entity_id = gtfs_stops.stop_id and feed_version_stop_onestop_ids.feed_version_id = gtfs_stops.feed_version_id`) } q = q. - WithCTE(stopLookupCte). - Join("feed_version_stop_onestop_ids on feed_version_stop_onestop_ids.entity_id = gtfs_stops.stop_id and feed_version_stop_onestop_ids.feed_id = feed_versions.feed_id") + WithCTE(searchCte). + Join( + "stop_onestop_search on ST_DWithin(gtfs_stops.geometry, ST_MakePoint(stop_onestop_search.lng, stop_onestop_search.lat), ?) "+ + "and (split_part("+curOsid+", '-', 3) = stop_onestop_search.name_component "+ + "or word_similarity(stop_onestop_search.name_component, split_part("+curOsid+", '-', 3)) > ?)", + stopOnestopSearchRadius, stopOnestopSearchSimilarity, + ) + distinct = true } else { q = q.JoinClause(`LEFT JOIN feed_version_stop_onestop_ids ON feed_version_stop_onestop_ids.entity_id = gtfs_stops.stop_id and feed_version_stop_onestop_ids.feed_version_id = gtfs_stops.feed_version_id`) if len(where.OnestopIds) > 0 { diff --git a/server/finders/dbfinder/stop_onestop_search.go b/server/finders/dbfinder/stop_onestop_search.go new file mode 100644 index 00000000..fde25539 --- /dev/null +++ b/server/finders/dbfinder/stop_onestop_search.go @@ -0,0 +1,74 @@ +package dbfinder + +import ( + "strings" + + sq "github.com/irees/squirrel" + "github.com/mmcloughlin/geohash" +) + +// Tunables for resolving previous/looked-up stop Onestop IDs as search keys. +// A stop Onestop ID is "s--"; rather than storing every +// historical (feed_version, stop_id, onestop_id) association, we decode the id +// back into its content claim — a point (the geohash) and a name — and search +// current stops for a match. See interline-io/tlv2#354. +var ( + // Radius in meters around the decoded geohash point. Absorbs coordinate + // jitter between feed versions (the geohash-10 cell is ~1m; observed + // real-world renames move stops a few tens of meters). + stopOnestopSearchRadius = 100.0 + // pg_trgm word_similarity threshold for matching the requested name + // component against the stop's current name component. Both sides are + // filterName() output, so the comparison is in the same normalized space. + stopOnestopSearchSimilarity = 0.4 +) + +// parseStopOnestopID splits "s--" into the geohash center point +// and the name component. The name never contains '-' because filterName maps +// '-' to '~', so a 3-way split is exact. Returns ok=false for anything that is +// not a well-formed stop Onestop ID (wrong prefix, invalid geohash), so route/ +// operator ids or garbage do not decode to a bogus point and search there. +func parseStopOnestopID(osid string) (lat float64, lng float64, name string, ok bool) { + parts := strings.SplitN(osid, "-", 3) + if len(parts) != 3 || parts[0] != "s" { + return 0, 0, "", false + } + if err := geohash.Validate(parts[1]); err != nil { + return 0, 0, "", false + } + lat, lng = geohash.DecodeCenter(parts[1]) + return lat, lng, parts[2], true +} + +// stopOnestopSearchCTE builds a small VALUES CTE of parsed inputs, one row per +// requested Onestop ID, so that any number of inputs can be resolved with a +// single spatial join (one GiST index probe per row) instead of per-input +// queries. Returns ok=false if no input parsed. +func stopOnestopSearchCTE(osids []string) (sq.CTE, bool) { + var rows []string + var args []interface{} + for _, osid := range osids { + lat, lng, name, ok := parseStopOnestopID(osid) + if !ok { + continue + } + if len(rows) == 0 { + // Cast the first row so Postgres can resolve the VALUES column + // types; an all-placeholder VALUES otherwise errors with + // "could not determine data type of parameter". + rows = append(rows, "(?::text,?::text,?::double precision,?::double precision)") + } else { + rows = append(rows, "(?,?,?,?)") + } + args = append(args, osid, name, lng, lat) + } + if len(rows) == 0 { + return sq.CTE{}, false + } + return sq.CTE{ + Alias: "stop_onestop_search", + ColumnList: []string{"input_onestop_id", "name_component", "lng", "lat"}, + Materialized: true, + Expression: sq.Expr("VALUES "+strings.Join(rows, ", "), args...), + }, true +} diff --git a/server/gql/stop_resolver_test.go b/server/gql/stop_resolver_test.go index 4730ee41..e059fff8 100644 --- a/server/gql/stop_resolver_test.go +++ b/server/gql/stop_resolver_test.go @@ -982,11 +982,23 @@ func stopResolverPreviousOnestopIDTestcases(t testing.TB, cfg model.Config) []te selectExpect: []string{"s-9q9nfswzpg-fruitvale"}, }, { + // A previous Onestop ID is resolved as a search key (geohash point + + // name) against current stops, so it returns the stop's *current* + // canonical Onestop ID, not the historical id that was queried. name: "use previous", query: `query($osid:String!, $previous:Boolean!) { stops(where:{onestop_id:$osid, allow_previous_onestop_ids:$previous}) { stop_id onestop_id }}`, vars: hw{"osid": "s-9q9nfswzpg-fruitvale", "previous": true}, selector: "stops.#.onestop_id", - selectExpect: []string{"s-9q9nfswzpg-fruitvale"}, + selectExpect: []string{"s-9q9nfsxn67-fruitvale"}, + }, + { + // A malformed/non-stop Onestop ID has no valid search key, so the + // request must return empty rather than an unconstrained result. + name: "previous with malformed id returns empty", + query: `query($osid:String!, $previous:Boolean!) { stops(where:{onestop_id:$osid, allow_previous_onestop_ids:$previous}) { stop_id onestop_id }}`, + vars: hw{"osid": "not-a-onestop-id", "previous": true}, + selector: "stops.#.onestop_id", + selectExpect: []string{}, }, } return testcases