Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 26 additions & 19 deletions server/finders/dbfinder/stop.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,27 +312,34 @@ func stopSelect(limit *int, after *model.Cursor, ids []int, useActive *UseActive
if where.OnestopID != nil {
where.OnestopIds = append(where.OnestopIds, *where.OnestopID)
}
if len(where.OnestopIds) > 0 && where.AllowPreviousOnestopIds != nil && *where.AllowPreviousOnestopIds {
// Use CTE for stop lookup optimization
sub := sq.StatementBuilder.
Select(
"feed_version_stop_onestop_ids.onestop_id",
"feed_version_stop_onestop_ids.entity_id",
"feed_versions.feed_id",
).
Distinct().Options("on (feed_version_stop_onestop_ids.onestop_id, feed_version_stop_onestop_ids.entity_id, feed_versions.feed_id)").
From("feed_version_stop_onestop_ids").
Join("feed_versions on feed_versions.id = feed_version_stop_onestop_ids.feed_version_id").
Where(In("feed_version_stop_onestop_ids.onestop_id", where.OnestopIds)).
OrderBy("feed_version_stop_onestop_ids.onestop_id, feed_version_stop_onestop_ids.entity_id, feed_versions.feed_id, feed_versions.id DESC")
stopLookupCte := sq.CTE{
Materialized: true,
Alias: "feed_version_stop_onestop_ids",
Expression: sub,
searchCte, searchOk := stopOnestopSearchCTE(where.OnestopIds)
if searchOk && where.AllowPreviousOnestopIds != nil && *where.AllowPreviousOnestopIds {
// Resolve (possibly historical) Onestop IDs as search keys: decode
// each to a point + name and match against current stops by location
// and name similarity, rather than a stored per-version association.
// See interline-io/tlv2#354. If no Onestop ID parses as a search key
// (searchOk is false), we fall through to the exact-match branch
// below, which returns empty for unknown ids.
//
// Source for the stop's current onestop_id, used to compare name
// components. On the materialized active path it is a column on the
// materialized table; otherwise it comes from a join to
// feed_version_stop_onestop_ids (matching the select expression).
curOsid := "feed_version_stop_onestop_ids.onestop_id"
if useActive.Active() && useActive.materialized {
curOsid = "gtfs_stops.onestop_id"
} else {
q = q.JoinClause(`LEFT JOIN feed_version_stop_onestop_ids ON feed_version_stop_onestop_ids.entity_id = gtfs_stops.stop_id and feed_version_stop_onestop_ids.feed_version_id = gtfs_stops.feed_version_id`)
}
q = q.
WithCTE(stopLookupCte).
Join("feed_version_stop_onestop_ids on feed_version_stop_onestop_ids.entity_id = gtfs_stops.stop_id and feed_version_stop_onestop_ids.feed_id = feed_versions.feed_id")
WithCTE(searchCte).
Join(
"stop_onestop_search on ST_DWithin(gtfs_stops.geometry, ST_MakePoint(stop_onestop_search.lng, stop_onestop_search.lat), ?) "+
"and (split_part("+curOsid+", '-', 3) = stop_onestop_search.name_component "+
"or word_similarity(stop_onestop_search.name_component, split_part("+curOsid+", '-', 3)) > ?)",
stopOnestopSearchRadius, stopOnestopSearchSimilarity,
)
distinct = true
} else {
q = q.JoinClause(`LEFT JOIN feed_version_stop_onestop_ids ON feed_version_stop_onestop_ids.entity_id = gtfs_stops.stop_id and feed_version_stop_onestop_ids.feed_version_id = gtfs_stops.feed_version_id`)
if len(where.OnestopIds) > 0 {
Expand Down
74 changes: 74 additions & 0 deletions server/finders/dbfinder/stop_onestop_search.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package dbfinder

import (
"strings"

sq "github.com/irees/squirrel"
"github.com/mmcloughlin/geohash"
)

// Tunables for resolving previous/looked-up stop Onestop IDs as search keys.
// A stop Onestop ID is "s-<geohash>-<filtered_name>"; rather than storing every
// historical (feed_version, stop_id, onestop_id) association, we decode the id
// back into its content claim — a point (the geohash) and a name — and search
// current stops for a match. See interline-io/tlv2#354.
var (
// Radius in meters around the decoded geohash point. Absorbs coordinate
// jitter between feed versions (the geohash-10 cell is ~1m; observed
// real-world renames move stops a few tens of meters).
stopOnestopSearchRadius = 100.0
// pg_trgm word_similarity threshold for matching the requested name
// component against the stop's current name component. Both sides are
// filterName() output, so the comparison is in the same normalized space.
stopOnestopSearchSimilarity = 0.4
)

// parseStopOnestopID splits "s-<geohash>-<name>" into the geohash center point
// and the name component. The name never contains '-' because filterName maps
// '-' to '~', so a 3-way split is exact. Returns ok=false for anything that is
// not a well-formed stop Onestop ID (wrong prefix, invalid geohash), so route/
// operator ids or garbage do not decode to a bogus point and search there.
func parseStopOnestopID(osid string) (lat float64, lng float64, name string, ok bool) {
parts := strings.SplitN(osid, "-", 3)
if len(parts) != 3 || parts[0] != "s" {
return 0, 0, "", false
}
if err := geohash.Validate(parts[1]); err != nil {
return 0, 0, "", false
}
lat, lng = geohash.DecodeCenter(parts[1])
return lat, lng, parts[2], true
}

// stopOnestopSearchCTE builds a small VALUES CTE of parsed inputs, one row per
// requested Onestop ID, so that any number of inputs can be resolved with a
// single spatial join (one GiST index probe per row) instead of per-input
// queries. Returns ok=false if no input parsed.
func stopOnestopSearchCTE(osids []string) (sq.CTE, bool) {
var rows []string
var args []interface{}
for _, osid := range osids {
lat, lng, name, ok := parseStopOnestopID(osid)
if !ok {
continue
}
if len(rows) == 0 {
// Cast the first row so Postgres can resolve the VALUES column
// types; an all-placeholder VALUES otherwise errors with
// "could not determine data type of parameter".
rows = append(rows, "(?::text,?::text,?::double precision,?::double precision)")
} else {
rows = append(rows, "(?,?,?,?)")
}
args = append(args, osid, name, lng, lat)
}
if len(rows) == 0 {
return sq.CTE{}, false
}
return sq.CTE{
Alias: "stop_onestop_search",
ColumnList: []string{"input_onestop_id", "name_component", "lng", "lat"},
Materialized: true,
Expression: sq.Expr("VALUES "+strings.Join(rows, ", "), args...),
}, true
}
14 changes: 13 additions & 1 deletion server/gql/stop_resolver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -982,11 +982,23 @@ func stopResolverPreviousOnestopIDTestcases(t testing.TB, cfg model.Config) []te
selectExpect: []string{"s-9q9nfswzpg-fruitvale"},
},
{
// A previous Onestop ID is resolved as a search key (geohash point +
// name) against current stops, so it returns the stop's *current*
// canonical Onestop ID, not the historical id that was queried.
name: "use previous",
query: `query($osid:String!, $previous:Boolean!) { stops(where:{onestop_id:$osid, allow_previous_onestop_ids:$previous}) { stop_id onestop_id }}`,
vars: hw{"osid": "s-9q9nfswzpg-fruitvale", "previous": true},
selector: "stops.#.onestop_id",
selectExpect: []string{"s-9q9nfswzpg-fruitvale"},
selectExpect: []string{"s-9q9nfsxn67-fruitvale"},
},
{
// A malformed/non-stop Onestop ID has no valid search key, so the
// request must return empty rather than an unconstrained result.
name: "previous with malformed id returns empty",
query: `query($osid:String!, $previous:Boolean!) { stops(where:{onestop_id:$osid, allow_previous_onestop_ids:$previous}) { stop_id onestop_id }}`,
vars: hw{"osid": "not-a-onestop-id", "previous": true},
selector: "stops.#.onestop_id",
selectExpect: []string{},
},
}
return testcases
Expand Down