From b2101348ad669ce528c9fea32dffb630bef22cb3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 7 Mar 2026 21:29:14 +0000 Subject: [PATCH 1/2] Initial plan From 83e7daa1b41b181ee8870dfaf9fbc98e2d9508c8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 7 Mar 2026 21:49:34 +0000 Subject: [PATCH 2/2] Implement topology gap/overlap detection and distance distortion Co-authored-by: chrislyonsKY <135981795+chrislyonsKY@users.noreply.github.com> --- src/checkers/data_quality/topology_gaps.rs | 280 ++++++++++++++++-- .../data_quality/topology_overlaps.rs | 249 ++++++++++++++-- .../projection/distance_distortion.rs | 163 +++++++++- 3 files changed, 622 insertions(+), 70 deletions(-) diff --git a/src/checkers/data_quality/topology_gaps.rs b/src/checkers/data_quality/topology_gaps.rs index b9394df..9a01961 100644 --- a/src/checkers/data_quality/topology_gaps.rs +++ b/src/checkers/data_quality/topology_gaps.rs @@ -3,7 +3,8 @@ //! Uses an R-tree spatial index for efficient neighbor queries. //! Gaps indicate missing coverage between polygon boundaries. -use geo::Geometry; +use geo::{BoundingRect, Distance, Euclidean, Geometry, Polygon}; +use rstar::{AABB, RTree, RTreeObject}; use crate::core::rule::{ CheckContext, Domain, Finding, Rule, RuleEntry, Severity, SpatialLocation, @@ -18,9 +19,29 @@ impl Default for TopologyGaps { } } -/// Check if a geometry is a polygon type (Polygon or MultiPolygon). -fn is_polygon_geometry(geom: &Geometry) -> bool { - matches!(geom, Geometry::Polygon(_) | Geometry::MultiPolygon(_)) +/// Lightweight record for R-tree indexing of polygon bounding boxes. +struct IndexedBbox { + /// Index into the owning `polys` slice. + idx: usize, + min: [f64; 2], + max: [f64; 2], +} + +impl RTreeObject for IndexedBbox { + type Envelope = AABB<[f64; 2]>; + + fn envelope(&self) -> Self::Envelope { + AABB::from_corners(self.min, self.max) + } +} + +/// Extract all `Polygon` instances from a geometry. +fn extract_polygons(geom: &Geometry) -> Vec> { + match geom { + Geometry::Polygon(p) => vec![p.clone()], + Geometry::MultiPolygon(mp) => mp.0.clone(), + _ => vec![], + } } impl Rule for TopologyGaps { @@ -49,39 +70,131 @@ impl Rule for TopologyGaps { } fn check(&self, ctx: &CheckContext) -> Vec { - let findings = Vec::new(); + let mut findings = Vec::new(); for layer in ctx.layers { - // Collect polygon features for this layer. - let polygons: Vec<(usize, &Geometry)> = layer - .features + // Collect (feature_label, polygon) pairs from all polygon-type features. + let mut polys: Vec<(String, Polygon)> = Vec::new(); + for feature in &layer.features { + let geom = match &feature.geometry { + Some(g) => g, + None => continue, + }; + let label = feature + .id + .clone() + .unwrap_or_else(|| format!("#{}", polys.len())); + for poly in extract_polygons(geom) { + polys.push((label.clone(), poly)); + } + } + + if polys.len() < 2 { + continue; + } + + // Compute a gap tolerance as a fraction of the layer's overall extent. + // Using a relative tolerance (0.01% of the longest axis) makes the + // threshold proportional to the CRS units and dataset scale: it is roughly + // 1 cm for a 100 m plot, 10 m for a 100 km county dataset, or 0.01° for + // a global dataset in WGS 84. The 1e-8 clamp prevents division-by-zero + // issues on degenerate single-point extents. + let (min_x, min_y, max_x, max_y) = + polys.iter().filter_map(|(_, p)| p.bounding_rect()).fold( + (f64::MAX, f64::MAX, f64::MIN, f64::MIN), + |(ax, ay, bx, by), r| { + ( + ax.min(r.min().x), + ay.min(r.min().y), + bx.max(r.max().x), + by.max(r.max().y), + ) + }, + ); + let extent = (max_x - min_x).max(max_y - min_y); + let gap_tolerance = (extent * 1e-4).max(1e-8); + + // Build R-tree with bounding boxes expanded by `gap_tolerance` on each side + // so that nearby polygons appear as bbox-intersecting candidates. + let entries: Vec = polys .iter() .enumerate() - .filter_map(|(idx, f)| { - f.geometry - .as_ref() - .filter(|g| is_polygon_geometry(g)) - .map(|g| (idx, g)) + .filter_map(|(idx, (_, poly))| { + poly.bounding_rect().map(|r| IndexedBbox { + idx, + min: [r.min().x - gap_tolerance, r.min().y - gap_tolerance], + max: [r.max().x + gap_tolerance, r.max().y + gap_tolerance], + }) }) .collect(); - if polygons.len() < 2 { - continue; - } + let tree = RTree::bulk_load(entries); + + // Track reported pairs to avoid duplicate findings. + let mut reported: std::collections::HashSet<(usize, usize)> = + std::collections::HashSet::new(); - // R-tree based gap detection: build spatial index, find adjacent polygons, - // compute difference to detect gap regions. - // This requires computing the union boundary and finding uncovered areas. - todo!( - "R-tree gap detection: build rstar index from polygon envelopes, query neighbors, compute gap geometries" - ); - - #[allow(unreachable_code)] - { - let _ = &findings; - let _ = SpatialLocation::Layer { - name: layer.name.clone(), + for (i, (label_a, poly_a)) in polys.iter().enumerate() { + let bbox_a = match poly_a.bounding_rect() { + Some(b) => b, + None => continue, }; + + // Expand the query envelope by gap_tolerance to find near-neighbors. + let query = AABB::from_corners( + [ + bbox_a.min().x - gap_tolerance, + bbox_a.min().y - gap_tolerance, + ], + [ + bbox_a.max().x + gap_tolerance, + bbox_a.max().y + gap_tolerance, + ], + ); + + for candidate in tree.locate_in_envelope_intersecting(&query) { + let j = candidate.idx; + if j <= i { + continue; + } + let pair = (i, j); + if reported.contains(&pair) { + continue; + } + + let (label_b, poly_b) = &polys[j]; + + // The R-tree already filtered to bbox-intersecting candidates after + // expansion by `gap_tolerance`, so only genuinely nearby polygons + // reach this exact distance call. Exact polygon distance is only + // computed for this small candidate set. + let dist = + Euclidean::distance(poly_a as &Polygon, poly_b as &Polygon); + + // A gap exists when polygons are close but don't actually touch. + // dist == 0.0 means they touch or overlap (handled by topology-overlaps). + if dist > 0.0 && dist <= gap_tolerance { + reported.insert(pair); + + findings.push(Finding { + rule_id: self.id().to_string(), + severity: self.default_severity(), + message: format!( + "Gap of {dist:.6} units between features {label_a} and {label_b} in layer '{}'", + layer.name + ), + location: Some(SpatialLocation::Layer { + name: layer.name.clone(), + }), + geometry: None, + metric: Some(dist), + suggestion: Some( + "Close the gap by snapping polygon vertices. Use `tissot fix --topology` for automated repair.".to_string(), + ), + fixable: true, + }); + } + } } } @@ -174,4 +287,115 @@ mod tests { let rule = TopologyGaps; assert!(rule.check(&ctx).is_empty()); } + + #[test] + fn detects_small_gap_between_polygons() { + // poly_a: [0,0]–[1,1], poly_b: [1.00001,0]–[2.00001,1] + // The gap is 0.00001 units — within the tolerance (fraction of 2-unit extent). + let poly_a = geo::Polygon::new( + geo::LineString::from(vec![ + (0.0, 0.0), + (1.0, 0.0), + (1.0, 1.0), + (0.0, 1.0), + (0.0, 0.0), + ]), + vec![], + ); + let gap = 0.00001_f64; + let poly_b = geo::Polygon::new( + geo::LineString::from(vec![ + (1.0 + gap, 0.0), + (2.0 + gap, 0.0), + (2.0 + gap, 1.0), + (1.0 + gap, 1.0), + (1.0 + gap, 0.0), + ]), + vec![], + ); + let layer = Layer { + name: "test".into(), + crs: Some("EPSG:4326".into()), + features: vec![ + Feature { + id: Some("a".into()), + geometry: Some(geo::Geometry::Polygon(poly_a)), + properties: HashMap::new(), + }, + Feature { + id: Some("b".into()), + geometry: Some(geo::Geometry::Polygon(poly_b)), + properties: HashMap::new(), + }, + ], + bounds: None, + }; + + let config = Config::default(); + let ctx = CheckContext { + layers: &[layer], + config: &config, + file_path: "test.geojson", + }; + + let rule = TopologyGaps; + let findings = rule.check(&ctx); + assert_eq!(findings.len(), 1, "Expected one gap finding"); + assert!(findings[0].message.contains("Gap")); + assert!(findings[0].metric.unwrap() > 0.0); + assert!(findings[0].fixable); + } + + #[test] + fn no_findings_for_touching_polygons() { + // poly_a: [0,0]–[1,1], poly_b: [1,0]–[2,1] — they share exactly one edge. + let poly_a = geo::Polygon::new( + geo::LineString::from(vec![ + (0.0, 0.0), + (1.0, 0.0), + (1.0, 1.0), + (0.0, 1.0), + (0.0, 0.0), + ]), + vec![], + ); + let poly_b = geo::Polygon::new( + geo::LineString::from(vec![ + (1.0, 0.0), + (2.0, 0.0), + (2.0, 1.0), + (1.0, 1.0), + (1.0, 0.0), + ]), + vec![], + ); + let layer = Layer { + name: "test".into(), + crs: Some("EPSG:4326".into()), + features: vec![ + Feature { + id: Some("a".into()), + geometry: Some(geo::Geometry::Polygon(poly_a)), + properties: HashMap::new(), + }, + Feature { + id: Some("b".into()), + geometry: Some(geo::Geometry::Polygon(poly_b)), + properties: HashMap::new(), + }, + ], + bounds: None, + }; + + let config = Config::default(); + let ctx = CheckContext { + layers: &[layer], + config: &config, + file_path: "test.geojson", + }; + + let rule = TopologyGaps; + // Touching polygons (dist == 0) should not be reported as gaps. + assert!(rule.check(&ctx).is_empty()); + } } diff --git a/src/checkers/data_quality/topology_overlaps.rs b/src/checkers/data_quality/topology_overlaps.rs index 577a86a..5adae6a 100644 --- a/src/checkers/data_quality/topology_overlaps.rs +++ b/src/checkers/data_quality/topology_overlaps.rs @@ -3,7 +3,8 @@ //! Uses an R-tree spatial index for efficient intersection queries. //! Overlaps may indicate digitization errors or conflicting boundaries. -use geo::Geometry; +use geo::{BooleanOps, BoundingRect, Geometry, Polygon}; +use rstar::{AABB, RTree, RTreeObject}; use crate::core::rule::{ CheckContext, Domain, Finding, Rule, RuleEntry, Severity, SpatialLocation, @@ -18,9 +19,29 @@ impl Default for TopologyOverlaps { } } -/// Check if a geometry is a polygon type (Polygon or MultiPolygon). -fn is_polygon_geometry(geom: &Geometry) -> bool { - matches!(geom, Geometry::Polygon(_) | Geometry::MultiPolygon(_)) +/// Lightweight polygon record for R-tree indexing. +struct IndexedBbox { + /// Index into the owning `polygons` slice. + idx: usize, + min: [f64; 2], + max: [f64; 2], +} + +impl RTreeObject for IndexedBbox { + type Envelope = AABB<[f64; 2]>; + + fn envelope(&self) -> Self::Envelope { + AABB::from_corners(self.min, self.max) + } +} + +/// Extract all `Polygon` instances from a geometry (handles MultiPolygon). +fn extract_polygons(geom: &Geometry) -> Vec> { + match geom { + Geometry::Polygon(p) => vec![p.clone()], + Geometry::MultiPolygon(mp) => mp.0.clone(), + _ => vec![], + } } impl Rule for TopologyOverlaps { @@ -49,39 +70,103 @@ impl Rule for TopologyOverlaps { } fn check(&self, ctx: &CheckContext) -> Vec { - let findings = Vec::new(); + let mut findings = Vec::new(); for layer in ctx.layers { - // Collect polygon features for this layer. - let polygons: Vec<(usize, &Geometry)> = layer - .features + // Collect (feature_label, polygon) pairs from all polygon-type features. + let mut polys: Vec<(String, Polygon)> = Vec::new(); + for feature in &layer.features { + let geom = match &feature.geometry { + Some(g) => g, + None => continue, + }; + let label = feature + .id + .clone() + .unwrap_or_else(|| format!("#{}", polys.len())); + for poly in extract_polygons(geom) { + polys.push((label.clone(), poly)); + } + } + + if polys.len() < 2 { + continue; + } + + // Build R-tree from bounding boxes for efficient candidate filtering. + let entries: Vec = polys .iter() .enumerate() - .filter_map(|(idx, f)| { - f.geometry - .as_ref() - .filter(|g| is_polygon_geometry(g)) - .map(|g| (idx, g)) + .filter_map(|(idx, (_, poly))| { + poly.bounding_rect().map(|r| IndexedBbox { + idx, + min: [r.min().x, r.min().y], + max: [r.max().x, r.max().y], + }) }) .collect(); - if polygons.len() < 2 { - continue; - } + let tree = RTree::bulk_load(entries); - // R-tree based overlap detection: build spatial index from envelopes, - // for each polygon find candidates with overlapping bounding boxes, - // compute actual polygon intersection to detect overlapping regions. - todo!( - "R-tree overlap detection: build rstar index, query intersecting envelopes, compute pairwise polygon intersections" - ); - - #[allow(unreachable_code)] - { - let _ = &findings; - let _ = SpatialLocation::Layer { - name: layer.name.clone(), + // Track reported pairs to avoid duplicate findings. + let mut reported: std::collections::HashSet<(usize, usize)> = + std::collections::HashSet::new(); + + for (i, (label_a, poly_a)) in polys.iter().enumerate() { + let bbox_a = match poly_a.bounding_rect() { + Some(b) => b, + None => continue, }; + + let query = AABB::from_corners( + [bbox_a.min().x, bbox_a.min().y], + [bbox_a.max().x, bbox_a.max().y], + ); + + for candidate in tree.locate_in_envelope_intersecting(&query) { + let j = candidate.idx; + if j <= i { + continue; + } + let pair = (i, j); + if reported.contains(&pair) { + continue; + } + + let (label_b, poly_b) = &polys[j]; + + // Compute the actual intersection area. + // We use BooleanOps::intersection() for accuracy; the area threshold + // filters out floating-point noise from boundary-only touches. + // 1e-10 sq units is negligible for any realistic polygon dataset. + use geo::Area; + let intersection = poly_a.intersection(poly_b); + let area = intersection.unsigned_area(); + + if area > 1e-10 { + reported.insert(pair); + + findings.push(Finding { + rule_id: self.id().to_string(), + severity: self.default_severity(), + message: format!( + "Features {label_a} and {label_b} in layer '{}' overlap (shared area: {area:.4} sq units)", + layer.name + ), + location: Some(SpatialLocation::Layer { + name: layer.name.clone(), + }), + geometry: intersection + .bounding_rect() + .map(geo::Geometry::Rect), + metric: Some(area), + suggestion: Some( + "Remove overlapping areas. Use `tissot fix --topology` for automated repair.".to_string(), + ), + fixable: true, + }); + } + } } } @@ -174,4 +259,112 @@ mod tests { let rule = TopologyOverlaps; assert!(rule.check(&ctx).is_empty()); } + + #[test] + fn detects_overlapping_polygons() { + // poly_a: [0,0]–[2,2], poly_b: [1,0]–[3,2] — they share a 1×2 interior strip. + let poly_a = geo::Polygon::new( + geo::LineString::from(vec![ + (0.0, 0.0), + (2.0, 0.0), + (2.0, 2.0), + (0.0, 2.0), + (0.0, 0.0), + ]), + vec![], + ); + let poly_b = geo::Polygon::new( + geo::LineString::from(vec![ + (1.0, 0.0), + (3.0, 0.0), + (3.0, 2.0), + (1.0, 2.0), + (1.0, 0.0), + ]), + vec![], + ); + let layer = Layer { + name: "test".into(), + crs: Some("EPSG:4326".into()), + features: vec![ + Feature { + id: Some("a".into()), + geometry: Some(geo::Geometry::Polygon(poly_a)), + properties: HashMap::new(), + }, + Feature { + id: Some("b".into()), + geometry: Some(geo::Geometry::Polygon(poly_b)), + properties: HashMap::new(), + }, + ], + bounds: None, + }; + + let config = Config::default(); + let ctx = CheckContext { + layers: &[layer], + config: &config, + file_path: "test.geojson", + }; + + let rule = TopologyOverlaps; + let findings = rule.check(&ctx); + assert_eq!(findings.len(), 1); + assert!(findings[0].message.contains("overlap")); + assert!(findings[0].metric.unwrap() > 0.0); + assert!(findings[0].fixable); + } + + #[test] + fn no_findings_for_adjacent_polygons() { + // poly_a: [0,0]–[1,1], poly_b: [1,0]–[2,1] — they share only a boundary edge. + let poly_a = geo::Polygon::new( + geo::LineString::from(vec![ + (0.0, 0.0), + (1.0, 0.0), + (1.0, 1.0), + (0.0, 1.0), + (0.0, 0.0), + ]), + vec![], + ); + let poly_b = geo::Polygon::new( + geo::LineString::from(vec![ + (1.0, 0.0), + (2.0, 0.0), + (2.0, 1.0), + (1.0, 1.0), + (1.0, 0.0), + ]), + vec![], + ); + let layer = Layer { + name: "test".into(), + crs: Some("EPSG:4326".into()), + features: vec![ + Feature { + id: Some("a".into()), + geometry: Some(geo::Geometry::Polygon(poly_a)), + properties: HashMap::new(), + }, + Feature { + id: Some("b".into()), + geometry: Some(geo::Geometry::Polygon(poly_b)), + properties: HashMap::new(), + }, + ], + bounds: None, + }; + + let config = Config::default(); + let ctx = CheckContext { + layers: &[layer], + config: &config, + file_path: "test.geojson", + }; + + let rule = TopologyOverlaps; + assert!(rule.check(&ctx).is_empty()); + } } diff --git a/src/checkers/projection/distance_distortion.rs b/src/checkers/projection/distance_distortion.rs index b082ba5..b7a64e0 100644 --- a/src/checkers/projection/distance_distortion.rs +++ b/src/checkers/projection/distance_distortion.rs @@ -38,7 +38,7 @@ impl Rule for DistanceDistortion { } fn check(&self, ctx: &CheckContext) -> Vec { - let findings = Vec::new(); + let mut findings = Vec::new(); for layer in ctx.layers { let crs = match &layer.crs { @@ -69,19 +69,102 @@ impl Rule for DistanceDistortion { continue; } - // Sample centroid pairs, compute projected (Euclidean) vs geodesic distance, - // and report deviation as percentage error. - // Requires inverse-projecting points to geographic coords for geodesic calc. - todo!( - "Distance distortion: inverse-project centroids to WGS84 via proj, compute geodesic vs Euclidean distance, report max/mean error" - ); - - #[allow(unreachable_code)] - { - let _ = &findings; - let _ = SpatialLocation::Layer { - name: layer.name.clone(), - }; + // Build inverse projection: from the layer's CRS back to WGS 84. + let inv_proj = match proj::Proj::new_known_crs(&crs, "EPSG:4326", None) { + Ok(p) => p, + Err(e) => { + log::warn!( + "Distance distortion: cannot build inverse projection for {crs}: {e}" + ); + continue; + } + }; + + // Cap the number of centroids to bound the O(n²) pairwise comparison cost. + // With 30 centroids, we get at most 435 pairs — enough for a representative + // sample while keeping per-layer overhead under a millisecond for typical data. + const MAX_CENTROIDS: usize = 30; + let sample: Vec<(usize, geo::Point)> = + centroids.into_iter().take(MAX_CENTROIDS).collect(); + + // Inverse-project sampled centroids to WGS 84. + let wgs84: Vec> = sample + .iter() + .map(|(_, pt)| { + let coord = geo::coord! { x: pt.x(), y: pt.y() }; + inv_proj + .convert(coord) + .ok() + .map(|c| geo::Point::new(c.x, c.y)) + }) + .collect(); + + // Compute pairwise projected (Euclidean) vs geodesic distances. + // O(n²) over MAX_CENTROIDS — bounded by the cap above. + let mut errors: Vec = Vec::new(); + for i in 0..sample.len() { + for j in (i + 1)..sample.len() { + let (_, pt_a) = &sample[i]; + let (_, pt_b) = &sample[j]; + + // Euclidean distance in the projected CRS units. + let dx = pt_a.x() - pt_b.x(); + let dy = pt_a.y() - pt_b.y(); + let euclidean = (dx * dx + dy * dy).sqrt(); + + // Skip co-located points: distortion ratio is undefined and + // the threshold of 1.0 CRS unit is appropriate for meter-based CRS + // (geographic CRS is already excluded above). + if euclidean < 1.0 { + continue; + } + + // Geodesic distance via haversine on the WGS 84 ellipsoid. + let (wgs_a, wgs_b) = match (&wgs84[i], &wgs84[j]) { + (Some(a), Some(b)) => (a, b), + _ => continue, + }; + + use geo::{Distance, Haversine}; + let geodesic = Haversine::distance(*wgs_a, *wgs_b); + + if geodesic < 1.0 { + continue; + } + + let error_pct = ((euclidean - geodesic) / geodesic).abs() * 100.0; + errors.push(error_pct); + } + } + + if errors.is_empty() { + continue; + } + + let max_error = errors.iter().cloned().fold(f64::NEG_INFINITY, f64::max); + let mean_error = errors.iter().sum::() / errors.len() as f64; + + // Report if distance distortion exceeds the warning threshold. + let warning_threshold = ctx.config.check.max_distortion_pct; + if max_error > warning_threshold { + findings.push(Finding { + rule_id: self.id().to_string(), + severity: self.default_severity(), + message: format!( + "Layer '{}' has {max_error:.1}% max distance distortion (mean: {mean_error:.1}%) in CRS {crs}", + layer.name + ), + location: Some(SpatialLocation::Layer { + name: layer.name.clone(), + }), + geometry: None, + metric: Some(max_error), + suggestion: Some(format!( + "Run `tissot xray {}` to visualise distortion and get CRS recommendations.", + ctx.file_path + )), + fixable: false, + }); } } @@ -186,4 +269,56 @@ mod tests { let rule = DistanceDistortion; assert!(rule.check(&ctx).is_empty()); } + + #[test] + fn detects_web_mercator_high_latitude_distance_distortion() { + // Web Mercator (EPSG:3857) significantly stretches distances at high latitudes. + // Place two points near 60°N in Web Mercator projected coordinates. + // EPSG:3857 at ~60°N: x ≈ lon * 111_319, y ≈ (R * ln(tan(π/4 + lat/2))) + // Two points separated by ~100km horizontally at 60°N. + use crate::core::config::Config; + use crate::core::rule::{Feature, Layer}; + use std::collections::HashMap; + + // Approximate projected coords for points near 60°N, separated by ~1° longitude. + // At 60°N in Web Mercator: y ≈ 8_399_737 + let x1 = -9_392_582.0_f64; // ~-84.4° lon + let x2 = -9_281_263.0_f64; // ~-83.4° lon + let y = 8_399_737.0_f64; // ~60°N + + let layer = Layer { + name: "high_lat".into(), + crs: Some("EPSG:3857".into()), + features: vec![ + Feature { + id: Some("1".into()), + geometry: Some(geo::Geometry::Point(geo::Point::new(x1, y))), + properties: HashMap::new(), + }, + Feature { + id: Some("2".into()), + geometry: Some(geo::Geometry::Point(geo::Point::new(x2, y))), + properties: HashMap::new(), + }, + ], + bounds: None, + }; + + // Use a very low distortion threshold so the rule fires. + let mut config = Config::default(); + config.check.max_distortion_pct = 0.1; + let ctx = CheckContext { + layers: &[layer], + config: &config, + file_path: "test.geojson", + }; + + let rule = DistanceDistortion; + let findings = rule.check(&ctx); + assert!( + !findings.is_empty(), + "Expected distance distortion finding for Web Mercator at 60°N" + ); + assert!(findings[0].metric.unwrap() > 0.0); + } }