Update script and data

2025-07-14 20:02:49 -07:00 · 2025-07-14 20:02:49 -07:00 · 0cea39bc1d
commit 0cea39bc1d
parent 7c73e6e0a7
6 changed files with 3487049 additions and 1728 deletions
--- a/import/diff-sumter-roads.geojson
+++ b/import/diff-sumter-roads.geojson
--- a/import/sumter-roads-250710.osm
+++ b/import/sumter-roads-250710.osm
--- a/data/Sumter/sumter-roads-7-8-25.geojson
+++ b/data/Sumter/sumter-roads-7-8-25.geojson
--- a/data/diff-sumter-roads-6-28-25.geojson
+++ b/data/diff-sumter-roads-6-28-25.geojson
--- a/data/diff-sumter-roads.geojson
+++ b/data/diff-sumter-roads.geojson
--- a/threaded.py
+++ b/threaded.py
@ -8,6 +8,12 @@ Compares two GeoJSON files containing road data and identifies:
 Only reports differences that are significant (above minimum length threshold).
 Optimized for performance with parallel processing and spatial indexing.
 TODO:
 - put properties properly on removed roads, so they're visible in JOSM
 - handle polygons properly (on previous geojson step?) for circular roads
 - ignore roads that aren't LIFECYCLE ACTV or Active
 - include OneWay=Y
 """
 import json
@ -15,7 +21,7 @@ import argparse
 from pathlib import Path
 from typing import List, Dict, Any, Tuple
 import geopandas as gpd
-from shapely.geometry import LineString, MultiLineString, Point
+from shapely.geometry import LineString, MultiLineString, Point, Polygon
 from shapely.ops import unary_union
 from shapely.strtree import STRtree
 import pandas as pd
@ -35,7 +41,7 @@ warnings.filterwarnings('ignore')
 import re
 def titlecase(s):
    return re.sub(
-        r"[A-Za-z]+('[A-Za-z]+)?",
+        r"[A-Za-z0-9]+('[A-Za-z0-9]+)?",
        lambda word: word.group(0).capitalize(),
        s)
@ -70,8 +76,8 @@ class RoadComparator:
            # Use pyogrio engine for faster loading of large files
            gdf = gpd.read_file(filepath, engine='pyogrio')
-            # Filter only LineString and MultiLineString geometries
+            # Filter only LineString, MultiLineString, and Polygon geometries
-            line_types = ['LineString', 'MultiLineString']
+            line_types = ['LineString', 'MultiLineString', 'Polygon']
            gdf = gdf[gdf.geometry.type.isin(line_types)].copy()
            if len(gdf) == 0:
@ -202,7 +208,7 @@ class RoadComparator:
            if isinstance(geom, MultiLineString):
                lines = list(geom.geoms)
            else:
-                lines = [geom]
+                lines = [geom] # Polygon and LineString are single geometries; use as-is
            for line in lines:
                try:
@ -302,8 +308,8 @@ class RoadComparator:
                    # Include the entire road if:
                    # 1. The uncovered portion is above minimum threshold, AND
-                    # 2. More than 50% of the road is uncovered
+                    # 2. More than 10% of the road is uncovered
-                    if uncovered_ratio > 0.5:
+                    if uncovered_ratio > 0.1:
                        #uncovered_length >= min_length_deg and 
                        # Include entire original road with all original metadata
                        original_properties = dict(row.drop('geometry'))