Update script and data

This commit is contained in:
Will Bradley 2025-07-14 20:02:49 -07:00
parent 7c73e6e0a7
commit 0cea39bc1d
6 changed files with 3487049 additions and 1728 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -8,6 +8,12 @@ Compares two GeoJSON files containing road data and identifies:
Only reports differences that are significant (above minimum length threshold). Only reports differences that are significant (above minimum length threshold).
Optimized for performance with parallel processing and spatial indexing. Optimized for performance with parallel processing and spatial indexing.
TODO:
- put properties properly on removed roads, so they're visible in JOSM
- handle polygons properly (on previous geojson step?) for circular roads
- ignore roads that aren't LIFECYCLE ACTV or Active
- include OneWay=Y
""" """
import json import json
@ -15,7 +21,7 @@ import argparse
from pathlib import Path from pathlib import Path
from typing import List, Dict, Any, Tuple from typing import List, Dict, Any, Tuple
import geopandas as gpd import geopandas as gpd
from shapely.geometry import LineString, MultiLineString, Point from shapely.geometry import LineString, MultiLineString, Point, Polygon
from shapely.ops import unary_union from shapely.ops import unary_union
from shapely.strtree import STRtree from shapely.strtree import STRtree
import pandas as pd import pandas as pd
@ -35,7 +41,7 @@ warnings.filterwarnings('ignore')
import re import re
def titlecase(s):
    """Return *s* with every word capitalized and the rest of it lower-cased.

    A "word" is a run of letters and/or digits, optionally followed by an
    apostrophe and another such run, so contractions and possessives are
    treated as one word ("they're" -> "They're", not "They'Re") and ordinals
    keep a lower-case suffix ("1ST" -> "1st", not "1St").

    The character class is Unicode-aware (``[^\\W_]`` = any word character
    except the underscore), so accented letters stay inside their word
    ("CAÑON" -> "Cañon") instead of splitting it. For pure-ASCII input this
    matches exactly what ``[A-Za-z0-9]`` would.

    Args:
        s: The string to convert.

    Returns:
        A new string with each word capitalized; all non-word characters
        (spaces, punctuation, underscores) are left untouched.
    """
    return re.sub(
        r"[^\W_]+('[^\W_]+)?",
        # capitalize() upper-cases the first character of the whole match and
        # lower-cases the remainder, including any part after the apostrophe.
        lambda word: word.group(0).capitalize(),
        s)
@ -70,8 +76,8 @@ class RoadComparator:
# Use pyogrio engine for faster loading of large files # Use pyogrio engine for faster loading of large files
gdf = gpd.read_file(filepath, engine='pyogrio') gdf = gpd.read_file(filepath, engine='pyogrio')
# Filter only LineString and MultiLineString geometries # Filter only LineString, MultiLineString, and Polygon geometries
line_types = ['LineString', 'MultiLineString'] line_types = ['LineString', 'MultiLineString', 'Polygon']
gdf = gdf[gdf.geometry.type.isin(line_types)].copy() gdf = gdf[gdf.geometry.type.isin(line_types)].copy()
if len(gdf) == 0: if len(gdf) == 0:
@ -202,7 +208,7 @@ class RoadComparator:
if isinstance(geom, MultiLineString): if isinstance(geom, MultiLineString):
lines = list(geom.geoms) lines = list(geom.geoms)
else: else:
lines = [geom] lines = [geom] # Polygon and Line can be accessed directly
for line in lines: for line in lines:
try: try:
@ -302,8 +308,8 @@ class RoadComparator:
# Include the entire road if: # Include the entire road if:
# 1. The uncovered portion is above minimum threshold, AND # 1. The uncovered portion is above minimum threshold, AND
# 2. More than 50% of the road is uncovered # 2. More than 10% of the road is uncovered
if uncovered_ratio > 0.5: if uncovered_ratio > 0.1:
#uncovered_length >= min_length_deg and #uncovered_length >= min_length_deg and
# Include entire original road with all original metadata # Include entire original road with all original metadata
original_properties = dict(row.drop('geometry')) original_properties = dict(row.drop('geometry'))