Update script and data

This commit is contained in:
Will Bradley 2025-07-14 20:02:49 -07:00
parent 7c73e6e0a7
commit 0cea39bc1d
6 changed files with 3487049 additions and 1728 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -8,6 +8,12 @@ Compares two GeoJSON files containing road data and identifies:
Only reports differences that are significant (above minimum length threshold).
Optimized for performance with parallel processing and spatial indexing.
TODO:
- put properties properly on removed roads, so they're visible in JOSM
- handle polygons properly (on previous geojson step?) for circular roads
- ignore roads that aren't LIFECYCLE ACTV or Active
- include OneWay=Y
"""
import json
@ -15,7 +21,7 @@ import argparse
from pathlib import Path
from typing import List, Dict, Any, Tuple
import geopandas as gpd
from shapely.geometry import LineString, MultiLineString, Point
from shapely.geometry import LineString, MultiLineString, Point, Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
import pandas as pd
@ -35,7 +41,7 @@ warnings.filterwarnings('ignore')
import re
def titlecase(s):
    """Capitalize each word in *s*, leaving all other characters untouched.

    A word is a run of ASCII letters and digits, optionally followed by an
    apostrophe and another such run. This keeps contractions together, so
    "they're" becomes "They're" rather than "They'Re". Digits are part of a
    word so that ordinals such as "1st" stay "1st" instead of becoming "1St".

    Each matched word is passed through ``str.capitalize``, which upper-cases
    the first character and lower-cases the rest.
    """
    return re.sub(
        r"[A-Za-z0-9]+('[A-Za-z0-9]+)?",
        lambda word: word.group(0).capitalize(),
        s)
@ -70,8 +76,8 @@ class RoadComparator:
# Use pyogrio engine for faster loading of large files
gdf = gpd.read_file(filepath, engine='pyogrio')
# Filter only LineString and MultiLineString geometries
line_types = ['LineString', 'MultiLineString']
# Filter only LineString, MultiLineString, and Polygon geometries
line_types = ['LineString', 'MultiLineString', 'Polygon']
gdf = gdf[gdf.geometry.type.isin(line_types)].copy()
if len(gdf) == 0:
@ -202,7 +208,7 @@ class RoadComparator:
if isinstance(geom, MultiLineString):
lines = list(geom.geoms)
else:
lines = [geom]
lines = [geom] # Polygon and Line can be accessed directly
for line in lines:
try:
@ -302,8 +308,8 @@ class RoadComparator:
# Include the entire road if:
# 1. The uncovered portion is above minimum threshold, AND
# 2. More than 50% of the road is uncovered
if uncovered_ratio > 0.5:
# 2. More than 10% of the road is uncovered
if uncovered_ratio > 0.1:
#uncovered_length >= min_length_deg and
# Include entire original road with all original metadata
original_properties = dict(row.drop('geometry'))