ok but dupe highways
This commit is contained in:
11
README.md
11
README.md
@@ -5,6 +5,17 @@ See [https://wiki.openstreetmap.org/wiki/The_Villages_Road_and_Address_Import](h
|
|||||||
See compare-addresses.py for an automated way of running the complete address diff toolchain in one step.
|
See compare-addresses.py for an automated way of running the complete address diff toolchain in one step.
|
||||||
- TODO: fails to split out units
|
- TODO: fails to split out units
|
||||||
|
|
||||||
|
## New Instructions
|
||||||
|
|
||||||
|
### Roads
|
||||||
|
|
||||||
|
* Get new data from the county and convert it:
|
||||||
|
* ``python shp-to-geojson.py "original data/Sumter/RoadCenterlines_041125.shp.zip" "original data/Sumter/RoadCenterlines_041125.geojson"``
|
||||||
|
* Get new data from OSM:
|
||||||
|
* `python download-overpass.py --type highways "Sumter County" "Florida" "original data/Sumter/sumter-roads-$(date +%y%m%d).geojson"`
|
||||||
|
* Diff the roads:
|
||||||
|
* `python threaded.py --exclude-unnamed --output "processed data/Sumter/diff-sumter-roads-$(date +%y%m%d).geojson" "original data/Sumter/sumter-roads-251122.geojson" "original data/Sumter/RoadCenterlines_041125.geojson"`
|
||||||
|
|
||||||
## Data
|
## Data
|
||||||
|
|
||||||
- Lake County Streets and Address Points: https://c.lakecountyfl.gov/ftp/GIS/GisDownloads/Shapefiles/
|
- Lake County Streets and Address Points: https://c.lakecountyfl.gov/ftp/GIS/GisDownloads/Shapefiles/
|
||||||
|
|||||||
@@ -3,9 +3,9 @@
|
|||||||
Download OSM data from Overpass API for a given county and save as GeoJSON.
|
Download OSM data from Overpass API for a given county and save as GeoJSON.
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
python download-overpass.py "Sumter County Florida" highways.geojson
|
python download-overpass.py --type highways "Sumter County" "Florida" output/roads.geojson
|
||||||
python download-overpass.py "Lake County Florida" output/lake-addresses.geojson --type addresses
|
python download-overpass.py --type addresses "Lake County" "Florida" output/addresses.geojson
|
||||||
python download-overpass.py "Sumter County Florida" paths.geojson --type multimodal
|
python download-overpass.py --type multimodal "Sumter County" "Florida" output/paths.geojson
|
||||||
|
|
||||||
TODO:
|
TODO:
|
||||||
- Don't just download roads. Probably ignore relations also.
|
- Don't just download roads. Probably ignore relations also.
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
50
threaded.py
50
threaded.py
@@ -54,7 +54,7 @@ def titlecase(s):
|
|||||||
|
|
||||||
class RoadComparator:
|
class RoadComparator:
|
||||||
def __init__(self, tolerance_feet: float = 50.0, min_gap_length_feet: float = 100.0,
|
def __init__(self, tolerance_feet: float = 50.0, min_gap_length_feet: float = 100.0,
|
||||||
n_jobs: int = None, chunk_size: int = 1000):
|
n_jobs: int = None, chunk_size: int = 1000, exclude_unnamed: bool = False):
|
||||||
"""
|
"""
|
||||||
Initialize the road comparator.
|
Initialize the road comparator.
|
||||||
|
|
||||||
@@ -63,11 +63,13 @@ class RoadComparator:
|
|||||||
min_gap_length_feet: Minimum length of gap/extra to be considered significant (default: 100 feet)
|
min_gap_length_feet: Minimum length of gap/extra to be considered significant (default: 100 feet)
|
||||||
n_jobs: Number of parallel processes to use (default: CPU count - 1)
|
n_jobs: Number of parallel processes to use (default: CPU count - 1)
|
||||||
chunk_size: Number of geometries to process per chunk (default: 1000)
|
chunk_size: Number of geometries to process per chunk (default: 1000)
|
||||||
|
exclude_unnamed: Exclude features without a name from the first (OSM) file's coverage (default: False)
|
||||||
"""
|
"""
|
||||||
self.tolerance_feet = tolerance_feet
|
self.tolerance_feet = tolerance_feet
|
||||||
self.min_gap_length_feet = min_gap_length_feet
|
self.min_gap_length_feet = min_gap_length_feet
|
||||||
self.n_jobs = n_jobs or max(1, mp.cpu_count() - 1)
|
self.n_jobs = n_jobs or max(1, mp.cpu_count() - 1)
|
||||||
self.chunk_size = chunk_size
|
self.chunk_size = chunk_size
|
||||||
|
self.exclude_unnamed = exclude_unnamed
|
||||||
|
|
||||||
# Convert feet to degrees (approximate conversion for continental US)
|
# Convert feet to degrees (approximate conversion for continental US)
|
||||||
# 1 degree latitude ≈ 364,000 feet
|
# 1 degree latitude ≈ 364,000 feet
|
||||||
@@ -76,8 +78,29 @@ class RoadComparator:
|
|||||||
self.min_gap_length_deg = min_gap_length_feet / 364000.0
|
self.min_gap_length_deg = min_gap_length_feet / 364000.0
|
||||||
|
|
||||||
print(f"Using {self.n_jobs} parallel processes with chunk size {self.chunk_size}")
|
print(f"Using {self.n_jobs} parallel processes with chunk size {self.chunk_size}")
|
||||||
|
if self.exclude_unnamed:
|
||||||
|
print("Excluding unnamed features from coverage calculation")
|
||||||
|
|
||||||
def load_geojson(self, filepath: str) -> gpd.GeoDataFrame:
|
def _has_name(self, row) -> bool:
|
||||||
|
"""Check if a feature has a name tag (for OSM data filtering)."""
|
||||||
|
# Check for OSM-style tags (either a dict or a JSON-encoded string)
|
||||||
|
if 'tags' in row.index:
|
||||||
|
tags = row.get('tags')
|
||||||
|
if isinstance(tags, dict):
|
||||||
|
return bool(tags.get('name'))
|
||||||
|
elif isinstance(tags, str):
|
||||||
|
# Tags stored as JSON string
|
||||||
|
try:
|
||||||
|
tags_dict = json.loads(tags)
|
||||||
|
return bool(tags_dict.get('name'))
|
||||||
|
except (json.JSONDecodeError, TypeError):
|
||||||
|
return False
|
||||||
|
return False
|
||||||
|
# Check for direct name properties
|
||||||
|
name = row.get('name') or row.get('NAME') or row.get('FULLNAME')
|
||||||
|
return bool(name)
|
||||||
|
|
||||||
|
def load_geojson(self, filepath: str, filter_unnamed: bool = False) -> gpd.GeoDataFrame:
|
||||||
"""Load and validate GeoJSON file with optimizations."""
|
"""Load and validate GeoJSON file with optimizations."""
|
||||||
try:
|
try:
|
||||||
# Use pyogrio engine for faster loading of large files
|
# Use pyogrio engine for faster loading of large files
|
||||||
@@ -99,6 +122,15 @@ class RoadComparator:
|
|||||||
print(f"Fixing {invalid_mask.sum()} invalid geometries...")
|
print(f"Fixing {invalid_mask.sum()} invalid geometries...")
|
||||||
gdf.loc[invalid_mask, 'geometry'] = gdf.loc[invalid_mask, 'geometry'].buffer(0)
|
gdf.loc[invalid_mask, 'geometry'] = gdf.loc[invalid_mask, 'geometry'].buffer(0)
|
||||||
|
|
||||||
|
# Filter unnamed features if requested
|
||||||
|
if filter_unnamed:
|
||||||
|
original_count = len(gdf)
|
||||||
|
named_mask = gdf.apply(self._has_name, axis=1)
|
||||||
|
gdf = gdf[named_mask].copy()
|
||||||
|
gdf = gdf.reset_index(drop=True)
|
||||||
|
filtered_count = original_count - len(gdf)
|
||||||
|
print(f"Filtered out {filtered_count} unnamed features")
|
||||||
|
|
||||||
print(f"Loaded {len(gdf)} road features from {filepath}")
|
print(f"Loaded {len(gdf)} road features from {filepath}")
|
||||||
return gdf
|
return gdf
|
||||||
|
|
||||||
@@ -390,7 +422,8 @@ class RoadComparator:
|
|||||||
print("-" * 50)
|
print("-" * 50)
|
||||||
|
|
||||||
# Load both files
|
# Load both files
|
||||||
gdf1 = self.load_geojson(file1_path)
|
# Filter unnamed features from file1 (OSM data) if exclude_unnamed is set
|
||||||
|
gdf1 = self.load_geojson(file1_path, filter_unnamed=self.exclude_unnamed)
|
||||||
gdf2 = self.load_geojson(file2_path)
|
gdf2 = self.load_geojson(file2_path)
|
||||||
|
|
||||||
# Ensure both are in the same CRS
|
# Ensure both are in the same CRS
|
||||||
@@ -444,7 +477,7 @@ class RoadComparator:
|
|||||||
print(f"Minimum significant length: {self.min_gap_length_feet} feet")
|
print(f"Minimum significant length: {self.min_gap_length_feet} feet")
|
||||||
|
|
||||||
if removed:
|
if removed:
|
||||||
print(f"\n🔴 REMOVED ROADS ({len(removed)} segments):")
|
print(f"\nREMOVED ROADS ({len(removed)} segments):")
|
||||||
print("These road segments exist in File 1 but are missing or incomplete in File 2:")
|
print("These road segments exist in File 1 but are missing or incomplete in File 2:")
|
||||||
|
|
||||||
# Calculate total length of removed segments
|
# Calculate total length of removed segments
|
||||||
@@ -474,7 +507,7 @@ class RoadComparator:
|
|||||||
print(f" • {road}: {len(lengths)} segment(s), {road_total:,.1f} feet")
|
print(f" • {road}: {len(lengths)} segment(s), {road_total:,.1f} feet")
|
||||||
|
|
||||||
if added:
|
if added:
|
||||||
print(f"\n🔵 ADDED ROADS ({len(added)} roads):")
|
print(f"\nADDED ROADS ({len(added)} roads):")
|
||||||
print("These roads exist in File 2 but are missing or incomplete in File 1:")
|
print("These roads exist in File 2 but are missing or incomplete in File 1:")
|
||||||
|
|
||||||
# Calculate total length of added roads
|
# Calculate total length of added roads
|
||||||
@@ -504,7 +537,7 @@ class RoadComparator:
|
|||||||
print(f" • {road}: {length:,.1f} feet")
|
print(f" • {road}: {length:,.1f} feet")
|
||||||
|
|
||||||
if not removed and not added:
|
if not removed and not added:
|
||||||
print("\n✅ No significant differences found!")
|
print("\nNo significant differences found!")
|
||||||
print("The road networks have good coverage overlap within the specified tolerance.")
|
print("The road networks have good coverage overlap within the specified tolerance.")
|
||||||
|
|
||||||
|
|
||||||
@@ -532,6 +565,8 @@ Examples:
|
|||||||
help='Number of parallel processes (default: CPU count - 1)')
|
help='Number of parallel processes (default: CPU count - 1)')
|
||||||
parser.add_argument('--chunk-size', '-c', type=int, default=1000,
|
parser.add_argument('--chunk-size', '-c', type=int, default=1000,
|
||||||
help='Number of geometries to process per chunk (default: 1000)')
|
help='Number of geometries to process per chunk (default: 1000)')
|
||||||
|
parser.add_argument('--exclude-unnamed', '-e', action='store_true',
|
||||||
|
help='Exclude features without name tags from coverage calculation (helps detect roads covered by unnamed geometry)')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
@@ -550,7 +585,8 @@ Examples:
|
|||||||
tolerance_feet=args.tolerance,
|
tolerance_feet=args.tolerance,
|
||||||
min_gap_length_feet=args.min_length,
|
min_gap_length_feet=args.min_length,
|
||||||
n_jobs=args.jobs,
|
n_jobs=args.jobs,
|
||||||
chunk_size=args.chunk_size
|
chunk_size=args.chunk_size,
|
||||||
|
exclude_unnamed=args.exclude_unnamed
|
||||||
)
|
)
|
||||||
|
|
||||||
removed, added = comparator.compare_roads(args.file1, args.file2)
|
removed, added = comparator.compare_roads(args.file1, args.file2)
|
||||||
|
|||||||
Reference in New Issue
Block a user