Add and update scripts, finished Sumter

Will Bradley 2025-07-28 18:49:39 -07:00
parent 0cea39bc1d
commit ec0e4a6efb
12 changed files with 175287 additions and 11 deletions

View File

@@ -4,8 +4,8 @@ See [https://wiki.openstreetmap.org/wiki/The_Villages_Road_and_Address_Import](h
## Data
Lake County: https://c.lakecountyfl.gov/ftp/GIS/GisDownloads/Shapefiles/
Sumter GIS is via emailing their GIS team and accessing their Dropbox.
- Lake County Streets and Address Points: https://c.lakecountyfl.gov/ftp/GIS/GisDownloads/Shapefiles/
- Sumter GIS Road Centerlines, Addresses, and Multi Modal Trails are available by emailing their GIS team and accessing their Dropbox (https://www.dropbox.com/scl/fo/67nh5y8e42tr2kzdmmcg4/AAsF7Ay0MRUN-e_Ajlh5yWQ?rlkey=h6u606av0d2zkszk9lm3qijlt&e=1&st=7j7i94f8&dl=0)
## Instructions
@@ -16,6 +16,7 @@ Sumter GIS is via emailing their GIS team and accessing their Dropbox.
### For Sumter County:
* Always use the Filter with Form function to select all entries with `"LIFECYCLE"='Current'`
* For roads:
* `NAME` becomes the virtual `name` via `title(formatstreet("NAME"))` (see the Python sketch after this file's diff)
* `SpeedLimit` becomes the virtual `maxspeed` via `concat("SpeedLimit",' mph')`
@@ -30,6 +31,8 @@ Sumter GIS is via emailing their GIS team and accessing their Dropbox.
* `POST_CODE` becomes `addr:postcode` (or `addr:postc` temporarily) as an integer
* Manually add `addr:state` = `'FL'`
* For multi-modal trails (golf cart paths):
* Download all `highway=path` and `highway=cycleway` ways with `golf_cart=yes` for comparison
* Omit features with `Part_of_Ro`=`Yes` as separate paths; apply golf-cart tagging to those streets directly.
* `bicycle=yes`
* `foot=yes`
* `golf=cartpath`
@@ -38,7 +41,6 @@ Sumter GIS is via emailing their GIS team and accessing their Dropbox.
* `motor_vehicle=no`
* `segregated=no`
* `surface=asphalt`
* Use the Filter with Form function to Select all entries with `"LIFECYCLE"='Current'`
### For Lake County:
@@ -63,7 +65,7 @@ Sumter GIS is via emailing their GIS team and accessing their Dropbox.
* Export to GeoJSON, only exporting **selected** entries, **selecting only the OSM-formatted fields we want**.
* Here you can rename temporary columns like `addr:house` to `addr:housenumber`.
* Ensure the export file is in the `EPSG:4326 - WGS84` CRS.
* Open in JSOM. It's suggested to begin with roads first, addresses second, so the addresses can be placed in context.
* Open in JOSM. It's suggested to begin with roads first, addresses second, so the addresses can be placed in context.
* In the Roads dataset, select and remove all relations from the geojson/shapefile layer: the data often has one relation per road, which is improper for OSM import.
* Select a small region to work on: one neighborhood or smaller. For this import, we assume that only newly-constructed small residential areas will be imported, not main roads, commercial areas, or areas with significant existing map data.
* Download the area you're working on from OSM, into a new Data Layer (not your geojson layer).
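
The Sumter road mapping above can be spot-checked outside QGIS as well. A minimal GeoPandas sketch, assuming the `title` and `formatstreet` helpers from the qgis-functions file below and a hypothetical `roads.geojson` export of the Current-lifecycle roads:

import importlib

import geopandas

# qgis-functions has a hyphen in its name, so import it via importlib
# (the same trick sumter-address-convert.py uses below).
qf = importlib.import_module("qgis-functions")

roads = geopandas.read_file("roads.geojson")  # hypothetical export

# Mirror the QGIS virtual fields: title(formatstreet("NAME")) and
# concat("SpeedLimit", ' mph').
roads["name"] = roads["NAME"].map(lambda n: qf.title(qf.formatstreet(n, None, None)))
roads["maxspeed"] = roads["SpeedLimit"].astype(str) + " mph"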

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

View File

@@ -8,12 +8,19 @@ import re
# or >1 suffix-letters, like 12th Street or 243rd Ave.
#
def title(s):
return re.sub(
r"[A-Za-z0-9]+('[A-Za-z0-9]+)?",
lambda word: word.group(0).capitalize(),
s)
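# Illustrative behavior of title() (worked examples added for this writeup,
# not from the original source):
#   title("243RD AVE")  -> "243rd Ave"   (digit-led tokens keep lowercase suffixes)
#   title("O'BRIEN ST") -> "O'brien St"  (an apostrophe group is one token)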
# @qgsfunction(args='auto', group='Custom', referenced_columns=[])
def getstreetfromaddress(value1, feature, parent):
parts = value1.split()
parts.pop(0) # Ignore the first bit (i.e. "123" in "123 N MAIN ST")
parts = map(formatstreetname, parts)
return " ".join(parts)
#parts = map(formatstreetname, parts)
#return " ".join(parts)
return formatstreet(" ".join(parts), None, None)
# @qgsfunction(args='auto', group='Custom', referenced_columns=[])
def formatstreet(value1, feature, parent):
@@ -26,10 +33,12 @@ def formatstreet(value1, feature, parent):
parts[0] = "Royal"
parts[1] = "Saint"
# And "CR" as a first part (County Road) vs last part (Circle)
if parts[0].upper() == "C ":
parts[0] = "County Road "
if parts[0].upper() == "CR":
parts[0] = "County Road"
if parts[0].upper() == "SR":
parts[0] = "State Route"
parts[0] = "State Road"
parts = map(formatstreetname, parts)
return " ".join(parts)

View File

@@ -1,5 +1,75 @@
import geopandas
df = geopandas.read_file('original data/Sumter/RoadCenterlines_041125.shp.zip')
df = df.to_crs(4326) # Convert to WGS 84
exploded = df.explode()
exploded.to_file('original data/Sumter/RoadCenterlines_041125.geojson', driver='GeoJSON')
import sys
import os
from pathlib import Path
def convert_shapefile_to_geojson(
input_shapefile,
output_geojson,
target_crs=4326 # Convert to WGS 84
):
"""
Main conversion function
Args:
input_shapefile: Path to input shapefile
output_geojson: Path to output GeoJSON file
target_crs: Target coordinate reference system
"""
try:
# Read shapefile
print(f"Reading shapefile: {input_shapefile}")
df = geopandas.read_file(input_shapefile)
print(f"Converting to CRS {target_crs}")
df = df.to_crs(target_crs)
exploded = df.explode()
exploded.to_file(output_geojson, driver='GeoJSON')
except Exception as e:
print(f"Error during conversion: {str(e)}")
sys.exit(1)
def main():
"""
Main function to handle command line arguments
"""
import argparse
parser = argparse.ArgumentParser(
description='Convert shapefile to GeoJSON'
)
parser.add_argument(
'input_shapefile',
help='Path to input shapefile'
)
parser.add_argument(
'output_geojson',
help='Path to output GeoJSON file'
)
    parser.add_argument(
        '--target-crs',
        type=int,
        default=4326,
        help='Target coordinate reference system (default: 4326)'
    )
args = parser.parse_args()
# Validate input file
if not os.path.exists(args.input_shapefile):
print(f"Error: Input shapefile '{args.input_shapefile}' not found")
sys.exit(1)
# Create output directory if it doesn't exist
output_dir = Path(args.output_geojson).parent
output_dir.mkdir(parents=True, exist_ok=True)
# Run conversion
convert_shapefile_to_geojson(
args.input_shapefile,
args.output_geojson,
args.target_crs
)
if __name__ == "__main__":
    main()
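
The `explode()` call is what splits multi-part road geometries into single LineStrings before export. Invoked from Python rather than the command line, the conversion might look like this (the paths are the ones the old one-off version of this script used):

convert_shapefile_to_geojson(
    'original data/Sumter/RoadCenterlines_041125.shp.zip',
    'original data/Sumter/RoadCenterlines_041125.geojson',
    target_crs=4326,
)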

sumter-address-convert.py (new file, 258 lines)
View File

@@ -0,0 +1,258 @@
#!/usr/bin/env python3
"""
Shapefile to GeoJSON Converter for Address Data
Converts ESRI:102659 CRS shapefile to EPSG:4326 GeoJSON with OSM-style address tags
"""
import geopandas as gpd
import pandas as pd  # used by the addr:* field conversions below
import json
import sys
import os
from pathlib import Path
import importlib
qgis_functions = importlib.import_module("qgis-functions")
title = qgis_functions.title
getstreetfromaddress = qgis_functions.getstreetfromaddress
def convert_crs(gdf, source_crs='ESRI:102659', target_crs='EPSG:4326'):
"""
Convert coordinate reference system from source to target CRS
Args:
gdf: GeoDataFrame to convert
source_crs: Source coordinate reference system (default: ESRI:102659)
target_crs: Target coordinate reference system (default: EPSG:4326)
Returns:
GeoDataFrame with converted CRS
"""
    if gdf.crs is None:
        print(f"Warning: No CRS detected, assuming {source_crs}")
        gdf = gdf.set_crs(source_crs)
if gdf.crs != target_crs:
print(f"Converting from {gdf.crs} to {target_crs}")
gdf = gdf.to_crs(target_crs)
return gdf
def process_address_fields(gdf):
"""
Process and map address fields according to OSM address schema
Args:
gdf: GeoDataFrame with address data
Returns:
GeoDataFrame with processed address fields
"""
processed_gdf = gdf.copy()
# Create new columns for OSM address tags
address_mapping = {}
# ADD_NUM -> addr:housenumber (as integer)
if 'ADD_NUM' in processed_gdf.columns:
# Handle NaN values and convert to nullable integer
add_num_series = processed_gdf['ADD_NUM'].copy()
# Convert to numeric, coercing errors to NaN
add_num_series = pd.to_numeric(add_num_series, errors='coerce')
# Round to remove decimal places, then convert to nullable integer
address_mapping['addr:housenumber'] = add_num_series.round().astype('Int64')
# UNIT -> addr:unit (as string)
if 'UNIT' in processed_gdf.columns:
unit_series = processed_gdf['UNIT'].copy()
# Replace NaN, empty strings, and 'None' string with actual None
unit_series = unit_series.replace(['nan', 'None', '', None], None)
# Only keep non-null values as strings
unit_series = unit_series.where(unit_series.notna(), None)
address_mapping['addr:unit'] = unit_series
# SADD -> addr:street via title(getstreetfromaddress("SADD"))
if 'SADD' in processed_gdf.columns:
street_names = []
for sadd_value in processed_gdf['SADD']:
if pd.notna(sadd_value):
street_from_addr = getstreetfromaddress(str(sadd_value), None, None)
street_titled = title(street_from_addr)
street_names.append(street_titled)
else:
street_names.append(None)
address_mapping['addr:street'] = street_names
# POST_COMM -> addr:city via title("POST_COMM")
if 'POST_COMM' in processed_gdf.columns:
city_names = []
for post_comm in processed_gdf['POST_COMM']:
if pd.notna(post_comm):
city_titled = title(str(post_comm))
city_names.append(city_titled)
else:
city_names.append(None)
address_mapping['addr:city'] = city_names
# POST_CODE -> addr:postcode (as integer)
if 'POST_CODE' in processed_gdf.columns:
# Handle NaN values and convert to nullable integer
post_code_series = processed_gdf['POST_CODE'].copy()
# Convert to numeric, coercing errors to NaN
post_code_series = pd.to_numeric(post_code_series, errors='coerce')
# Round to remove decimal places, then convert to nullable integer
address_mapping['addr:postcode'] = post_code_series.round().astype('Int64')
# Manually add addr:state = 'FL'
address_mapping['addr:state'] = 'FL'
# Add the new address columns to the GeoDataFrame
for key, value in address_mapping.items():
processed_gdf[key] = value
return processed_gdf
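# Worked example of the mapping above (illustrative values):
#   ADD_NUM 1234.0       -> addr:housenumber 1234 (nullable Int64; missing stays <NA>)
#   POST_CODE '34785'    -> addr:postcode 34785
#   POST_COMM 'WILDWOOD' -> addr:city 'Wildwood'
#   SADD goes through getstreetfromaddress + title, dropping the house number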
def clean_output_data(gdf, keep_original_fields=False):
"""
Clean the output data, optionally keeping original fields
Args:
gdf: GeoDataFrame to clean
keep_original_fields: Whether to keep original shapefile fields
Returns:
Cleaned GeoDataFrame
"""
# Define the OSM address fields we want to keep
osm_fields = [
'addr:housenumber', 'addr:unit', 'addr:street',
'addr:city', 'addr:postcode', 'addr:state'
]
if keep_original_fields:
# Keep both original and OSM fields
original_fields = ['ADD_NUM', 'UNIT', 'SADD', 'POST_COMM', 'POST_CODE']
fields_to_keep = list(set(osm_fields + original_fields + ['geometry']))
else:
# Keep only OSM fields and geometry
fields_to_keep = osm_fields + ['geometry']
# Filter to only existing columns
existing_fields = [field for field in fields_to_keep if field in gdf.columns]
return gdf[existing_fields]
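# e.g. clean_output_data(gdf) keeps just the six addr:* columns plus geometry,
# while clean_output_data(gdf, keep_original_fields=True) also keeps ADD_NUM,
# UNIT, SADD, POST_COMM and POST_CODE for side-by-side review.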
def convert_shapefile_to_geojson(
input_shapefile,
output_geojson,
keep_original_fields=False,
source_crs='ESRI:102659',
target_crs='EPSG:4326'
):
"""
Main conversion function
Args:
input_shapefile: Path to input shapefile
output_geojson: Path to output GeoJSON file
keep_original_fields: Whether to keep original shapefile fields
source_crs: Source coordinate reference system
target_crs: Target coordinate reference system
"""
try:
# Read shapefile
print(f"Reading shapefile: {input_shapefile}")
gdf = gpd.read_file(input_shapefile)
print(f"Loaded {len(gdf)} features")
# Display original columns
print(f"Original columns: {list(gdf.columns)}")
# Convert CRS if needed
gdf = convert_crs(gdf, source_crs, target_crs)
# Process address fields
print("Processing address fields...")
gdf = process_address_fields(gdf)
# Clean output data
gdf = clean_output_data(gdf, keep_original_fields)
# Remove rows with no valid geometry
gdf = gdf[gdf.geometry.notna()]
print(f"Final columns: {list(gdf.columns)}")
print(f"Final feature count: {len(gdf)}")
# Write to GeoJSON
print(f"Writing GeoJSON: {output_geojson}")
gdf.to_file(output_geojson, driver='GeoJSON')
print(f"Conversion completed successfully!")
# Display sample of processed data
if len(gdf) > 0:
print("\nSample of processed data:")
sample_cols = [col for col in gdf.columns if col.startswith('addr:')]
if sample_cols:
print(gdf[sample_cols].head())
except Exception as e:
print(f"Error during conversion: {str(e)}")
sys.exit(1)
def main():
"""
Main function to handle command line arguments
"""
import argparse
parser = argparse.ArgumentParser(
description='Convert shapefile to GeoJSON with OSM address tags'
)
parser.add_argument(
'input_shapefile',
help='Path to input shapefile'
)
parser.add_argument(
'output_geojson',
help='Path to output GeoJSON file'
)
parser.add_argument(
'--keep-original',
action='store_true',
help='Keep original shapefile fields in addition to OSM fields'
)
parser.add_argument(
'--source-crs',
default='ESRI:102659',
help='Source coordinate reference system (default: ESRI:102659)'
)
parser.add_argument(
'--target-crs',
default='EPSG:4326',
help='Target coordinate reference system (default: EPSG:4326)'
)
args = parser.parse_args()
# Validate input file
if not os.path.exists(args.input_shapefile):
print(f"Error: Input shapefile '{args.input_shapefile}' not found")
sys.exit(1)
# Create output directory if it doesn't exist
output_dir = Path(args.output_geojson).parent
output_dir.mkdir(parents=True, exist_ok=True)
# Run conversion
convert_shapefile_to_geojson(
args.input_shapefile,
args.output_geojson,
args.keep_original,
args.source_crs,
args.target_crs
)
if __name__ == "__main__":
    main()
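
Run from a shell, an invocation might look like this (the input file name is hypothetical):

python3 sumter-address-convert.py AddressPoints.shp addresses.geojson --keep-original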

View File

@@ -0,0 +1,540 @@
#!/usr/bin/env python3
"""
GeoJSON Multi Modal Golf Cart Path Comparison Script
Compares two GeoJSON files containing road data and identifies:
1. Roads in file1 that don't have corresponding coverage in file2 (removed roads)
2. Roads in file2 that don't have corresponding coverage in file1 (added roads)
Only reports differences that are significant (above minimum length threshold).
Optimized for performance with parallel processing and spatial indexing.
TODO:
- put properties properly on removed roads, so they're visible in JOSM
- handle polygons properly (on previous geojson step?) for circular roads
"""
import json
import argparse
from pathlib import Path
from typing import List, Dict, Any, Tuple
import geopandas as gpd
from shapely.geometry import LineString, MultiLineString, Point, Polygon
from shapely.ops import unary_union
from shapely.strtree import STRtree
import pandas as pd
import warnings
import multiprocessing as mp
from functools import partial
import numpy as np
from concurrent.futures import ProcessPoolExecutor, as_completed
import gc
# Suppress warnings for cleaner output
warnings.filterwarnings('ignore')
class RoadComparator:
def __init__(self, tolerance_feet: float = 50.0, min_gap_length_feet: float = 100.0,
n_jobs: int = None, chunk_size: int = 1000):
"""
Initialize the road comparator.
Args:
tolerance_feet: Distance tolerance for considering roads as overlapping (default: 50 feet)
min_gap_length_feet: Minimum length of gap/extra to be considered significant (default: 100 feet)
n_jobs: Number of parallel processes to use (default: CPU count - 1)
chunk_size: Number of geometries to process per chunk (default: 1000)
"""
self.tolerance_feet = tolerance_feet
self.min_gap_length_feet = min_gap_length_feet
self.n_jobs = n_jobs or max(1, mp.cpu_count() - 1)
self.chunk_size = chunk_size
# Convert feet to degrees (approximate conversion for continental US)
# 1 degree latitude ≈ 364,000 feet
# 1 degree longitude ≈ 288,000 feet (at 40° latitude)
self.tolerance_deg = tolerance_feet / 364000.0
self.min_gap_length_deg = min_gap_length_feet / 364000.0
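        # e.g. with the defaults: 50 ft tolerance / 364000 ≈ 0.000137 degrees,
        # and the 100 ft minimum gap ≈ 0.000275 degrees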
print(f"Using {self.n_jobs} parallel processes with chunk size {self.chunk_size}")
def load_geojson(self, filepath: str) -> gpd.GeoDataFrame:
"""Load and validate GeoJSON file with optimizations."""
try:
            # Use the pyogrio engine for faster loading of large files
gdf = gpd.read_file(filepath, engine='pyogrio')
# Filter only LineString, MultiLineString, and Polygon geometries
line_types = ['LineString', 'MultiLineString', 'Polygon']
gdf = gdf[gdf.geometry.type.isin(line_types)].copy()
if len(gdf) == 0:
raise ValueError(f"No line geometries found in {filepath}")
# Reset index for efficient processing
gdf = gdf.reset_index(drop=True)
# Ensure geometry is valid and fix simple issues
invalid_mask = ~gdf.geometry.is_valid
if invalid_mask.any():
print(f"Fixing {invalid_mask.sum()} invalid geometries...")
gdf.loc[invalid_mask, 'geometry'] = gdf.loc[invalid_mask, 'geometry'].buffer(0)
print(f"Loaded {len(gdf)} road features from {filepath}")
return gdf
except Exception as e:
raise Exception(f"Error loading {filepath}: {str(e)}")
def create_buffered_union_optimized(self, gdf: gpd.GeoDataFrame) -> Any:
"""Create a buffered union using chunked processing for memory efficiency."""
print("Creating optimized buffered union...")
# Process in chunks to manage memory
chunks = [gdf.iloc[i:i+self.chunk_size] for i in range(0, len(gdf), self.chunk_size)]
chunk_unions = []
# Use partial function for multiprocessing
buffer_func = partial(self._buffer_chunk, tolerance=self.tolerance_deg)
with ProcessPoolExecutor(max_workers=self.n_jobs) as executor:
# Submit all chunk processing jobs
future_to_chunk = {executor.submit(buffer_func, chunk): i
for i, chunk in enumerate(chunks)}
# Collect results as they complete
for future in as_completed(future_to_chunk):
chunk_idx = future_to_chunk[future]
try:
chunk_union = future.result()
if chunk_union and not chunk_union.is_empty:
chunk_unions.append(chunk_union)
print(f"Processed chunk {chunk_idx + 1}/{len(chunks)}")
except Exception as e:
print(f"Error processing chunk {chunk_idx}: {str(e)}")
# Union all chunk results
print("Combining chunk unions...")
if chunk_unions:
final_union = unary_union(chunk_unions)
# Force garbage collection
del chunk_unions
gc.collect()
return final_union
else:
raise Exception("No valid geometries to create union")
@staticmethod
def _buffer_chunk(chunk_gdf: gpd.GeoDataFrame, tolerance: float) -> Any:
"""Buffer geometries in a chunk and return their union."""
try:
# Buffer all geometries in the chunk
buffered = chunk_gdf.geometry.buffer(tolerance)
# Create union of buffered geometries
if len(buffered) == 1:
return buffered.iloc[0]
else:
return unary_union(buffered.tolist())
except Exception as e:
print(f"Error in chunk processing: {str(e)}")
return None
def create_spatial_index(self, gdf: gpd.GeoDataFrame) -> STRtree:
"""Create spatial index for fast intersection queries."""
print("Creating spatial index...")
# Create STRtree for fast spatial queries
geometries = gdf.geometry.tolist()
return STRtree(geometries)
def find_removed_segments_optimized(self, source_gdf: gpd.GeoDataFrame,
target_union: Any) -> List[Dict[str, Any]]:
"""
Find segments in source_gdf that are not covered by target_union (removed roads).
Optimized with parallel processing.
"""
print("Finding removed segments...")
# Split into chunks for parallel processing
chunks = [source_gdf.iloc[i:i+self.chunk_size]
for i in range(0, len(source_gdf), self.chunk_size)]
all_removed = []
# Use partial function for multiprocessing
process_func = partial(self._process_removed_chunk,
target_union=target_union,
min_length_deg=self.min_gap_length_deg)
with ProcessPoolExecutor(max_workers=self.n_jobs) as executor:
# Submit all chunk processing jobs
future_to_chunk = {executor.submit(process_func, chunk): i
for i, chunk in enumerate(chunks)}
# Collect results as they complete
for future in as_completed(future_to_chunk):
chunk_idx = future_to_chunk[future]
try:
chunk_removed = future.result()
all_removed.extend(chunk_removed)
print(f"Processed removed chunk {chunk_idx + 1}/{len(chunks)}")
except Exception as e:
print(f"Error processing removed chunk {chunk_idx}: {str(e)}")
return all_removed
@staticmethod
def _process_removed_chunk(chunk_gdf: gpd.GeoDataFrame, target_union: Any,
min_length_deg: float) -> List[Dict[str, Any]]:
"""Process a chunk of geometries to find removed segments."""
removed_segments = []
for idx, row in chunk_gdf.iterrows():
geom = row.geometry
# Handle MultiLineString by processing each component
if isinstance(geom, MultiLineString):
lines = list(geom.geoms)
else:
                lines = [geom]  # LineString and Polygon geometries are handled directly
for line in lines:
try:
# Find parts of the line that don't intersect with target_union
uncovered = line.difference(target_union)
if uncovered.is_empty:
continue
# Handle different geometry types returned by difference
uncovered_lines = []
if hasattr(uncovered, 'geoms'):
for geom_part in uncovered.geoms:
if isinstance(geom_part, LineString):
uncovered_lines.append(geom_part)
elif isinstance(uncovered, LineString):
uncovered_lines.append(uncovered)
# Check each uncovered line segment
for uncovered_line in uncovered_lines:
if uncovered_line.length >= min_length_deg:
# Create properties dict with original metadata plus 'removed: true'
properties = dict(row.drop('geometry'))
properties['removed'] = True
removed_segments.append({
'geometry': uncovered_line,
**properties
})
except Exception as e:
continue # Skip problematic geometries
return removed_segments
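    # The core test above, in isolation (standalone shapely sketch, not project code):
    #   from shapely.geometry import LineString
    #   road = LineString([(0, 0), (0.001, 0)])
    #   covered = road.buffer(0.000137)      # stand-in for the other file's union
    #   road.difference(covered).is_empty    # -> True: fully covered, nothing removed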
def find_added_roads_optimized(self, source_gdf: gpd.GeoDataFrame,
target_union: Any) -> List[Dict[str, Any]]:
"""
Find entire roads in source_gdf that don't significantly overlap with target_union.
Optimized with parallel processing.
"""
print("Finding added roads...")
# Split into chunks for parallel processing
chunks = [source_gdf.iloc[i:i+self.chunk_size]
for i in range(0, len(source_gdf), self.chunk_size)]
all_added = []
# Use partial function for multiprocessing
process_func = partial(self._process_added_chunk,
target_union=target_union,
min_length_deg=self.min_gap_length_deg)
with ProcessPoolExecutor(max_workers=self.n_jobs) as executor:
# Submit all chunk processing jobs
future_to_chunk = {executor.submit(process_func, chunk): i
for i, chunk in enumerate(chunks)}
# Collect results as they complete
for future in as_completed(future_to_chunk):
chunk_idx = future_to_chunk[future]
try:
chunk_added = future.result()
all_added.extend(chunk_added)
print(f"Processed added chunk {chunk_idx + 1}/{len(chunks)}")
except Exception as e:
print(f"Error processing added chunk {chunk_idx}: {str(e)}")
return all_added
@staticmethod
def _process_added_chunk(chunk_gdf: gpd.GeoDataFrame, target_union: Any,
min_length_deg: float) -> List[Dict[str, Any]]:
"""Process a chunk of geometries to find added roads."""
added_roads = []
for idx, row in chunk_gdf.iterrows():
geom = row.geometry
try:
# Check what portion of the road is not covered
uncovered = geom.difference(target_union)
if not uncovered.is_empty:
# Calculate what percentage of the original road is uncovered
uncovered_length = 0
if hasattr(uncovered, 'geoms'):
for geom_part in uncovered.geoms:
if isinstance(geom_part, LineString):
uncovered_length += geom_part.length
elif isinstance(uncovered, LineString):
uncovered_length = uncovered.length
original_length = geom.length
uncovered_ratio = uncovered_length / original_length if original_length > 0 else 0
                    # Include the entire road if more than 10% of it is uncovered
                    # (the minimum-length check below is currently disabled):
                    # uncovered_length >= min_length_deg and
                    if uncovered_ratio > 0.1:
# Include entire original road with all original metadata
original_properties = dict(row.drop('geometry'))
#
# For Sumter County Roads
#
properties = {
'surface': 'asphalt'
}
                        # Skip cart paths flagged as part of a road; per the README,
                        # golf-cart tags go on the street itself instead.
                        output = original_properties.get('Part_of_Ro') != "Yes"
                        if output:
                            properties.update({
                                'bicycle': 'yes',
                                'foot': 'yes',
                                'golf': 'cartpath',
                                'golf_cart': 'yes',
                                'highway': 'path',
                                'motor_vehicle': 'no',
                                'segregated': 'no',
                                'surface': 'asphalt',
                            })
if output:
added_roads.append({
'geometry': geom,
**properties
})
except Exception as e:
print(e)
continue # Skip problematic geometries
return added_roads
def compare_roads(self, file1_path: str, file2_path: str) -> Tuple[List[Dict], List[Dict]]:
"""
Compare two GeoJSON files and find significant differences.
Optimized version with parallel processing.
Returns:
Tuple of (removed_roads, added_roads)
"""
print(f"Comparing {file1_path} and {file2_path}")
print(f"Tolerance: {self.tolerance_feet} feet")
print(f"Minimum significant length: {self.min_gap_length_feet} feet")
print(f"Parallel processing: {self.n_jobs} workers")
print("-" * 50)
# Load both files
gdf1 = self.load_geojson(file1_path)
gdf2 = self.load_geojson(file2_path)
# Ensure both are in the same CRS
if gdf1.crs != gdf2.crs:
print(f"Warning: CRS mismatch. Converting {file2_path} to match {file1_path}")
gdf2 = gdf2.to_crs(gdf1.crs)
print("Creating optimized spatial unions...")
# Create buffered unions using optimized method
union1 = self.create_buffered_union_optimized(gdf1)
union2 = self.create_buffered_union_optimized(gdf2)
print("Finding removed and added roads with parallel processing...")
# Find roads using optimized parallel methods
removed_roads = self.find_removed_segments_optimized(gdf1, union2)
added_roads = self.find_added_roads_optimized(gdf2, union1)
# Clean up memory
del gdf1, gdf2, union1, union2
gc.collect()
return removed_roads, added_roads
def save_results(self, removed: List[Dict], added: List[Dict], output_path: str):
"""Save results to GeoJSON file."""
all_results = removed + added
if not all_results:
print("No significant differences found!")
return
# Create GeoDataFrame efficiently
print("Saving results...")
results_gdf = gpd.GeoDataFrame(all_results)
# Save to file with optimization
results_gdf.to_file(output_path, driver='GeoJSON', engine='pyogrio')
print(f"Results saved to: {output_path}")
def print_summary(self, removed: List[Dict], added: List[Dict], file1_name: str, file2_name: str):
"""Print a summary of the comparison results."""
print("\n" + "="*60)
print("COMPARISON SUMMARY")
print("="*60)
print(f"\nFile 1: {file1_name}")
print(f"File 2: {file2_name}")
print(f"Tolerance: {self.tolerance_feet} feet")
print(f"Minimum significant length: {self.min_gap_length_feet} feet")
if removed:
print(f"\n🔴 REMOVED ROADS ({len(removed)} segments):")
print("These road segments exist in File 1 but are missing or incomplete in File 2:")
# Calculate total length of removed segments
total_removed_length = 0
removed_by_road = {}
for segment in removed:
geom = segment['geometry']
length_feet = geom.length * 364000.0 # Convert to feet
total_removed_length += length_feet
# Get road name
road_name = "Unknown"
name_fields = ['name', 'NAME', 'road_name', 'street_name', 'FULLNAME']
for field in name_fields:
if field in segment and pd.notna(segment[field]):
road_name = str(segment[field])
break
if road_name not in removed_by_road:
removed_by_road[road_name] = []
removed_by_road[road_name].append(length_feet)
print(f"Total removed length: {total_removed_length:,.1f} feet ({total_removed_length/5280:.2f} miles)")
for road, lengths in sorted(removed_by_road.items()):
road_total = sum(lengths)
print(f"{road}: {len(lengths)} segment(s), {road_total:,.1f} feet")
if added:
print(f"\n🔵 ADDED ROADS ({len(added)} roads):")
print("These roads exist in File 2 but are missing or incomplete in File 1:")
# Calculate total length of added roads
total_added_length = 0
added_by_road = {}
for road in added:
geom = road['geometry']
length_feet = geom.length * 364000.0 # Convert to feet
total_added_length += length_feet
# Get road name
road_name = "Unknown"
name_fields = ['name', 'NAME', 'road_name', 'street_name', 'FULLNAME']
for field in name_fields:
if field in road and pd.notna(road[field]):
road_name = str(road[field])
break
if road_name not in added_by_road:
added_by_road[road_name] = 0
added_by_road[road_name] += length_feet
print(f"Total added length: {total_added_length:,.1f} feet ({total_added_length/5280:.2f} miles)")
for road, length in sorted(added_by_road.items()):
print(f"{road}: {length:,.1f} feet")
if not removed and not added:
print("\n✅ No significant differences found!")
print("The road networks have good coverage overlap within the specified tolerance.")
def main():
parser = argparse.ArgumentParser(
description="Compare two GeoJSON files containing roads and find significant gaps or extras (Optimized)",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
python sumter-multi-modal-convert.py osm-multi-modal.geojson county-multi-modal.geojson
python sumter-multi-modal-convert.py osm-multi-modal.geojson county-multi-modal.geojson --tolerance 100 --min-length 200
python sumter-multi-modal-convert.py osm-multi-modal.geojson county-multi-modal.geojson --output differences.geojson
python sumter-multi-modal-convert.py osm-multi-modal.geojson county-multi-modal.geojson --jobs 8 --chunk-size 2000
"""
)
parser.add_argument('file1', help='First GeoJSON file')
parser.add_argument('file2', help='Second GeoJSON file')
parser.add_argument('--tolerance', '-t', type=float, default=50.0,
help='Distance tolerance in feet for considering roads as overlapping (default: 50)')
parser.add_argument('--min-length', '-m', type=float, default=100.0,
help='Minimum length in feet for gaps/extras to be considered significant (default: 100)')
parser.add_argument('--output', '-o', help='Output GeoJSON file for results (optional)')
parser.add_argument('--jobs', '-j', type=int, default=None,
help='Number of parallel processes (default: CPU count - 1)')
parser.add_argument('--chunk-size', '-c', type=int, default=1000,
help='Number of geometries to process per chunk (default: 1000)')
args = parser.parse_args()
# Validate input files
if not Path(args.file1).exists():
print(f"Error: File {args.file1} does not exist")
return 1
if not Path(args.file2).exists():
print(f"Error: File {args.file2} does not exist")
return 1
try:
# Create comparator and run comparison
comparator = RoadComparator(
tolerance_feet=args.tolerance,
min_gap_length_feet=args.min_length,
n_jobs=args.jobs,
chunk_size=args.chunk_size
)
removed, added = comparator.compare_roads(args.file1, args.file2)
# Print summary
comparator.print_summary(removed, added, args.file1, args.file2)
# Save results if output file specified
if args.output:
comparator.save_results(removed, added, args.output)
elif removed or added:
# Auto-generate output filename if differences found
output_file = f"multi_modal_differences_{Path(args.file1).stem}_vs_{Path(args.file2).stem}.geojson"
comparator.save_results(removed, added, output_file)
return 0
except Exception as e:
print(f"Error: {str(e)}")
return 1
if __name__ == "__main__":
exit(main())

View File

@@ -14,6 +14,13 @@ TODO:
- handle polygons properly (on previous geojson step?) for circular roads
- ignore roads that aren't LIFECYCLE ACTV or Active
- include OneWay=Y
- handle C 44a -> County Road 44A
- handle Tpke -> Turnpike
- handle Trce -> Trace/Terrace?
- handle Cor -> Corner
- handle Obrien -> O'Brien
- handle Oday -> O'Day
- handle Ohara -> O'Hara
"""
import json