Add and update scripts, finished Sumter
This commit is contained in:
258
sumter-address-convert.py
Normal file
258
sumter-address-convert.py
Normal file
@@ -0,0 +1,258 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Shapefile to GeoJSON Converter for Address Data
|
||||
Converts ESRI:102659 CRS shapefile to EPSG:4326 GeoJSON with OSM-style address tags
|
||||
"""
|
||||
|
||||
import geopandas as gpd
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import importlib
|
||||
qgis_functions = importlib.import_module("qgis-functions")
|
||||
title = qgis_functions.title
|
||||
getstreetfromaddress = qgis_functions.getstreetfromaddress
|
||||
|
||||
def convert_crs(gdf, source_crs='ESRI:102659', target_crs='EPSG:4326'):
    """
    Convert coordinate reference system from source to target CRS

    Args:
        gdf: GeoDataFrame to convert
        source_crs: CRS to assume when gdf carries none (default: ESRI:102659)
        target_crs: Target coordinate reference system (default: EPSG:4326)

    Returns:
        GeoDataFrame in target_crs. The frame passed in is never modified;
        it is returned as-is when it is already in target_crs.
    """
    if gdf.crs is None:
        print(f"Warning: No CRS detected, assuming {source_crs}")
        # set_crs returns a new frame, so the caller's object keeps crs=None
        # instead of being silently relabelled by attribute assignment.
        gdf = gdf.set_crs(source_crs)

    if gdf.crs != target_crs:
        print(f"Converting from {gdf.crs} to {target_crs}")
        gdf = gdf.to_crs(target_crs)

    return gdf
|
||||
|
||||
def process_address_fields(gdf):
    """
    Process and map address fields according to OSM address schema

    Each source column is optional; a tag is only produced when its source
    column is present:

        ADD_NUM   -> addr:housenumber (nullable integer)
        UNIT      -> addr:unit        (blank values become missing)
        SADD      -> addr:street      via title(getstreetfromaddress(...))
        POST_COMM -> addr:city        via title(...)
        POST_CODE -> addr:postcode    (nullable integer)

    addr:state is always added and set to 'FL'.

    Args:
        gdf: GeoDataFrame with address data

    Returns:
        Copy of gdf with the addr:* columns appended; gdf itself is untouched
    """
    # Imported locally so the function does not depend on a module-level
    # `pd` binding being present.
    import pandas as pd

    def to_nullable_int(series):
        # Unparseable values are coerced to NaN, decimals are rounded away,
        # and 'Int64' keeps the missing entries as <NA> instead of failing.
        return pd.to_numeric(series, errors='coerce').round().astype('Int64')

    processed_gdf = gdf.copy()
    columns = processed_gdf.columns

    # New columns are collected first and attached at the end, in this order.
    address_mapping = {}

    if 'ADD_NUM' in columns:
        address_mapping['addr:housenumber'] = to_nullable_int(processed_gdf['ADD_NUM'])

    if 'UNIT' in columns:
        unit_series = processed_gdf['UNIT']
        # Build an explicit mask rather than calling replace(..., None):
        # depending on the pandas version that call forward-fills from the
        # previous row, which would hand one address another address's unit.
        is_blank = unit_series.isna() | (
            unit_series.astype(str).str.strip().isin(['', 'nan', 'None'])
        )
        address_mapping['addr:unit'] = unit_series.astype(object).where(~is_blank, None)

    if 'SADD' in columns:
        address_mapping['addr:street'] = [
            title(getstreetfromaddress(str(sadd_value), None, None))
            if pd.notna(sadd_value) else None
            for sadd_value in processed_gdf['SADD']
        ]

    if 'POST_COMM' in columns:
        address_mapping['addr:city'] = [
            title(str(post_comm)) if pd.notna(post_comm) else None
            for post_comm in processed_gdf['POST_COMM']
        ]

    if 'POST_CODE' in columns:
        address_mapping['addr:postcode'] = to_nullable_int(processed_gdf['POST_CODE'])

    # The state is not read from the data; it is fixed for this dataset.
    address_mapping['addr:state'] = 'FL'

    for key, value in address_mapping.items():
        processed_gdf[key] = value

    return processed_gdf
|
||||
|
||||
def clean_output_data(gdf, keep_original_fields=False):
    """
    Clean the output data, optionally keeping original fields

    Columns are returned in a fixed order: the OSM addr:* tags, then (when
    requested) the original shapefile fields, then geometry. Columns that
    are not present in gdf are skipped; any other column is dropped.

    Args:
        gdf: GeoDataFrame to clean
        keep_original_fields: Whether to keep original shapefile fields

    Returns:
        Cleaned GeoDataFrame
    """
    osm_fields = [
        'addr:housenumber', 'addr:unit', 'addr:street',
        'addr:city', 'addr:postcode', 'addr:state'
    ]
    original_fields = ['ADD_NUM', 'UNIT', 'SADD', 'POST_COMM', 'POST_CODE']

    # Plain list concatenation (no set round-trip) keeps the column order
    # identical from one run to the next.
    fields_to_keep = list(osm_fields)
    if keep_original_fields:
        fields_to_keep += original_fields
    fields_to_keep.append('geometry')

    # Filter to only existing columns
    existing_fields = [field for field in fields_to_keep if field in gdf.columns]

    return gdf[existing_fields]
|
||||
|
||||
def convert_shapefile_to_geojson(
    input_shapefile,
    output_geojson,
    keep_original_fields=False,
    source_crs='ESRI:102659',
    target_crs='EPSG:4326'
):
    """
    Run the whole shapefile -> GeoJSON pipeline, reporting progress on stdout

    Steps: read the shapefile, reproject it, derive the OSM address tags,
    trim the columns, drop rows whose geometry is missing, write the GeoJSON
    and print the first rows of the addr:* columns.

    Any exception raised along the way is printed and the process exits
    with status 1.

    Args:
        input_shapefile: Path to input shapefile
        output_geojson: Path to output GeoJSON file
        keep_original_fields: Whether to keep original shapefile fields
        source_crs: Source coordinate reference system
        target_crs: Target coordinate reference system
    """
    try:
        # Load
        print(f"Reading shapefile: {input_shapefile}")
        frame = gpd.read_file(input_shapefile)
        print(f"Loaded {len(frame)} features")
        print(f"Original columns: {list(frame.columns)}")

        # Reproject, tag, trim
        frame = convert_crs(frame, source_crs, target_crs)
        print("Processing address fields...")
        frame = process_address_fields(frame)
        frame = clean_output_data(frame, keep_original_fields)

        # Rows without a geometry cannot be written as features
        has_geometry = frame.geometry.notna()
        frame = frame[has_geometry]

        print(f"Final columns: {list(frame.columns)}")
        print(f"Final feature count: {len(frame)}")

        # Save
        print(f"Writing GeoJSON: {output_geojson}")
        frame.to_file(output_geojson, driver='GeoJSON')
        print("Conversion completed successfully!")

        # Preview of the generated tags; nothing to show for an empty result
        if len(frame) == 0:
            return
        print("\nSample of processed data:")
        addr_columns = [name for name in frame.columns if name.startswith('addr:')]
        if addr_columns:
            print(frame[addr_columns].head())

    except Exception as e:
        print(f"Error during conversion: {str(e)}")
        sys.exit(1)
|
||||
|
||||
def main():
    """
    Command-line entry point

    Parses the arguments, exits with status 1 when the input shapefile does
    not exist, creates the output directory if needed and then runs the
    conversion.
    """
    import argparse

    cli = argparse.ArgumentParser(
        description='Convert shapefile to GeoJSON with OSM address tags'
    )

    # Positional arguments, in the order they must be given
    for positional, help_text in (
        ('input_shapefile', 'Path to input shapefile'),
        ('output_geojson', 'Path to output GeoJSON file'),
    ):
        cli.add_argument(positional, help=help_text)

    cli.add_argument(
        '--keep-original',
        action='store_true',
        help='Keep original shapefile fields in addition to OSM fields'
    )

    # CRS overrides
    for flag, crs_default, help_text in (
        ('--source-crs', 'ESRI:102659',
         'Source coordinate reference system (default: ESRI:102659)'),
        ('--target-crs', 'EPSG:4326',
         'Target coordinate reference system (default: EPSG:4326)'),
    ):
        cli.add_argument(flag, default=crs_default, help=help_text)

    options = cli.parse_args()

    # Bail out early when there is nothing to read
    shapefile_path = options.input_shapefile
    if not os.path.exists(shapefile_path):
        print(f"Error: Input shapefile '{shapefile_path}' not found")
        sys.exit(1)

    # Make sure the destination directory exists before writing
    Path(options.output_geojson).parent.mkdir(parents=True, exist_ok=True)

    convert_shapefile_to_geojson(
        input_shapefile=shapefile_path,
        output_geojson=options.output_geojson,
        keep_original_fields=options.keep_original,
        source_crs=options.source_crs,
        target_crs=options.target_crs,
    )
|
||||
|
||||
# pandas is bound at module level (rather than inside the guard below) so the
# global name `pd` exists both when this file is run as a script and when it
# is loaded as a module.
import pandas as pd

if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user