Prepping for slicing, fixing dangling addresses, adding city/state/use/gid

2021-02-04 10:43:33 -08:00
parent a9a8646c46
commit bef81198a8
8 changed files with 526 additions and 73 deletions

reference/basemap.py (Normal file, 139 lines)

@@ -0,0 +1,139 @@
def filterLayer(layer):
    if layer is None:
        print("filterLayer: empty")
        return None
    print(layer.GetName())
    #if layer.GetName() in ["buildingfootprint", "Site_Address_Points", "mergedbuildings", "namedparcels"]:
    if layer.GetName() in ["buildingfootprint_filtered", "Site_Address_Points_filtered", "mergedbuildings_filtered", "namedparcels_filtered"]:
        return layer
def mergeToRanges(ls):
    """ Takes a list like ['1', '2', '3', '5', '8', '9'] and returns a list like
    ['1-3', '5', '8', '9'] """
    if len(ls) < 2:
        return ls
    i = 0
    while i < len(ls)-1 and \
          ((ls[i].isdigit() and ls[i+1].isdigit() and \
            int(ls[i])+1 == int(ls[i+1])) or \
           (len(ls[i]) == 1 and len(ls[i+1]) == 1 and \
            ord(ls[i])+1 == ord(ls[i+1]))):
        i += 1
    if i < 2:
        return ls[0:i+1]+mergeToRanges(ls[i+1:])
    else:
        return [ls[0]+'-'+ls[i]]+mergeToRanges(ls[i+1:])
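# A quick illustration of mergeToRanges (sample inputs made up for this note,
# not taken from the source data):
#   mergeToRanges(['1', '2', '3', '5', '8', '9'])  ->  ['1-3', '5', '8', '9']
#   mergeToRanges(['A', 'B', 'C'])                 ->  ['A-C']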
# I don't actually know if the building heights are in US standard feet or
# survey feet. But the difference is less than the significant digits for the
# tallest building.
SURVEY_FEET_TO_METER = 1200.0/3937.0
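# For scale (back-of-the-envelope, not from the source data): a survey foot is
# 1200/3937 m = 0.3048006... m versus exactly 0.3048 m for the standard foot,
# a difference of about 2 parts per million -- roughly 0.2 mm over a 300 ft
# building, far below the 0.01 m precision of the height/ele tags below.
# Sample conversion: a hypothetical bldgheight of 100 (survey feet) becomes
# "%.02f" % round(100*SURVEY_FEET_TO_METER, 2) == "30.48".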
def filterTags(attrs):
    if attrs is None:
        print("filterTags: empty")
        return None
    tags = {}

    if "bldgelev" in attrs:
        # BuildingFootprint
        tags["building"] = "yes"
        # Always appear, has equivalent
        tags["height"] = "%.02f"%round(float(attrs["bldgheight"])*SURVEY_FEET_TO_METER, 2)
        tags["ele"] = "%.02f"%round(float(attrs["bldgelev"])*SURVEY_FEET_TO_METER, 2)
        # Always appear, no equivalent: FACILITYID
        # Sometimes appear, no equivalent: LASTUPDATE
        # Empty: LENGTH, SHAPE_AREA

    if "Inc_Muni" in attrs:
        # Site_Address_Points
        # Always appear, has equivalent
        tags["addr:city"] = attrs["Inc_Muni"]
        # Sometimes appear, has equivalent
        addr = attrs["Add_Number"]
        if addr:
            addr = addr.split(';')
            m = max(map(len, addr))
            addr.sort(key=lambda a: a.rjust(m))
            addr = ';'.join(mergeToRanges(addr))
            if attrs["AddNum_Suf"]:
                addr += " " + attrs["AddNum_Suf"]
            tags["addr:housenumber"] = addr
        street = attrs["CompName"]
        if street:
            if street.startswith("St "): street = "Saint"+street[2:]
            elif street.startswith("Mt "): street = "Mount"+street[2:]
            elif street.startswith("East St "): street = "East Saint"+street[7:]
            elif street.startswith("West St "): street = "West Saint"+street[7:]
            tags["addr:street"] = street
        units = attrs["Unit"]
        if units:
            units = units.split(';')
            m = max(map(len, units))
            units.sort(key=lambda a: a.rjust(m))
            units = ';'.join(mergeToRanges(units))
            tags["addr:unit"] = units
        zipcode = attrs["Post_Code"]
        if zipcode: tags["addr:postcode"] = zipcode
        pt = attrs["Place_Type"]
        #if pt == "BU":
        #    tags["office"] = "yes"
        if pt == "ED":
            tags["amenity"] = "school"
        elif pt == "FB":
            tags["amenity"] = "place_of_worship"
        elif pt == "GO":
            tags["office"] = "government"
        elif pt == "GQ":
            # Salvation army
            tags["amenity"] = "social_facility"
        elif pt == "HS":
            tags["amenity"] = "hospital"
        elif pt == "HT" and not units:
            tags["tourism"] = "hotel"
        elif pt == "RE":
            tags["club"] = "sport"
        elif pt == "RT":
            tags["amenity"] = "restaurant"
        elif pt == "RL":
            tags["shop"] = "yes"
        elif pt == "TR":
            tags["public_transport"] = "platform"
        # Always appear, no equivalent: OBJECTID, Site_NGUID, ESN, Lat, Long, Status, Juris_Auth, LastUpdate, LastEditor, GlobalID
        # FullMailin could be used for addr:full, but it's unneeded.
        # Sometimes appear, no equivalent: RCL_NGUID, StreetMast, ParcelID, CondoParce, UnitID, RSN, PSAP_ID, St_PreDirA, St_PreTyp, StreetName, St_PosTyp, St_PosTypC, St_PosTypU, St_PosDir, Feanme, FullName, Unit_Type, Building, FullUnit, FullAddres, Addtl_Loc, LSt_PreDir, LSt_Name, LSt_Type, Uninc_Comm, Post_Comm, Source, Effective, Notes
        # Always the same: Client_ID, County, State, Country, Placement
        # Always empty: Site_NGU00, AddNum_Pre, St_PreMod, St_PreDir, St_PreSep, St_PosMod, Floor, Room, Seat, Post_Code4, APN, LStPostDir, AddCode, AddDataURI, Nbrhd_Comm, MSAGComm, LandmkName, Mile_Post, Elev, Expire

    if "Inc_Muni" in attrs and "bldgelev" in attrs:
        # Merged address/buildings
        # other Place_Type are Common Area (multi-use), Miscellaneous
        tags["building"] = {"BU": "commercial",
                            "ED": "school",
                            "FB": "religious",
                            "GO": "government",
                            "HS": "hospital",
                            "HT": "hotel",
                            "MH": "static_caravan",
                            "Condominium": "residential",
                            "MF": "residential",
                            "RL": "retail",
                            "RT": "retail",
                            "SF": "house"}.get(attrs["Place_Type"], "yes")
if "Addtl_Loc" in attrs and "Inc_Muni" not in attrs:
# Named parcels
tags["landuse"] = "residential"
tags["name"] = attrs["Addtl_Loc"].title()
return tags

reference/export.sh (Normal file, 110 lines)

@@ -0,0 +1,110 @@
#!/bin/bash
#
# Based on https://github.com/codeforsanjose/OSM-SouthBay/blob/main/SJ_Buildings/start.sh
#
# Before running, download VTA TAZ data from Google Drive here:
# https://drive.google.com/file/d/0B098fXDVjQOhVHBFS0kwcDNGRlU/view
# and place it in a folder named "data"
# (might need to rename VTATaz.dbf)
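# Expected layout after that manual step (assumed; adjust if your copy is
# named differently): data/VTATaz.shp, data/VTATaz.shx and data/VTATaz.dbf,
# since the shp2pgsql call below reads "data/VTATaz".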
DBNAME=svosm
OGR2OSM=../../ogr2osm/ogr2osm.py
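# The "${DBNAME}" database itself is assumed to already exist (for example
# created beforehand with "createdb svosm"); this script does not create it.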
# DB setup
psql --echo-all --command="create extension if not exists hstore;" "${DBNAME}" postgres
psql --echo-all --command="create extension if not exists postgis;" "${DBNAME}" postgres
# Add ESRI:103240 to PostGIS
# from https://github.com/Esri/projection-engine-db-doc/
psql --echo-all --file="103240.sql" "${DBNAME}" postgres
echo "Importing TAZ"
shp2pgsql -d -D -s 103240 -I "data/VTATaz" | psql -d "${DBNAME}" >/dev/null
echo "Downloading Basemap"
curl "https://www.sanjoseca.gov/DocumentCenter/View/17141" --output "Basemap.zip"
unzip "Basemap.zip" "Parcel.*" -d "data"
echo "Importing Parcel"
shp2pgsql -d -D -s 103240 -t 2D -I "data/Parcel" | psql -d "${DBNAME}" >/dev/null
echo "Downloading Basemap_2"
curl "http://www.sanjoseca.gov/DocumentCenter/View/44895" --output "Basemap_2.zip"
unzip "Basemap_2.zip" "BuildingFootprint.*" "CondoParcel.*" "Site_Address_Points.*" \
-d "data"
echo "Importing BuildingFootprint"
shp2pgsql -d -D -s 103240 -I "data/BuildingFootprint" \
| psql -d "${DBNAME}" >/dev/null
echo "Importing CondoParcel"
shp2pgsql -d -D -s 103240 -I "data/CondoParcel" | psql -d "${DBNAME}" >/dev/null
echo "Importing Site_Address_Points"
shp2pgsql -d -D -s 2227 -k -I "data/Site_Address_Points" \
| psql -d "${DBNAME}" >/dev/null
# Download and import existing OSM data
echo "Downloading norcal-latest.osm.pbf"
curl "https://download.geofabrik.de/north-america/us/california/norcal-latest.osm.pbf" \
--output "data/norcal-latest.osm.pbf"
echo "Importing norcal-latest.osm.pbf"
osm2pgsql --database "${DBNAME}" --create \
--prefix osm \
--slim --hstore \
--latlong --multi-geometry \
--bbox "-122.038182903664,37.1409050504209,-121.593273327604,37.4640955052253" \
"data/norcal-latest.osm.pbf"
# Merge addresses to buildings
psql -v "ON_ERROR_STOP=true" --echo-queries --file="merge.sql" "${DBNAME}"
# Split into tasks
mkdir "out"
mkdir "out/intersecting"
mkdir "out/clean"
for intersects in false true; do
    if ${intersects}; then
        outdir="intersecting"
        intersectsQuery="intersectsExisting"
    else
        outdir="clean"
        intersectsQuery="not intersectsExisting"
    fi

    # The out/*/buildings_*.osm files are meant to be publicly hosted, already
    # split per TAZ and ready for tasking, e.g.
    # https://codeforsanjose.github.io/OSM-SouthBay/SJ_Buildings/out/clean/buildings_1323.osm
    ogr2ogr -sql "select 'https://codeforsanjose.github.io/OSM-SouthBay/SJ_Buildings/out/${outdir}/buildings_' || key || '.osm' as import_url, ST_SimplifyPreserveTopology(geom, 4) from VTATaz" \
        -t_srs EPSG:4326 \
        "out/grouped_${outdir}_buildings_zones.geojson" \
        "PG:dbname=${DBNAME} host=localhost"
    sed -i 's/ //g' "out/grouped_${outdir}_buildings_zones.geojson"

    for cid in {1153..2632}; do
        # Skip empty TAZs
        if [ $(psql --command="copy (select count(*) from VTATaz where key=${cid}) to stdout csv" ${DBNAME}) = 0 ]; then
            continue
        fi
        output="out/${outdir}/buildings_${cid}.osm"
        # Filter export data to each CID
        for layer in "buildingfootprint" "Site_Address_Points" "mergedbuildings" "namedparcels"; do
            psql -v "ON_ERROR_STOP=true" --echo-queries --command="create or replace view \"${layer}_filtered\" as select * from \"${layer}\" where cid=${cid} and ${intersectsQuery};" "${DBNAME}"
        done
        # Export to OSM
        python "${OGR2OSM}" "PG:dbname=${DBNAME} host=localhost" -t "basemap.py" -f --no-memory-copy -o "${output}"
        # Add sample region outline
        #sed -i '3i<bounds minlat="37.2440898883458" minlon="-121.875007225253" maxlat="37.25775329679" maxlon="-121.855829662555" />' "${output}"
    done
done