diff --git a/README.md b/README.md index e304608..84f37d7 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Based on https://github.com/Nate-Wessel/hamilton-import -original_data from: +`original_data` from: - https://gis-sonomacounty.hub.arcgis.com/datasets/2202c1cd6708441f987ca5552f2d9659 - https://gis-sonomacounty.hub.arcgis.com/datasets/0f5982c3582d4de0b811e68d7f0bff8f - https://overpass-turbo.eu/ @@ -20,7 +20,19 @@ out; ## Prerequisites +The postgis package appropriate for the version of postgres server you have installed (in my case, 11) + Ubuntu -- sudo apt install postgis +- sudo apt install shp2pgsql osm2pgsql +- sudo apt install postgresql-11 postgresql-11-postgis-3 +- The postgresql server started/running/configured and databases `gis` and `osm` created ## Running + +- Run the following SQL inside the `gis` database to enable the PostGIS and hstore extensions: `CREATE EXTENSION postgis; CREATE EXTENSION hstore;` +- Unzip the `original_data` and open a shell in that folder. +- Then, run from your shell: `shp2pgsql -s 3735:4326 -g geom -I Parcels__Public_.shp | psql -d gis -U postgres -W` +- `shp2pgsql -s 3735:4326 -g geom -I Sonoma_County_Building_Outlines.shp | psql -d gis -U postgres -W` +- `osm2pgsql -d gis -c --prefix son --slim --extra-attributes --hstore --latlong sonoma-orig-buildings-20201219.osm -U postgres -W` + +Now all the data is in Postgres. For processing and conflation, read through and execute `conflation.sql` as per your comfort level. diff --git a/conflation.sql b/conflation.sql new file mode 100644 index 0000000..e0b8f10 --- /dev/null +++ b/conflation.sql @@ -0,0 +1,284 @@ +-- add fields for OSM tags and data processing +ALTER TABLE sonoma_county_building_outlines + ADD COLUMN "addr:housenumber" text, + ADD COLUMN "addr:street" text, + ADD COLUMN loc_geom geometry(multipolygon,32616), + ADD COLUMN conflated boolean DEFAULT FALSE, + ADD COLUMN main boolean; -- is it the main building on the parcel? + +-- create local geometry fields and validate geometries +UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(ST_Transform(geom,32616)); +CREATE INDEX ON sonoma_county_building_outlines USING GIST (loc_geom); + +-- added fields for the parcels table +ALTER TABLE parcels__public_ + ADD COLUMN "addr:street" text, + ADD COLUMN loc_geom geometry(multipolygon,32616), + ADD COLUMN building_count integer, + ADD COLUMN repeating BOOLEAN DEFAULT FALSE; + +-- create local geometry fields and validate geometries +UPDATE parcels__public_ SET loc_geom = ST_MakeValid(ST_Transform(geom,32616)); +CREATE INDEX ON parcels__public_ USING GIST (loc_geom); + +-- parse and expand parcel street addresses +UPDATE parcels__public_ SET "addr:street" = initcap(addrst)||' '|| + CASE + WHEN upper(addrsf) = 'AV' THEN 'Avenue' + WHEN upper(addrsf) = 'DR' THEN 'Drive' + WHEN upper(addrsf) = 'RD' THEN 'Road' + WHEN upper(addrsf) = 'ST' THEN 'Street' + WHEN upper(addrsf) = 'LN' THEN 'Lane' + WHEN upper(addrsf) = 'CT' THEN 'Court' + WHEN upper(addrsf) = 'PL' THEN 'Place' + WHEN upper(addrsf) = 'CR' THEN 'Circle' + WHEN upper(addrsf) = 'TE' THEN 'Terrace' + WHEN upper(addrsf) = 'PK' THEN 'Park' + WHEN upper(addrsf) = 'WY' THEN 'Way' + WHEN upper(addrsf) = 'BV' THEN 'Boulevard' + WHEN upper(addrsf) = 'PW' THEN 'Parkway' + WHEN upper(addrsf) = 'TL' THEN 'Trail' + WHEN upper(addrsf) = 'HW' THEN 'Highway' + WHEN upper(addrsf) = 'WA' THEN 'Way' + WHEN upper(addrsf) = 'TR' THEN 'Terrace' + WHEN upper(addrsf) = 'SQ' THEN 'Square' + WHEN upper(addrsf) = 'AL' THEN 'Alley' + WHEN upper(addrsf) = 'BL' THEN 'Boulevard' + WHEN upper(addrsf) = 'CI' THEN 'Circle' + WHEN upper(addrsf) = 'PT' THEN 'Point' + WHEN upper(addrsf) = 'PI' THEN 'Pike' + WHEN upper(addrsf) = 'LA' THEN 'Lane' + ELSE '' -- NULL cases mostly have the suffix in the name field + END; + + +-- identify repeating parcels (indicates multiple addresses associated with buildings) +WITH geom_counts AS ( + SELECT array_agg(gid) AS ids, COUNT(*) + FROM parcels__public_ + GROUP BY geom +), geom_counts2 AS ( + SELECT * FROM geom_counts WHERE count > 1 +) +UPDATE parcels__public_ SET repeating = TRUE +FROM geom_counts2 +WHERE ids @> ARRAY[gid]; + + +-- identify parcels with multiple buildings +UPDATE parcels__public_ SET building_count = NULL WHERE building_count IS NOT NULL; +WITH bcounts AS ( + SELECT + p.gid, COUNT(*) + FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON + ST_Intersects(b.loc_geom,p.loc_geom) AND + ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom) + GROUP BY p.gid +) +UPDATE parcels__public_ SET building_count = count +FROM bcounts WHERE bcounts.gid = parcels__public_.gid; + + +-- add addresses to buildings with simple 1:1 matches to parcels +UPDATE sonoma_county_building_outlines SET "addr:housenumber" = NULL, "addr:street" = NULL; +WITH a AS ( + SELECT + b.gid, p.addrno, p."addr:street" + FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON + ST_Intersects(b.loc_geom,p.loc_geom) AND + ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom) + WHERE p.building_count = 1 AND NOT p.repeating +) +UPDATE sonoma_county_building_outlines SET + "addr:housenumber" = a.addrno, + "addr:street" = a."addr:street" +FROM a WHERE sonoma_county_building_outlines.gid = a.gid; + +--SELECT COUNT(*) FROM sonoma_county_building_outlines WHERE "addr:housenumber" IS NOT NULL OR "addr:street" IS NOT NULL; + +-- attempt to identify garages and sheds so they don't get addresses +UPDATE sonoma_county_building_outlines SET main = NULL; +-- sort the buildings on each parcel by size, but only where it's likely a garage/shed situation +WITH sizes AS ( + SELECT + p.gid AS pid, + b.gid AS bid, + row_number() OVER ( PARTITION BY p.gid ORDER BY ST_Area(b.loc_geom) DESC) AS size_order + FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON + ST_Intersects(b.loc_geom,p.loc_geom) AND + ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom) + WHERE + NOT p.repeating AND -- single parcels + p.building_count IN (2,3) -- 2 or 3 buildings on parcel + ORDER BY p.gid ASC +) UPDATE sonoma_county_building_outlines SET main = CASE + WHEN size_order = 1 THEN TRUE + WHEN size_order > 1 THEN FALSE + ELSE NULL +END +FROM sizes WHERE sizes.bid = sonoma_county_building_outlines.gid; + +-- now assign addresses to main buildings on parcels with outbuildings +WITH a AS ( + SELECT + b.gid, p.addrno, p."addr:street" + FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON + ST_Intersects(b.loc_geom,p.loc_geom) AND + ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom) + WHERE + p.building_count IN (2,3) + AND NOT p.repeating + AND b.main -- is main building +) +UPDATE sonoma_county_building_outlines SET + "addr:housenumber" = a.addrno, + "addr:street" = a."addr:street" +FROM a WHERE sonoma_county_building_outlines.gid = a.gid; + +-- get a count of outbuildings so we know how many addresses are intentionally unassigned +SELECT + COUNT(*) +FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON + ST_Intersects(b.loc_geom,p.loc_geom) AND + ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom) +WHERE + p.building_count IN (2,3) + AND NOT p.repeating + AND NOT b.main; -- is NOT main building + +-- try to assign multiple addresses from multiple parcels to single buildings +WITH addresses AS ( + SELECT + b.gid, + array_to_string( ARRAY_AGG(DISTINCT addrno), ';') AS housenumber, + array_to_string( ARRAY_AGG(DISTINCT "addr:street"), ';') AS street + FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON + ST_Intersects(b.loc_geom,p.loc_geom) AND + ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom) + WHERE + p.building_count = 1 AND + p.repeating AND + b."addr:housenumber" IS NULL + GROUP BY b.gid +) +UPDATE sonoma_county_building_outlines AS b SET + "addr:housenumber" = housenumber, + "addr:street" = street +FROM addresses AS a +WHERE a.gid = b.gid; + +-- try to identify addresses for buildings across multiple parcels +WITH addresses AS ( + SELECT + b.gid, + array_to_string( ARRAY_AGG(DISTINCT addrno), ';') AS addrno, + array_to_string( ARRAY_AGG(DISTINCT p."addr:street"), ';') AS street, + COUNT(*) + FROM sonoma_county_building_outlines AS b + JOIN parcels__public_ AS p ON + ST_Intersects(b.loc_geom,p.loc_geom) AND + ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) < 0.9*ST_Area(b.loc_geom) + WHERE + b."addr:housenumber" IS NULL AND + NOT p.repeating AND + p.addrno IS NOT NULL AND + b.sqft > 1000 + GROUP BY b.gid +) +UPDATE sonoma_county_building_outlines AS b SET + "addr:housenumber" = addrno, + "addr:street" = street +FROM addresses AS a +WHERE + count = 1 AND -- only simple cases! + a.gid = b.gid; + +-- identify intersecting/conflated buildings +UPDATE sonoma_county_building_outlines AS b SET conflated = FALSE; +UPDATE sonoma_county_building_outlines AS b SET conflated = TRUE +FROM son_polygon AS osm + WHERE ST_Intersects(b.geom,osm.way) + AND osm.building IS NOT NULL and osm.building != 'no'; + +-- dump simplified polygon geometries and OSM relavant fields into another table for exporting +-- this code is based on https://trac.osgeo.org/postgis/wiki/UsersWikiSimplifyPreserveTopology +-- it does take a very long time to run on this dataset... + +-- first do conflated buildings +with poly as ( + SELECT + gid, + "addr:housenumber", + "addr:street", + est_h_feet, + storyabove, + storybelow, + cwwuse, + (st_dump(loc_geom)).* + FROM sonoma_county_building_outlines + WHERE conflated +) +SELECT + poly.gid, + poly."addr:housenumber", + poly."addr:street", + poly.est_h_feet, + poly.storyabove, + poly.storybelow, + poly.cwwuse, + ST_Transform(baz.geom,4326) AS geom +INTO simplified_conflated_buildings +FROM ( + SELECT (ST_Dump(ST_Polygonize(distinct geom))).geom as geom + FROM ( + -- simplify geometries to a 0.2m tolerance to avoid repeated points + SELECT (ST_Dump(st_simplifyPreserveTopology(ST_Linemerge(st_union(geom)), 0.2))).geom as geom + FROM ( + SELECT ST_ExteriorRing((ST_DumpRings(geom)).geom) as geom + FROM poly + ) AS foo + ) AS bar +) AS baz, poly +WHERE + ST_Intersects(poly.geom, baz.geom) + AND ST_Area(st_intersection(poly.geom, baz.geom))/ST_Area(baz.geom) > 0.9; +ALTER TABLE simplified_conflated_buildings ADD CONSTRAINT temp1_pkey PRIMARY KEY (gid); + +-- next do non-conflated buldings separately +with poly as ( + SELECT + gid, + "addr:housenumber", + "addr:street", + est_h_feet, + storyabove, + storybelow, + cwwuse, + (st_dump(loc_geom)).* + FROM sonoma_county_building_outlines + WHERE NOT conflated -- note: NOT +) +SELECT + poly.gid, + poly."addr:housenumber", + poly."addr:street", + poly.est_h_feet, + poly.storyabove, + poly.storybelow, + poly.cwwuse, + ST_Transform(baz.geom,4326) AS geom +INTO simplified_buildings +FROM ( + SELECT (ST_Dump(ST_Polygonize(distinct geom))).geom as geom + FROM ( + -- simplify geometries to a 0.2m tolerance to avoid repeated points + SELECT (ST_Dump(st_simplifyPreserveTopology(ST_Linemerge(st_union(geom)), 0.2))).geom as geom + FROM ( + SELECT ST_ExteriorRing((ST_DumpRings(geom)).geom) as geom + FROM poly + ) AS foo + ) AS bar +) AS baz, poly +WHERE + ST_Intersects(poly.geom, baz.geom) + AND ST_Area(st_intersection(poly.geom, baz.geom))/ST_Area(baz.geom) > 0.9;