From fa827a00582a4f7cfd175f575e381ce336880603 Mon Sep 17 00:00:00 2001 From: Will Bradley Date: Wed, 13 Jan 2021 22:47:55 -0800 Subject: [PATCH] improve conflation, adjust readme, TODOs --- README.md | 44 ++++-- conflation.sql | 413 +++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 379 insertions(+), 78 deletions(-) diff --git a/README.md b/README.md index 84f37d7..19ebf5d 100644 --- a/README.md +++ b/README.md @@ -7,32 +7,54 @@ Based on https://github.com/Nate-Wessel/hamilton-import - https://gis-sonomacounty.hub.arcgis.com/datasets/0f5982c3582d4de0b811e68d7f0bff8f - https://overpass-turbo.eu/ -Overpass query: +Overpass query (you may save as OSM file, shapefile, or postgres sql dump depending on your overpass client) ``` area[name="Sonoma County"]; -way[building](area); -/*added by auto repair*/ +( + way[building](area); + relation[building](area); +); (._;>;); -/*end of auto repair*/ out; ``` +If using an Overpass -> QGIS -> Postgres dump, save it as `osmquery_buildings_pgdump.sql` for later. +Otherwise osm2pgsql should create tables like `son_polygon` for later. + ## Prerequisites The postgis package appropriate for the version of postgres server you have installed (in my case, 11) Ubuntu -- sudo apt install shp2pgsql osm2pgsql -- sudo apt install postgresql-11 postgresql-11-postgis-3 -- The postgresql server started/running/configured and databases `gis` and `osm` created +- sudo apt install postgresql-11 postgresql-11-postgis-3 shp2pgsql osm2pgsql + +Debian (shp2pgsql is included in postgis) +- sudo apt install postgresql postgis osm2pgsql + +- The postgresql server started/running/configured and database `gis` created ## Running -- Run the following SQL inside the `gis` database to enable the PostGIS and hstore extensions: `CREATE EXTENSION postgis; CREATE EXTENSION hstore;` +- Run the following SQL as a superuser (postgres) inside the `gis` database to enable the PostGIS and hstore extensions: `CREATE EXTENSION postgis; CREATE EXTENSION hstore;` - Unzip the `original_data` and open a shell in that folder. -- Then, run from your shell: `shp2pgsql -s 3735:4326 -g geom -I Parcels__Public_.shp | psql -d gis -U postgres -W` -- `shp2pgsql -s 3735:4326 -g geom -I Sonoma_County_Building_Outlines.shp | psql -d gis -U postgres -W` -- `osm2pgsql -d gis -c --prefix son --slim --extra-attributes --hstore --latlong sonoma-orig-buildings-20201219.osm -U postgres -W` +- Here we are assuming that county data is in WGS84/EPSG4236 format, which was true as of last check and is also what OSM uses. +- Run from your shell: `shp2pgsql -s 4326 -I Parcels__Public_.shp | psql -d gis -U postgres -W` +- `shp2pgsql -s 4326 -I Sonoma_County_Building_Outlines.shp | psql -d gis -U postgres -W` +- `shp2pgsql -s 4326 -I osm-buildings-01-03.shp | psql -d gis -U postgres -W` Now all the data is in Postgres. For processing and conflation, read through and execute `conflation.sql` as per your comfort level. + + +### Internal Notes +- http://download.geofabrik.de/north-america/us/california/norcal-latest.osm.pbf + +``` +shp2pgsql -s 4326 -I Parcels__Public_.shp | psql -d openstreetmap -U openstreetmap -W -h localhost -p 54321 +shp2pgsql -s 4326 -I Sonoma_County_Building_Outlines.shp | psql -d openstreetmap -U openstreetmap -W -h localhost -p 54321 +psql -d openstreetmap -U openstreetmap -W -h localhost -p 54321 -f osmquery-pgdump.sql + +#unused +osm2pgsql -d gis -c --prefix son --slim --extra-attributes --hstore --latlong sonoma-orig-buildings-20201219.osm -U postgres -W` +osm2pgsql -d openstreetmap -c --prefix son --slim --extra-attributes --hstore --latlong norcal-latest-20200103.osm.pbf -U openstreetmap -W -H localhost -P 54321 +``` \ No newline at end of file diff --git a/conflation.sql b/conflation.sql index 7ce3adc..f2fa528 100644 --- a/conflation.sql +++ b/conflation.sql @@ -2,61 +2,320 @@ ALTER TABLE sonoma_county_building_outlines ADD COLUMN "addr:housenumber" text, ADD COLUMN "addr:street" text, - ADD COLUMN loc_geom geometry(multipolygon,32616), + ADD COLUMN "addr:unit" text, + ADD COLUMN loc_geom geometry(multipolygon,4326), ADD COLUMN conflated boolean DEFAULT FALSE, ADD COLUMN main boolean; -- is it the main building on the parcel? -- create local geometry fields and validate geometries -UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(ST_Transform(geom,32616)); +UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(geom); CREATE INDEX ON sonoma_county_building_outlines USING GIST (loc_geom); -- added fields for the parcels table ALTER TABLE parcels__public_ ADD COLUMN "addr:housenumber" text, ADD COLUMN "addr:street" text, - ADD COLUMN loc_geom geometry(multipolygon,32616), + ADD COLUMN "addr:unit" text, + ADD COLUMN loc_geom geometry(multipolygon,4326), ADD COLUMN building_count integer, ADD COLUMN repeating BOOLEAN DEFAULT FALSE; -- create local geometry fields and validate geometries -UPDATE parcels__public_ SET loc_geom = ST_MakeValid(ST_Transform(geom,32616)); +UPDATE parcels__public_ SET loc_geom = ST_MakeValid(geom); CREATE INDEX ON parcels__public_ USING GIST (loc_geom); -- parse and expand parcel street addresses -update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+)$', '\1')), - "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+)$', '\2')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+)' - AND situsfmt1 NOT LIKE '%NONE' AND situsfmt1 NOT SIMILAR TO '([0-9]+) ([A-Z ]+) (AVE|DR|RD|ST|LN|CT|PL|CIR|TER|BLVD|PKWY|HWY)' AND situsfmt1 NOT SIMILAR TO '% ([^A-Z]+)'; --- select situsfmt1, "addr:housenumber", "addr:street" from parcels__public_ limit 100; -update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+) AVE$', '\1')), - "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) AVE$', '\2 Avenue')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) AVE'; -update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) DR$', '\1')), - "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) DR$', '\2 Drive')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) DR'; -update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) RD$', '\1')), - "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) RD$', '\2 Road')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) RD'; -update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) ST$', '\1')), - "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) ST$', '\2 Street')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) ST'; -update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) LN$', '\1')), - "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) LN$', '\2 Lane')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) LN'; -update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) CT$', '\1')), - "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) CT$', '\2 Court')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) CT'; -update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) PL$', '\1')), - "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) PL$', '\2 Place')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) PL'; -update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) CIR$', '\1')), - "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) CIR$', '\2 Circle')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) CIR'; -update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) TER$', '\1')), - "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) TER$', '\2 Terrace')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) TER'; -update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) BLVD$', '\1')), - "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) BLVD$', '\2 Boulevard')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) BLVD'; -update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) PKWY$', '\1')), - "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) PKWY$', '\2 Parkway')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) PKWY'; -update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) HWY$', '\1')), - "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) HWY$', '\2 Highway')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) HWY'; --- select situsfmt1, "addr:housenumber", "addr:street" from parcels__public_ limit 100; +-- TODO: find/handle oddballs like 123A Main St and 123 Main St #4 +SELECT situsfmt1, "addr:housenumber", "addr:street" +FROM public.parcels__public_ +where "addr:housenumber" is null +and situsfmt1 NOT SIMILAR TO '([0-9]+)[A-Z]* [A-Z ]*([0-9]*[A-Z\- ]+)' +and situsfmt1 NOT SIMILAR TO '%NONE' +and situsfmt1 NOT SIMILAR TO '%#%' +and situsfmt1 SIMILAR TO '([0-9]+)% %' +ORDER BY gid ASC; + +-- +-- functions for address parsing +-- + +create or replace function expand_road(n varchar) RETURNS varchar as $$ +DECLARE + r varchar; +BEGIN + SELECT INTO r + CASE upper(n) + WHEN 'ACRD' THEN 'Access Road' + WHEN 'AL' THEN 'Alley' + WHEN 'ALY' THEN 'Alley' + WHEN 'ARC' THEN 'Arcade' + WHEN 'AV' THEN 'Avenue' + WHEN 'AVE' THEN 'Avenue' + WHEN 'BLF' THEN 'Bluff' + WHEN 'BLV' THEN 'Boulevard' + WHEN 'BLVD' THEN 'Boulevard' + WHEN 'BR' THEN 'Bridge' + WHEN 'BRG' THEN 'Bridge' + WHEN 'BYP' THEN 'Bypass' + WHEN 'CDS' THEN 'Cul-de-sac' + WHEN 'CIR' THEN 'Circle' + WHEN 'CMNS' THEN 'Commons' + WHEN 'CONC' THEN 'Concession' + WHEN 'CRES' THEN 'Crescent' + WHEN 'CRST' THEN 'Crest' + WHEN 'CSWY' THEN 'Crossway' + WHEN 'CT' THEN 'Court' + WHEN 'CTR' THEN 'Center' + WHEN 'CV' THEN 'Cove' + WHEN 'DR' THEN 'Drive' + WHEN 'ET' THEN 'ET' + WHEN 'EXPWY' THEN 'Expressway' + WHEN 'EXPY' THEN 'Expressway' + WHEN 'EXT' THEN 'Extension' + WHEN 'FMRD' THEN 'Farm to Market Road' + WHEN 'FWY' THEN 'Freeway' + WHEN 'GRD' THEN 'Grade' + WHEN 'HBR' THEN 'Harbor' + WHEN 'HOLW' THEN 'Hollow' + WHEN 'HWY' THEN 'Highway' + WHEN 'HTS' THEN 'Hights' + WHEN 'KY' THEN 'Key' + WHEN 'LNDG' THEN 'Landing' + WHEN 'LN' THEN 'Lane' + WHEN 'LOOP' THEN 'Loop' + WHEN 'MALL' THEN 'Mall' + WHEN 'MAL' THEN 'Mall' + WHEN 'MTWY' THEN 'Motorway' + WHEN 'OVAL' THEN 'Oval' + WHEN 'OPAS' THEN 'Overpass' + WHEN 'OVPS' THEN 'Overpass' + WHEN 'PARK' THEN 'Park' + WHEN 'PASS' THEN 'Pass' + WHEN 'PATH' THEN 'Path' + WHEN 'PIKE' THEN 'Pike' + WHEN 'PKWY' THEN 'Parkway' + WHEN 'PKY' THEN 'Parkway' + WHEN 'PL' THEN 'Place' + WHEN 'PLZ' THEN 'Plaza' + WHEN 'PSGE' THEN 'Passage' + WHEN 'PT' THEN 'Point' + WHEN 'RAMP' THEN 'Ramp' + WHEN 'RDG' THEN 'Ridge' + WHEN 'RD' THEN 'Road' + WHEN 'RMRD' THEN 'Ranch to Market Road' + WHEN 'RNCH' THEN 'Ranch' + WHEN 'ROW' THEN 'Row' + WHEN 'RTE' THEN 'Route' + WHEN 'RUE' THEN 'Rue' + WHEN 'RUN' THEN 'Run' + WHEN 'SKWY' THEN 'Skyway' + WHEN 'SPUR' THEN 'Spur' + WHEN 'SQ' THEN 'Square' + WHEN 'SR' THEN 'State Route' + WHEN 'STCT' THEN 'Street Court' + WHEN 'ST' THEN 'Street' + WHEN 'STR' THEN 'Stravenue' + WHEN 'TER' THEN 'Terrace' + WHEN 'TFWY' THEN 'Trafficway' + WHEN 'THFR' THEN 'Thoroughfare' + WHEN 'THWY' THEN 'Thruway' + WHEN 'TPKE' THEN 'Turnpike' + WHEN 'TRCE' THEN 'Trace' + WHEN 'TRL' THEN 'Trail' + WHEN 'TRL' THEN 'Trail' + WHEN 'TUNL' THEN 'Tunnel' + WHEN 'UNP' THEN 'Underpass' + WHEN 'VIA' THEN 'Viaduct' + WHEN 'VIS' THEN 'Vista' + WHEN 'WALK' THEN 'Walk' + WHEN 'WAY' THEN 'Way' + WHEN 'WKWY' THEN 'Walkway' + WHEN 'XING' THEN 'Crossing' + ELSE n + + END; + RETURN r; +END; +$$ LANGUAGE plpgsql; + +create or replace function expand_direction (n varchar) RETURNS varchar AS $$ +DECLARE + dir varchar; +BEGIN + IF n IS NULL THEN + RETURN ''; + END IF; + SELECT INTO dir + CASE n + WHEN 'N' THEN 'North' + WHEN 'NE' THEN 'Northeast' + WHEN 'NW' THEN 'Northwest' + WHEN 'E' THEN 'East' + WHEN 'W' THEN 'West' + WHEN 'S' THEN 'South' + WHEN 'SE' THEN 'Southeast' + WHEN 'SW'THEN 'Southwest' + ELSE n + END; + RETURN dir; +END; +$$ LANGUAGE plpgsql; + + +-- +-- start parsing addresses +-- + +-- reset our fields +update parcels__public_ SET "addr:housenumber" = NULL, + "addr:street" = NULL, + "addr:unit" = NULL + where "addr:housenumber" IS NOT NULL; + +-- basic 123 Main with no common suffixes or numbers +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+)$', '\1')), + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+)$', '\2')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]+)' + AND situsfmt1 NOT LIKE '%NONE'; + + +-- convenient query to check status as you go +select count(*), min(situsfmt1), min("addr:housenumber"), max("addr:housenumber"), "addr:street", min("addr:unit") from parcels__public_ + where "addr:housenumber" is not null + group by "addr:street" + order by count desc; + + update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)$', '\1')), "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)$', 'Highway \2')) where situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+)'; --- select situsfmt1, "addr:housenumber", "addr:street" from parcels__public_ where situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+)'; +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HIGHWAY ([0-9]+)$', '\1')), + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HIGHWAY ([0-9]+)$', 'Highway \2')) where situsfmt1 SIMILAR TO '([0-9]+) HIGHWAY ([0-9]+)'; +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ARROWHEAD MTN TRL$', '\1')), + "addr:street" = 'Arrowhead Mountain Trail' where situsfmt1 SIMILAR TO '([0-9]+) ARROWHEAD MTN TRL'; +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) S FITCH MTN RD$', '\1')), + "addr:street" = 'South Fitch Mountain Road' where situsfmt1 SIMILAR TO '([0-9]+) S FITCH MTN RD'; +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) S MCDOWELL EXT BLVD$', '\1')), + "addr:street" = 'South McDowell Boulevard Extension' where situsfmt1 SIMILAR TO '([0-9]+) S MCDOWELL EXT BLVD'; + +-- basic 123 Main St +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\1')), -- 123 + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\2 ')) -- Main / 4th / A / Saint + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\3'))) -- Street / Johns + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})'; +-- now 123 Twin Oaks Ln or 123 St Oaks Pl +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 123 + "addr:street" = initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\2'))) -- St / Los + || initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', ' \3 ')) -- Pl / Main + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\4'))) -- Dr / Oak + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})'; +-- now 123 E Cherry Creek Rd +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 123 + "addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\2'))) -- South + || initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', ' \3 ')) -- Cherry + || initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\4 ')) -- Creek + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\5'))) -- Street + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})'; + +-- basic directional 123 S Main St +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\1')), -- 123 + "addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\2'))) -- South + || initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', ' \3 ')) -- Main / 4th + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\4'))) -- Street + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{1}) ([0-9A-Z]{1,99}) ([A-Z]{2,99})'; + +-- and the ever lovable 123 Main St S +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\1')), -- 123 + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\2 ')) -- Main / 4th + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\3'))) -- Street + || ' ' -- space + || initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\4'))) -- S (South) + where situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})'; + +-- and the even more lovable 123 Main Hill St S +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\1')), -- 123 + "addr:street" = initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\2'))) -- Main + || initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', ' \3 ')) -- Hill + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\4'))) -- Street + || ' ' -- space + || initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\5'))) -- S (South) + where situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})'; +-- for these apartment numbers we're gonna need to start using some functions +-- no direction but three words in street name +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123 + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\2 \3 ')) -- La Main + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\4'))), -- Street + "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\5')) -- Unit 4 + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)'; +-- suffix direction +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123 + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\2 ')) -- Main / 4th + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\3'))) -- Street + || ' ' -- space + || initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\4'))), -- S (South) + "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\5')) -- Unit 4 / A + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)'; +-- prefix direction +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123 + "addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\2'))) -- S (South) + || initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', ' \3 ')) -- Main / 4th + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\4'))), -- Street + "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\5')) -- Unit 4 / A + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)'; +-- no direction but two words in street name +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123 + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\2 ')) -- Main / 4th + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\3'))), -- Street + "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\4')) -- Unit 4 / A / 1-A-B2 + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)'; +-- no direction but one word in street name +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123 + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\2')), -- Main / 4th + "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\3')) -- Unit 4 / A / 1-A-B2 + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)'; + +-- 123 D EXT ST is a unique case that actually needs to be 123 D Street Extension +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) EXT ST$', '\1')), + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) EXT ST$', '\2 Street Extension')) where situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+) EXT ST'; + +-- 123 B MEADOWBROOK CT is a unique case that actually needs to be 123 Meadowbrook Court, Unit B +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]) MEADOWBROOK CT$', '\1')), + "addr:street" = 'Meadowbrook Court', + "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]) MEADOWBROOK CT$', '\2')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]) MEADOWBROOK CT'; + +-- 123 HWY 116 #C1 is a unique case that needs to be 123 Highway 116, Unit C1 +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)$', '\1')), + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)$', 'Highway \2')), + "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)$', '\3')) + where "addr:housenumber" is null and situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)'; + +-- TODO: replace Mc([a-z]) with Mc(upper(\1)) when preceded by nothingness or a space +-- capitalize Macarthur, Macfarlane, Mackinnon but not Macaw Mackey Mackl[iy]n or Macmahan +-- investigate Blank Road +-- consider "0" housenumbers +-- remove leading zeroes in housenumbers +-- remove "Ste" from unit +-- 5330 OLD REDWOOD HWY #A B & C +-- 27801 STEWARTS PT SKAGGS SPRGS RD +-- 10000 FRANZ VALLEY SCHOOL RD +-- 1003 HWY 116 N +-- 1382 HWY 116 S #1 +-- 100 SPRING MTN SUMMIT TRL +-- 1055 BROADWAY #C, D +-- 1055 BROADWAY #E - H +-- 10826 SUMMER HOME PARK RD +-- 1323 W DRY CREEK RD #2 +-- 1340 19TH HOLE DR +-- 14521 CANYON 2 RD +-- 14578 CANYON 1 RD +-- 1460 TOWN & COUNTRY DR +-- 99 e SHILOH RD + +-- +-- Match buildings to parcels +-- + + -- identify repeating parcels (indicates multiple addresses associated with buildings) WITH geom_counts AS ( SELECT array_agg(gid) AS ids, COUNT(*) @@ -151,6 +410,11 @@ WHERE -- result: 44090 +--SELECT COUNT(*) FROM sonoma_county_building_outlines WHERE "addr:housenumber" IS NOT NULL OR "addr:street" IS NOT NULL; +-- result: 123793 +--SELECT COUNT(*) FROM sonoma_county_building_outlines WHERE "addr:housenumber" IS NULL AND "addr:street" IS NULL; +-- result: 155217 + -- try to assign multiple addresses from multiple parcels to single buildings WITH addresses AS ( SELECT @@ -172,9 +436,11 @@ UPDATE sonoma_county_building_outlines AS b SET FROM addresses AS a WHERE a.gid = b.gid; --------------------------------------------------------TODO +--select * from sonoma_county_building_outlines where "addr:housenumber" LIKE '%;%' OR "addr:street" LIKE '%;%'; +-- result: 0, may not be working TODO -- try to identify addresses for buildings across multiple parcels +-- todo: this may not have done anything WITH addresses AS ( SELECT b.gid, @@ -200,9 +466,25 @@ WHERE count = 1 AND -- only simple cases! a.gid = b.gid; +--select * from sonoma_county_building_outlines where "addr:housenumber" LIKE '%;%' OR "addr:street" LIKE '%;%'; +-- result: 0, may not be working TODO + + -- identify intersecting/conflated buildings + +-- +-- RUN ONLY ONE +-- + +-- IF USING Overpass -> QGIS -> Postgres Dump: UPDATE sonoma_county_building_outlines AS b SET conflated = FALSE; -UPDATE sonoma_county_building_outlines AS b SET conflated = TRUE +UPDATE sonoma_county_building_outlines AS b SET conflated = TRUE +FROM osmquery_buildings_pgdump AS osm + WHERE ST_Intersects(b.geom,osm.wkb_geometry) + AND osm.building IS NOT NULL and osm.building != 'no'; + +-- IF USING a direct OSM2PGSQL import: +UPDATE sonoma_county_building_outlines AS b SET conflated = TRUE FROM son_polygon AS osm WHERE ST_Intersects(b.geom,osm.way) AND osm.building IS NOT NULL and osm.building != 'no'; @@ -213,26 +495,20 @@ FROM son_polygon AS osm -- first do conflated buildings with poly as ( - SELECT - gid, - "addr:housenumber", - "addr:street", - est_h_feet, - storyabove, - storybelow, - cwwuse, - (st_dump(loc_geom)).* + SELECT + gid, + "addr:housenumber", + "addr:street", + "addr:unit", + (st_dump(loc_geom)).* FROM sonoma_county_building_outlines WHERE conflated ) SELECT poly.gid, poly."addr:housenumber", - poly."addr:street", - poly.est_h_feet, - poly.storyabove, - poly.storybelow, - poly.cwwuse, + poly."addr:street", + poly."addr:unit", ST_Transform(baz.geom,4326) AS geom INTO simplified_conflated_buildings FROM ( @@ -251,29 +527,32 @@ WHERE AND ST_Area(st_intersection(poly.geom, baz.geom))/ST_Area(baz.geom) > 0.9; ALTER TABLE simplified_conflated_buildings ADD CONSTRAINT temp1_pkey PRIMARY KEY (gid); + +-- 233966 duplicated, deleted smaller +-- 248900 duplicated, deleted smaller +-- 246427 duplicated, deleted smaller +-- 240471 duplicated, deleted smaller +-- 277549 duplicated, deleted smaller +-- 269953 + + -- next do non-conflated buldings separately with poly as ( SELECT - gid, - "addr:housenumber", - "addr:street", - est_h_feet, - storyabove, - storybelow, - cwwuse, - (st_dump(loc_geom)).* + gid, + "addr:housenumber", + "addr:street", + "addr:unit", + (st_dump(loc_geom)).* FROM sonoma_county_building_outlines - WHERE NOT conflated -- note: NOT + WHERE NOT conflated --note: NOT ) SELECT - poly.gid, - poly."addr:housenumber", - poly."addr:street", - poly.est_h_feet, - poly.storyabove, - poly.storybelow, - poly.cwwuse, - ST_Transform(baz.geom,4326) AS geom + poly.gid, + poly."addr:housenumber", + poly."addr:street", + poly."addr:unit", + ST_Transform(baz.geom,4326) AS geom INTO simplified_buildings FROM ( SELECT (ST_Dump(ST_Polygonize(distinct geom))).geom as geom