improve conflation, adjust readme, TODOs

This commit is contained in:
Will Bradley 2021-01-13 22:47:55 -08:00
parent 868c23eb79
commit fa827a0058
2 changed files with 379 additions and 78 deletions

View File

@ -7,32 +7,54 @@ Based on https://github.com/Nate-Wessel/hamilton-import
- https://gis-sonomacounty.hub.arcgis.com/datasets/0f5982c3582d4de0b811e68d7f0bff8f - https://gis-sonomacounty.hub.arcgis.com/datasets/0f5982c3582d4de0b811e68d7f0bff8f
- https://overpass-turbo.eu/ - https://overpass-turbo.eu/
Overpass query: Overpass query (you may save as OSM file, shapefile, or postgres sql dump depending on your overpass client)
``` ```
area[name="Sonoma County"]; area[name="Sonoma County"];
way[building](area); (
/*added by auto repair*/ way[building](area);
relation[building](area);
);
(._;>;); (._;>;);
/*end of auto repair*/
out; out;
``` ```
If using an Overpass -> QGIS -> Postgres dump, save it as `osmquery_buildings_pgdump.sql` for later.
Otherwise osm2pgsql should create tables like `son_polygon` for later.
## Prerequisites ## Prerequisites
The postgis package appropriate for the version of postgres server you have installed (in my case, 11) The postgis package appropriate for the version of postgres server you have installed (in my case, 11)
Ubuntu Ubuntu
- sudo apt install shp2pgsql osm2pgsql - sudo apt install postgresql-11 postgresql-11-postgis-3 shp2pgsql osm2pgsql
- sudo apt install postgresql-11 postgresql-11-postgis-3
- The postgresql server started/running/configured and databases `gis` and `osm` created Debian (shp2pgsql is included in postgis)
- sudo apt install postgresql postgis osm2pgsql
- The postgresql server started/running/configured and database `gis` created
## Running ## Running
- Run the following SQL inside the `gis` database to enable the PostGIS and hstore extensions: `CREATE EXTENSION postgis; CREATE EXTENSION hstore;` - Run the following SQL as a superuser (postgres) inside the `gis` database to enable the PostGIS and hstore extensions: `CREATE EXTENSION postgis; CREATE EXTENSION hstore;`
- Unzip the `original_data` and open a shell in that folder. - Unzip the `original_data` and open a shell in that folder.
- Then, run from your shell: `shp2pgsql -s 3735:4326 -g geom -I Parcels__Public_.shp | psql -d gis -U postgres -W` - Here we are assuming that county data is in WGS84/EPSG4236 format, which was true as of last check and is also what OSM uses.
- `shp2pgsql -s 3735:4326 -g geom -I Sonoma_County_Building_Outlines.shp | psql -d gis -U postgres -W` - Run from your shell: `shp2pgsql -s 4326 -I Parcels__Public_.shp | psql -d gis -U postgres -W`
- `osm2pgsql -d gis -c --prefix son --slim --extra-attributes --hstore --latlong sonoma-orig-buildings-20201219.osm -U postgres -W` - `shp2pgsql -s 4326 -I Sonoma_County_Building_Outlines.shp | psql -d gis -U postgres -W`
- `shp2pgsql -s 4326 -I osm-buildings-01-03.shp | psql -d gis -U postgres -W`
Now all the data is in Postgres. For processing and conflation, read through and execute `conflation.sql` as per your comfort level. Now all the data is in Postgres. For processing and conflation, read through and execute `conflation.sql` as per your comfort level.
### Internal Notes
- http://download.geofabrik.de/north-america/us/california/norcal-latest.osm.pbf
```
shp2pgsql -s 4326 -I Parcels__Public_.shp | psql -d openstreetmap -U openstreetmap -W -h localhost -p 54321
shp2pgsql -s 4326 -I Sonoma_County_Building_Outlines.shp | psql -d openstreetmap -U openstreetmap -W -h localhost -p 54321
psql -d openstreetmap -U openstreetmap -W -h localhost -p 54321 -f osmquery-pgdump.sql
#unused
osm2pgsql -d gis -c --prefix son --slim --extra-attributes --hstore --latlong sonoma-orig-buildings-20201219.osm -U postgres -W`
osm2pgsql -d openstreetmap -c --prefix son --slim --extra-attributes --hstore --latlong norcal-latest-20200103.osm.pbf -U openstreetmap -W -H localhost -P 54321
```

View File

@ -2,61 +2,320 @@
ALTER TABLE sonoma_county_building_outlines ALTER TABLE sonoma_county_building_outlines
ADD COLUMN "addr:housenumber" text, ADD COLUMN "addr:housenumber" text,
ADD COLUMN "addr:street" text, ADD COLUMN "addr:street" text,
ADD COLUMN loc_geom geometry(multipolygon,32616), ADD COLUMN "addr:unit" text,
ADD COLUMN loc_geom geometry(multipolygon,4326),
ADD COLUMN conflated boolean DEFAULT FALSE, ADD COLUMN conflated boolean DEFAULT FALSE,
ADD COLUMN main boolean; -- is it the main building on the parcel? ADD COLUMN main boolean; -- is it the main building on the parcel?
-- create local geometry fields and validate geometries -- create local geometry fields and validate geometries
UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(ST_Transform(geom,32616)); UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(geom);
CREATE INDEX ON sonoma_county_building_outlines USING GIST (loc_geom); CREATE INDEX ON sonoma_county_building_outlines USING GIST (loc_geom);
-- added fields for the parcels table -- added fields for the parcels table
ALTER TABLE parcels__public_ ALTER TABLE parcels__public_
ADD COLUMN "addr:housenumber" text, ADD COLUMN "addr:housenumber" text,
ADD COLUMN "addr:street" text, ADD COLUMN "addr:street" text,
ADD COLUMN loc_geom geometry(multipolygon,32616), ADD COLUMN "addr:unit" text,
ADD COLUMN loc_geom geometry(multipolygon,4326),
ADD COLUMN building_count integer, ADD COLUMN building_count integer,
ADD COLUMN repeating BOOLEAN DEFAULT FALSE; ADD COLUMN repeating BOOLEAN DEFAULT FALSE;
-- create local geometry fields and validate geometries -- create local geometry fields and validate geometries
UPDATE parcels__public_ SET loc_geom = ST_MakeValid(ST_Transform(geom,32616)); UPDATE parcels__public_ SET loc_geom = ST_MakeValid(geom);
CREATE INDEX ON parcels__public_ USING GIST (loc_geom); CREATE INDEX ON parcels__public_ USING GIST (loc_geom);
-- parse and expand parcel street addresses -- parse and expand parcel street addresses
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+)$', '\1')), -- TODO: find/handle oddballs like 123A Main St and 123 Main St #4
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+)$', '\2')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+)' SELECT situsfmt1, "addr:housenumber", "addr:street"
AND situsfmt1 NOT LIKE '%NONE' AND situsfmt1 NOT SIMILAR TO '([0-9]+) ([A-Z ]+) (AVE|DR|RD|ST|LN|CT|PL|CIR|TER|BLVD|PKWY|HWY)' AND situsfmt1 NOT SIMILAR TO '% ([^A-Z]+)'; FROM public.parcels__public_
-- select situsfmt1, "addr:housenumber", "addr:street" from parcels__public_ limit 100; where "addr:housenumber" is null
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+) AVE$', '\1')), and situsfmt1 NOT SIMILAR TO '([0-9]+)[A-Z]* [A-Z ]*([0-9]*[A-Z\- ]+)'
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) AVE$', '\2 Avenue')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) AVE'; and situsfmt1 NOT SIMILAR TO '%NONE'
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) DR$', '\1')), and situsfmt1 NOT SIMILAR TO '%#%'
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) DR$', '\2 Drive')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) DR'; and situsfmt1 SIMILAR TO '([0-9]+)% %'
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) RD$', '\1')), ORDER BY gid ASC;
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) RD$', '\2 Road')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) RD';
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) ST$', '\1')), --
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) ST$', '\2 Street')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) ST'; -- functions for address parsing
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) LN$', '\1')), --
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) LN$', '\2 Lane')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) LN';
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) CT$', '\1')), create or replace function expand_road(n varchar) RETURNS varchar as $$
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) CT$', '\2 Court')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) CT'; DECLARE
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) PL$', '\1')), r varchar;
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) PL$', '\2 Place')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) PL'; BEGIN
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) CIR$', '\1')), SELECT INTO r
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) CIR$', '\2 Circle')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) CIR'; CASE upper(n)
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) TER$', '\1')), WHEN 'ACRD' THEN 'Access Road'
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) TER$', '\2 Terrace')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) TER'; WHEN 'AL' THEN 'Alley'
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) BLVD$', '\1')), WHEN 'ALY' THEN 'Alley'
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) BLVD$', '\2 Boulevard')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) BLVD'; WHEN 'ARC' THEN 'Arcade'
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) PKWY$', '\1')), WHEN 'AV' THEN 'Avenue'
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) PKWY$', '\2 Parkway')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) PKWY'; WHEN 'AVE' THEN 'Avenue'
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) HWY$', '\1')), WHEN 'BLF' THEN 'Bluff'
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) HWY$', '\2 Highway')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) HWY'; WHEN 'BLV' THEN 'Boulevard'
-- select situsfmt1, "addr:housenumber", "addr:street" from parcels__public_ limit 100; WHEN 'BLVD' THEN 'Boulevard'
WHEN 'BR' THEN 'Bridge'
WHEN 'BRG' THEN 'Bridge'
WHEN 'BYP' THEN 'Bypass'
WHEN 'CDS' THEN 'Cul-de-sac'
WHEN 'CIR' THEN 'Circle'
WHEN 'CMNS' THEN 'Commons'
WHEN 'CONC' THEN 'Concession'
WHEN 'CRES' THEN 'Crescent'
WHEN 'CRST' THEN 'Crest'
WHEN 'CSWY' THEN 'Crossway'
WHEN 'CT' THEN 'Court'
WHEN 'CTR' THEN 'Center'
WHEN 'CV' THEN 'Cove'
WHEN 'DR' THEN 'Drive'
WHEN 'ET' THEN 'ET'
WHEN 'EXPWY' THEN 'Expressway'
WHEN 'EXPY' THEN 'Expressway'
WHEN 'EXT' THEN 'Extension'
WHEN 'FMRD' THEN 'Farm to Market Road'
WHEN 'FWY' THEN 'Freeway'
WHEN 'GRD' THEN 'Grade'
WHEN 'HBR' THEN 'Harbor'
WHEN 'HOLW' THEN 'Hollow'
WHEN 'HWY' THEN 'Highway'
WHEN 'HTS' THEN 'Hights'
WHEN 'KY' THEN 'Key'
WHEN 'LNDG' THEN 'Landing'
WHEN 'LN' THEN 'Lane'
WHEN 'LOOP' THEN 'Loop'
WHEN 'MALL' THEN 'Mall'
WHEN 'MAL' THEN 'Mall'
WHEN 'MTWY' THEN 'Motorway'
WHEN 'OVAL' THEN 'Oval'
WHEN 'OPAS' THEN 'Overpass'
WHEN 'OVPS' THEN 'Overpass'
WHEN 'PARK' THEN 'Park'
WHEN 'PASS' THEN 'Pass'
WHEN 'PATH' THEN 'Path'
WHEN 'PIKE' THEN 'Pike'
WHEN 'PKWY' THEN 'Parkway'
WHEN 'PKY' THEN 'Parkway'
WHEN 'PL' THEN 'Place'
WHEN 'PLZ' THEN 'Plaza'
WHEN 'PSGE' THEN 'Passage'
WHEN 'PT' THEN 'Point'
WHEN 'RAMP' THEN 'Ramp'
WHEN 'RDG' THEN 'Ridge'
WHEN 'RD' THEN 'Road'
WHEN 'RMRD' THEN 'Ranch to Market Road'
WHEN 'RNCH' THEN 'Ranch'
WHEN 'ROW' THEN 'Row'
WHEN 'RTE' THEN 'Route'
WHEN 'RUE' THEN 'Rue'
WHEN 'RUN' THEN 'Run'
WHEN 'SKWY' THEN 'Skyway'
WHEN 'SPUR' THEN 'Spur'
WHEN 'SQ' THEN 'Square'
WHEN 'SR' THEN 'State Route'
WHEN 'STCT' THEN 'Street Court'
WHEN 'ST' THEN 'Street'
WHEN 'STR' THEN 'Stravenue'
WHEN 'TER' THEN 'Terrace'
WHEN 'TFWY' THEN 'Trafficway'
WHEN 'THFR' THEN 'Thoroughfare'
WHEN 'THWY' THEN 'Thruway'
WHEN 'TPKE' THEN 'Turnpike'
WHEN 'TRCE' THEN 'Trace'
WHEN 'TRL' THEN 'Trail'
WHEN 'TRL' THEN 'Trail'
WHEN 'TUNL' THEN 'Tunnel'
WHEN 'UNP' THEN 'Underpass'
WHEN 'VIA' THEN 'Viaduct'
WHEN 'VIS' THEN 'Vista'
WHEN 'WALK' THEN 'Walk'
WHEN 'WAY' THEN 'Way'
WHEN 'WKWY' THEN 'Walkway'
WHEN 'XING' THEN 'Crossing'
ELSE n
END;
RETURN r;
END;
$$ LANGUAGE plpgsql;
create or replace function expand_direction (n varchar) RETURNS varchar AS $$
DECLARE
dir varchar;
BEGIN
IF n IS NULL THEN
RETURN '';
END IF;
SELECT INTO dir
CASE n
WHEN 'N' THEN 'North'
WHEN 'NE' THEN 'Northeast'
WHEN 'NW' THEN 'Northwest'
WHEN 'E' THEN 'East'
WHEN 'W' THEN 'West'
WHEN 'S' THEN 'South'
WHEN 'SE' THEN 'Southeast'
WHEN 'SW'THEN 'Southwest'
ELSE n
END;
RETURN dir;
END;
$$ LANGUAGE plpgsql;
--
-- start parsing addresses
--
-- reset our fields
update parcels__public_ SET "addr:housenumber" = NULL,
"addr:street" = NULL,
"addr:unit" = NULL
where "addr:housenumber" IS NOT NULL;
-- basic 123 Main with no common suffixes or numbers
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+)$', '\1')),
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+)$', '\2')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]+)'
AND situsfmt1 NOT LIKE '%NONE';
-- convenient query to check status as you go
select count(*), min(situsfmt1), min("addr:housenumber"), max("addr:housenumber"), "addr:street", min("addr:unit") from parcels__public_
where "addr:housenumber" is not null
group by "addr:street"
order by count desc;
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)$', '\1')), update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)$', '\1')),
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)$', 'Highway \2')) where situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+)'; "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)$', 'Highway \2')) where situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+)';
-- select situsfmt1, "addr:housenumber", "addr:street" from parcels__public_ where situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+)'; update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HIGHWAY ([0-9]+)$', '\1')),
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HIGHWAY ([0-9]+)$', 'Highway \2')) where situsfmt1 SIMILAR TO '([0-9]+) HIGHWAY ([0-9]+)';
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ARROWHEAD MTN TRL$', '\1')),
"addr:street" = 'Arrowhead Mountain Trail' where situsfmt1 SIMILAR TO '([0-9]+) ARROWHEAD MTN TRL';
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) S FITCH MTN RD$', '\1')),
"addr:street" = 'South Fitch Mountain Road' where situsfmt1 SIMILAR TO '([0-9]+) S FITCH MTN RD';
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) S MCDOWELL EXT BLVD$', '\1')),
"addr:street" = 'South McDowell Boulevard Extension' where situsfmt1 SIMILAR TO '([0-9]+) S MCDOWELL EXT BLVD';
-- basic 123 Main St
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\2 ')) -- Main / 4th / A / Saint
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\3'))) -- Street / Johns
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})';
-- now 123 Twin Oaks Ln or 123 St Oaks Pl
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 123
"addr:street" = initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\2'))) -- St / Los
|| initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', ' \3 ')) -- Pl / Main
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\4'))) -- Dr / Oak
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})';
-- now 123 E Cherry Creek Rd
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 123
"addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\2'))) -- South
|| initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', ' \3 ')) -- Cherry
|| initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\4 ')) -- Creek
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\5'))) -- Street
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})';
-- basic directional 123 S Main St
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\1')), -- 123
"addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\2'))) -- South
|| initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', ' \3 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\4'))) -- Street
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{1}) ([0-9A-Z]{1,99}) ([A-Z]{2,99})';
-- and the ever lovable 123 Main St S
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\2 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\3'))) -- Street
|| ' ' -- space
|| initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\4'))) -- S (South)
where situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})';
-- and the even more lovable 123 Main Hill St S
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\1')), -- 123
"addr:street" = initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\2'))) -- Main
|| initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', ' \3 ')) -- Hill
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\4'))) -- Street
|| ' ' -- space
|| initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\5'))) -- S (South)
where situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})';
-- for these apartment numbers we're gonna need to start using some functions
-- no direction but three words in street name
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\2 \3 ')) -- La Main
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\4'))), -- Street
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\5')) -- Unit 4
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)';
-- suffix direction
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\2 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\3'))) -- Street
|| ' ' -- space
|| initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\4'))), -- S (South)
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\5')) -- Unit 4 / A
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)';
-- prefix direction
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\2'))) -- S (South)
|| initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', ' \3 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\4'))), -- Street
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\5')) -- Unit 4 / A
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)';
-- no direction but two words in street name
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\2 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\3'))), -- Street
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\4')) -- Unit 4 / A / 1-A-B2
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)';
-- no direction but one word in street name
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\2')), -- Main / 4th
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\3')) -- Unit 4 / A / 1-A-B2
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)';
-- 123 D EXT ST is a unique case that actually needs to be 123 D Street Extension
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) EXT ST$', '\1')),
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) EXT ST$', '\2 Street Extension')) where situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+) EXT ST';
-- 123 B MEADOWBROOK CT is a unique case that actually needs to be 123 Meadowbrook Court, Unit B
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]) MEADOWBROOK CT$', '\1')),
"addr:street" = 'Meadowbrook Court',
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]) MEADOWBROOK CT$', '\2')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]) MEADOWBROOK CT';
-- 123 HWY 116 #C1 is a unique case that needs to be 123 Highway 116, Unit C1
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)$', '\1')),
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)$', 'Highway \2')),
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)$', '\3'))
where "addr:housenumber" is null and situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)';
-- TODO: replace Mc([a-z]) with Mc(upper(\1)) when preceded by nothingness or a space
-- capitalize Macarthur, Macfarlane, Mackinnon but not Macaw Mackey Mackl[iy]n or Macmahan
-- investigate Blank Road
-- consider "0" housenumbers
-- remove leading zeroes in housenumbers
-- remove "Ste" from unit
-- 5330 OLD REDWOOD HWY #A B & C
-- 27801 STEWARTS PT SKAGGS SPRGS RD
-- 10000 FRANZ VALLEY SCHOOL RD
-- 1003 HWY 116 N
-- 1382 HWY 116 S #1
-- 100 SPRING MTN SUMMIT TRL
-- 1055 BROADWAY #C, D
-- 1055 BROADWAY #E - H
-- 10826 SUMMER HOME PARK RD
-- 1323 W DRY CREEK RD #2
-- 1340 19TH HOLE DR
-- 14521 CANYON 2 RD
-- 14578 CANYON 1 RD
-- 1460 TOWN & COUNTRY DR
-- 99 e SHILOH RD
--
-- Match buildings to parcels
--
-- identify repeating parcels (indicates multiple addresses associated with buildings) -- identify repeating parcels (indicates multiple addresses associated with buildings)
WITH geom_counts AS ( WITH geom_counts AS (
SELECT array_agg(gid) AS ids, COUNT(*) SELECT array_agg(gid) AS ids, COUNT(*)
@ -151,6 +410,11 @@ WHERE
-- result: 44090 -- result: 44090
--SELECT COUNT(*) FROM sonoma_county_building_outlines WHERE "addr:housenumber" IS NOT NULL OR "addr:street" IS NOT NULL;
-- result: 123793
--SELECT COUNT(*) FROM sonoma_county_building_outlines WHERE "addr:housenumber" IS NULL AND "addr:street" IS NULL;
-- result: 155217
-- try to assign multiple addresses from multiple parcels to single buildings -- try to assign multiple addresses from multiple parcels to single buildings
WITH addresses AS ( WITH addresses AS (
SELECT SELECT
@ -172,9 +436,11 @@ UPDATE sonoma_county_building_outlines AS b SET
FROM addresses AS a FROM addresses AS a
WHERE a.gid = b.gid; WHERE a.gid = b.gid;
-------------------------------------------------------TODO --select * from sonoma_county_building_outlines where "addr:housenumber" LIKE '%;%' OR "addr:street" LIKE '%;%';
-- result: 0, may not be working TODO
-- try to identify addresses for buildings across multiple parcels -- try to identify addresses for buildings across multiple parcels
-- todo: this may not have done anything
WITH addresses AS ( WITH addresses AS (
SELECT SELECT
b.gid, b.gid,
@ -200,9 +466,25 @@ WHERE
count = 1 AND -- only simple cases! count = 1 AND -- only simple cases!
a.gid = b.gid; a.gid = b.gid;
--select * from sonoma_county_building_outlines where "addr:housenumber" LIKE '%;%' OR "addr:street" LIKE '%;%';
-- result: 0, may not be working TODO
-- identify intersecting/conflated buildings -- identify intersecting/conflated buildings
--
-- RUN ONLY ONE
--
-- IF USING Overpass -> QGIS -> Postgres Dump:
UPDATE sonoma_county_building_outlines AS b SET conflated = FALSE; UPDATE sonoma_county_building_outlines AS b SET conflated = FALSE;
UPDATE sonoma_county_building_outlines AS b SET conflated = TRUE UPDATE sonoma_county_building_outlines AS b SET conflated = TRUE
FROM osmquery_buildings_pgdump AS osm
WHERE ST_Intersects(b.geom,osm.wkb_geometry)
AND osm.building IS NOT NULL and osm.building != 'no';
-- IF USING a direct OSM2PGSQL import:
UPDATE sonoma_county_building_outlines AS b SET conflated = TRUE
FROM son_polygon AS osm FROM son_polygon AS osm
WHERE ST_Intersects(b.geom,osm.way) WHERE ST_Intersects(b.geom,osm.way)
AND osm.building IS NOT NULL and osm.building != 'no'; AND osm.building IS NOT NULL and osm.building != 'no';
@ -213,26 +495,20 @@ FROM son_polygon AS osm
-- first do conflated buildings -- first do conflated buildings
with poly as ( with poly as (
SELECT SELECT
gid, gid,
"addr:housenumber", "addr:housenumber",
"addr:street", "addr:street",
est_h_feet, "addr:unit",
storyabove, (st_dump(loc_geom)).*
storybelow,
cwwuse,
(st_dump(loc_geom)).*
FROM sonoma_county_building_outlines FROM sonoma_county_building_outlines
WHERE conflated WHERE conflated
) )
SELECT SELECT
poly.gid, poly.gid,
poly."addr:housenumber", poly."addr:housenumber",
poly."addr:street", poly."addr:street",
poly.est_h_feet, poly."addr:unit",
poly.storyabove,
poly.storybelow,
poly.cwwuse,
ST_Transform(baz.geom,4326) AS geom ST_Transform(baz.geom,4326) AS geom
INTO simplified_conflated_buildings INTO simplified_conflated_buildings
FROM ( FROM (
@ -251,29 +527,32 @@ WHERE
AND ST_Area(st_intersection(poly.geom, baz.geom))/ST_Area(baz.geom) > 0.9; AND ST_Area(st_intersection(poly.geom, baz.geom))/ST_Area(baz.geom) > 0.9;
ALTER TABLE simplified_conflated_buildings ADD CONSTRAINT temp1_pkey PRIMARY KEY (gid); ALTER TABLE simplified_conflated_buildings ADD CONSTRAINT temp1_pkey PRIMARY KEY (gid);
-- 233966 duplicated, deleted smaller
-- 248900 duplicated, deleted smaller
-- 246427 duplicated, deleted smaller
-- 240471 duplicated, deleted smaller
-- 277549 duplicated, deleted smaller
-- 269953
-- next do non-conflated buldings separately -- next do non-conflated buldings separately
with poly as ( with poly as (
SELECT SELECT
gid, gid,
"addr:housenumber", "addr:housenumber",
"addr:street", "addr:street",
est_h_feet, "addr:unit",
storyabove, (st_dump(loc_geom)).*
storybelow,
cwwuse,
(st_dump(loc_geom)).*
FROM sonoma_county_building_outlines FROM sonoma_county_building_outlines
WHERE NOT conflated -- note: NOT WHERE NOT conflated --note: NOT
) )
SELECT SELECT
poly.gid, poly.gid,
poly."addr:housenumber", poly."addr:housenumber",
poly."addr:street", poly."addr:street",
poly.est_h_feet, poly."addr:unit",
poly.storyabove, ST_Transform(baz.geom,4326) AS geom
poly.storybelow,
poly.cwwuse,
ST_Transform(baz.geom,4326) AS geom
INTO simplified_buildings INTO simplified_buildings
FROM ( FROM (
SELECT (ST_Dump(ST_Polygonize(distinct geom))).geom as geom SELECT (ST_Dump(ST_Polygonize(distinct geom))).geom as geom