mirror of
https://github.com/zyphlar/sonoma-import.git
synced 2024-03-08 15:07:48 +00:00
improve conflation, adjust readme, TODOs
This commit is contained in:
parent
868c23eb79
commit
fa827a0058
44
README.md
44
README.md
@ -7,32 +7,54 @@ Based on https://github.com/Nate-Wessel/hamilton-import
|
||||
- https://gis-sonomacounty.hub.arcgis.com/datasets/0f5982c3582d4de0b811e68d7f0bff8f
|
||||
- https://overpass-turbo.eu/
|
||||
|
||||
Overpass query:
|
||||
Overpass query (you may save as OSM file, shapefile, or postgres sql dump depending on your overpass client)
|
||||
|
||||
```
|
||||
area[name="Sonoma County"];
|
||||
way[building](area);
|
||||
/*added by auto repair*/
|
||||
(
|
||||
way[building](area);
|
||||
relation[building](area);
|
||||
);
|
||||
(._;>;);
|
||||
/*end of auto repair*/
|
||||
out;
|
||||
```
|
||||
|
||||
If using an Overpass -> QGIS -> Postgres dump, save it as `osmquery_buildings_pgdump.sql` for later.
|
||||
Otherwise osm2pgsql should create tables like `son_polygon` for later.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
The postgis package appropriate for the version of postgres server you have installed (in my case, 11)
|
||||
|
||||
Ubuntu
|
||||
- sudo apt install shp2pgsql osm2pgsql
|
||||
- sudo apt install postgresql-11 postgresql-11-postgis-3
|
||||
- The postgresql server started/running/configured and databases `gis` and `osm` created
|
||||
- sudo apt install postgresql-11 postgresql-11-postgis-3 shp2pgsql osm2pgsql
|
||||
|
||||
Debian (shp2pgsql is included in postgis)
|
||||
- sudo apt install postgresql postgis osm2pgsql
|
||||
|
||||
- The postgresql server started/running/configured and database `gis` created
|
||||
|
||||
## Running
|
||||
|
||||
- Run the following SQL inside the `gis` database to enable the PostGIS and hstore extensions: `CREATE EXTENSION postgis; CREATE EXTENSION hstore;`
|
||||
- Run the following SQL as a superuser (postgres) inside the `gis` database to enable the PostGIS and hstore extensions: `CREATE EXTENSION postgis; CREATE EXTENSION hstore;`
|
||||
- Unzip the `original_data` and open a shell in that folder.
|
||||
- Then, run from your shell: `shp2pgsql -s 3735:4326 -g geom -I Parcels__Public_.shp | psql -d gis -U postgres -W`
|
||||
- `shp2pgsql -s 3735:4326 -g geom -I Sonoma_County_Building_Outlines.shp | psql -d gis -U postgres -W`
|
||||
- `osm2pgsql -d gis -c --prefix son --slim --extra-attributes --hstore --latlong sonoma-orig-buildings-20201219.osm -U postgres -W`
|
||||
- Here we are assuming that county data is in WGS84/EPSG4236 format, which was true as of last check and is also what OSM uses.
|
||||
- Run from your shell: `shp2pgsql -s 4326 -I Parcels__Public_.shp | psql -d gis -U postgres -W`
|
||||
- `shp2pgsql -s 4326 -I Sonoma_County_Building_Outlines.shp | psql -d gis -U postgres -W`
|
||||
- `shp2pgsql -s 4326 -I osm-buildings-01-03.shp | psql -d gis -U postgres -W`
|
||||
|
||||
Now all the data is in Postgres. For processing and conflation, read through and execute `conflation.sql` as per your comfort level.
|
||||
|
||||
|
||||
### Internal Notes
|
||||
- http://download.geofabrik.de/north-america/us/california/norcal-latest.osm.pbf
|
||||
|
||||
```
|
||||
shp2pgsql -s 4326 -I Parcels__Public_.shp | psql -d openstreetmap -U openstreetmap -W -h localhost -p 54321
|
||||
shp2pgsql -s 4326 -I Sonoma_County_Building_Outlines.shp | psql -d openstreetmap -U openstreetmap -W -h localhost -p 54321
|
||||
psql -d openstreetmap -U openstreetmap -W -h localhost -p 54321 -f osmquery-pgdump.sql
|
||||
|
||||
#unused
|
||||
osm2pgsql -d gis -c --prefix son --slim --extra-attributes --hstore --latlong sonoma-orig-buildings-20201219.osm -U postgres -W`
|
||||
osm2pgsql -d openstreetmap -c --prefix son --slim --extra-attributes --hstore --latlong norcal-latest-20200103.osm.pbf -U openstreetmap -W -H localhost -P 54321
|
||||
```
|
413
conflation.sql
413
conflation.sql
@ -2,61 +2,320 @@
|
||||
ALTER TABLE sonoma_county_building_outlines
|
||||
ADD COLUMN "addr:housenumber" text,
|
||||
ADD COLUMN "addr:street" text,
|
||||
ADD COLUMN loc_geom geometry(multipolygon,32616),
|
||||
ADD COLUMN "addr:unit" text,
|
||||
ADD COLUMN loc_geom geometry(multipolygon,4326),
|
||||
ADD COLUMN conflated boolean DEFAULT FALSE,
|
||||
ADD COLUMN main boolean; -- is it the main building on the parcel?
|
||||
|
||||
-- create local geometry fields and validate geometries
|
||||
UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(ST_Transform(geom,32616));
|
||||
UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(geom);
|
||||
CREATE INDEX ON sonoma_county_building_outlines USING GIST (loc_geom);
|
||||
|
||||
-- added fields for the parcels table
|
||||
ALTER TABLE parcels__public_
|
||||
ADD COLUMN "addr:housenumber" text,
|
||||
ADD COLUMN "addr:street" text,
|
||||
ADD COLUMN loc_geom geometry(multipolygon,32616),
|
||||
ADD COLUMN "addr:unit" text,
|
||||
ADD COLUMN loc_geom geometry(multipolygon,4326),
|
||||
ADD COLUMN building_count integer,
|
||||
ADD COLUMN repeating BOOLEAN DEFAULT FALSE;
|
||||
|
||||
-- create local geometry fields and validate geometries
|
||||
UPDATE parcels__public_ SET loc_geom = ST_MakeValid(ST_Transform(geom,32616));
|
||||
UPDATE parcels__public_ SET loc_geom = ST_MakeValid(geom);
|
||||
CREATE INDEX ON parcels__public_ USING GIST (loc_geom);
|
||||
|
||||
-- parse and expand parcel street addresses
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+)$', '\1')),
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+)$', '\2')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+)'
|
||||
AND situsfmt1 NOT LIKE '%NONE' AND situsfmt1 NOT SIMILAR TO '([0-9]+) ([A-Z ]+) (AVE|DR|RD|ST|LN|CT|PL|CIR|TER|BLVD|PKWY|HWY)' AND situsfmt1 NOT SIMILAR TO '% ([^A-Z]+)';
|
||||
-- select situsfmt1, "addr:housenumber", "addr:street" from parcels__public_ limit 100;
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+) AVE$', '\1')),
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) AVE$', '\2 Avenue')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) AVE';
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) DR$', '\1')),
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) DR$', '\2 Drive')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) DR';
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) RD$', '\1')),
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) RD$', '\2 Road')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) RD';
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) ST$', '\1')),
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) ST$', '\2 Street')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) ST';
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) LN$', '\1')),
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) LN$', '\2 Lane')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) LN';
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) CT$', '\1')),
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) CT$', '\2 Court')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) CT';
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) PL$', '\1')),
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) PL$', '\2 Place')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) PL';
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) CIR$', '\1')),
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) CIR$', '\2 Circle')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) CIR';
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) TER$', '\1')),
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) TER$', '\2 Terrace')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) TER';
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) BLVD$', '\1')),
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) BLVD$', '\2 Boulevard')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) BLVD';
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) PKWY$', '\1')),
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) PKWY$', '\2 Parkway')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) PKWY';
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) HWY$', '\1')),
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z ]+) HWY$', '\2 Highway')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z ]+) HWY';
|
||||
-- select situsfmt1, "addr:housenumber", "addr:street" from parcels__public_ limit 100;
|
||||
-- TODO: find/handle oddballs like 123A Main St and 123 Main St #4
|
||||
SELECT situsfmt1, "addr:housenumber", "addr:street"
|
||||
FROM public.parcels__public_
|
||||
where "addr:housenumber" is null
|
||||
and situsfmt1 NOT SIMILAR TO '([0-9]+)[A-Z]* [A-Z ]*([0-9]*[A-Z\- ]+)'
|
||||
and situsfmt1 NOT SIMILAR TO '%NONE'
|
||||
and situsfmt1 NOT SIMILAR TO '%#%'
|
||||
and situsfmt1 SIMILAR TO '([0-9]+)% %'
|
||||
ORDER BY gid ASC;
|
||||
|
||||
--
|
||||
-- functions for address parsing
|
||||
--
|
||||
|
||||
create or replace function expand_road(n varchar) RETURNS varchar as $$
|
||||
DECLARE
|
||||
r varchar;
|
||||
BEGIN
|
||||
SELECT INTO r
|
||||
CASE upper(n)
|
||||
WHEN 'ACRD' THEN 'Access Road'
|
||||
WHEN 'AL' THEN 'Alley'
|
||||
WHEN 'ALY' THEN 'Alley'
|
||||
WHEN 'ARC' THEN 'Arcade'
|
||||
WHEN 'AV' THEN 'Avenue'
|
||||
WHEN 'AVE' THEN 'Avenue'
|
||||
WHEN 'BLF' THEN 'Bluff'
|
||||
WHEN 'BLV' THEN 'Boulevard'
|
||||
WHEN 'BLVD' THEN 'Boulevard'
|
||||
WHEN 'BR' THEN 'Bridge'
|
||||
WHEN 'BRG' THEN 'Bridge'
|
||||
WHEN 'BYP' THEN 'Bypass'
|
||||
WHEN 'CDS' THEN 'Cul-de-sac'
|
||||
WHEN 'CIR' THEN 'Circle'
|
||||
WHEN 'CMNS' THEN 'Commons'
|
||||
WHEN 'CONC' THEN 'Concession'
|
||||
WHEN 'CRES' THEN 'Crescent'
|
||||
WHEN 'CRST' THEN 'Crest'
|
||||
WHEN 'CSWY' THEN 'Crossway'
|
||||
WHEN 'CT' THEN 'Court'
|
||||
WHEN 'CTR' THEN 'Center'
|
||||
WHEN 'CV' THEN 'Cove'
|
||||
WHEN 'DR' THEN 'Drive'
|
||||
WHEN 'ET' THEN 'ET'
|
||||
WHEN 'EXPWY' THEN 'Expressway'
|
||||
WHEN 'EXPY' THEN 'Expressway'
|
||||
WHEN 'EXT' THEN 'Extension'
|
||||
WHEN 'FMRD' THEN 'Farm to Market Road'
|
||||
WHEN 'FWY' THEN 'Freeway'
|
||||
WHEN 'GRD' THEN 'Grade'
|
||||
WHEN 'HBR' THEN 'Harbor'
|
||||
WHEN 'HOLW' THEN 'Hollow'
|
||||
WHEN 'HWY' THEN 'Highway'
|
||||
WHEN 'HTS' THEN 'Hights'
|
||||
WHEN 'KY' THEN 'Key'
|
||||
WHEN 'LNDG' THEN 'Landing'
|
||||
WHEN 'LN' THEN 'Lane'
|
||||
WHEN 'LOOP' THEN 'Loop'
|
||||
WHEN 'MALL' THEN 'Mall'
|
||||
WHEN 'MAL' THEN 'Mall'
|
||||
WHEN 'MTWY' THEN 'Motorway'
|
||||
WHEN 'OVAL' THEN 'Oval'
|
||||
WHEN 'OPAS' THEN 'Overpass'
|
||||
WHEN 'OVPS' THEN 'Overpass'
|
||||
WHEN 'PARK' THEN 'Park'
|
||||
WHEN 'PASS' THEN 'Pass'
|
||||
WHEN 'PATH' THEN 'Path'
|
||||
WHEN 'PIKE' THEN 'Pike'
|
||||
WHEN 'PKWY' THEN 'Parkway'
|
||||
WHEN 'PKY' THEN 'Parkway'
|
||||
WHEN 'PL' THEN 'Place'
|
||||
WHEN 'PLZ' THEN 'Plaza'
|
||||
WHEN 'PSGE' THEN 'Passage'
|
||||
WHEN 'PT' THEN 'Point'
|
||||
WHEN 'RAMP' THEN 'Ramp'
|
||||
WHEN 'RDG' THEN 'Ridge'
|
||||
WHEN 'RD' THEN 'Road'
|
||||
WHEN 'RMRD' THEN 'Ranch to Market Road'
|
||||
WHEN 'RNCH' THEN 'Ranch'
|
||||
WHEN 'ROW' THEN 'Row'
|
||||
WHEN 'RTE' THEN 'Route'
|
||||
WHEN 'RUE' THEN 'Rue'
|
||||
WHEN 'RUN' THEN 'Run'
|
||||
WHEN 'SKWY' THEN 'Skyway'
|
||||
WHEN 'SPUR' THEN 'Spur'
|
||||
WHEN 'SQ' THEN 'Square'
|
||||
WHEN 'SR' THEN 'State Route'
|
||||
WHEN 'STCT' THEN 'Street Court'
|
||||
WHEN 'ST' THEN 'Street'
|
||||
WHEN 'STR' THEN 'Stravenue'
|
||||
WHEN 'TER' THEN 'Terrace'
|
||||
WHEN 'TFWY' THEN 'Trafficway'
|
||||
WHEN 'THFR' THEN 'Thoroughfare'
|
||||
WHEN 'THWY' THEN 'Thruway'
|
||||
WHEN 'TPKE' THEN 'Turnpike'
|
||||
WHEN 'TRCE' THEN 'Trace'
|
||||
WHEN 'TRL' THEN 'Trail'
|
||||
WHEN 'TRL' THEN 'Trail'
|
||||
WHEN 'TUNL' THEN 'Tunnel'
|
||||
WHEN 'UNP' THEN 'Underpass'
|
||||
WHEN 'VIA' THEN 'Viaduct'
|
||||
WHEN 'VIS' THEN 'Vista'
|
||||
WHEN 'WALK' THEN 'Walk'
|
||||
WHEN 'WAY' THEN 'Way'
|
||||
WHEN 'WKWY' THEN 'Walkway'
|
||||
WHEN 'XING' THEN 'Crossing'
|
||||
ELSE n
|
||||
|
||||
END;
|
||||
RETURN r;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
create or replace function expand_direction (n varchar) RETURNS varchar AS $$
|
||||
DECLARE
|
||||
dir varchar;
|
||||
BEGIN
|
||||
IF n IS NULL THEN
|
||||
RETURN '';
|
||||
END IF;
|
||||
SELECT INTO dir
|
||||
CASE n
|
||||
WHEN 'N' THEN 'North'
|
||||
WHEN 'NE' THEN 'Northeast'
|
||||
WHEN 'NW' THEN 'Northwest'
|
||||
WHEN 'E' THEN 'East'
|
||||
WHEN 'W' THEN 'West'
|
||||
WHEN 'S' THEN 'South'
|
||||
WHEN 'SE' THEN 'Southeast'
|
||||
WHEN 'SW'THEN 'Southwest'
|
||||
ELSE n
|
||||
END;
|
||||
RETURN dir;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
|
||||
--
|
||||
-- start parsing addresses
|
||||
--
|
||||
|
||||
-- reset our fields
|
||||
update parcels__public_ SET "addr:housenumber" = NULL,
|
||||
"addr:street" = NULL,
|
||||
"addr:unit" = NULL
|
||||
where "addr:housenumber" IS NOT NULL;
|
||||
|
||||
-- basic 123 Main with no common suffixes or numbers
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+)$', '\1')),
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+)$', '\2')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]+)'
|
||||
AND situsfmt1 NOT LIKE '%NONE';
|
||||
|
||||
|
||||
-- convenient query to check status as you go
|
||||
select count(*), min(situsfmt1), min("addr:housenumber"), max("addr:housenumber"), "addr:street", min("addr:unit") from parcels__public_
|
||||
where "addr:housenumber" is not null
|
||||
group by "addr:street"
|
||||
order by count desc;
|
||||
|
||||
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)$', '\1')),
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)$', 'Highway \2')) where situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+)';
|
||||
-- select situsfmt1, "addr:housenumber", "addr:street" from parcels__public_ where situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+)';
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HIGHWAY ([0-9]+)$', '\1')),
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HIGHWAY ([0-9]+)$', 'Highway \2')) where situsfmt1 SIMILAR TO '([0-9]+) HIGHWAY ([0-9]+)';
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ARROWHEAD MTN TRL$', '\1')),
|
||||
"addr:street" = 'Arrowhead Mountain Trail' where situsfmt1 SIMILAR TO '([0-9]+) ARROWHEAD MTN TRL';
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) S FITCH MTN RD$', '\1')),
|
||||
"addr:street" = 'South Fitch Mountain Road' where situsfmt1 SIMILAR TO '([0-9]+) S FITCH MTN RD';
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) S MCDOWELL EXT BLVD$', '\1')),
|
||||
"addr:street" = 'South McDowell Boulevard Extension' where situsfmt1 SIMILAR TO '([0-9]+) S MCDOWELL EXT BLVD';
|
||||
|
||||
-- basic 123 Main St
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\1')), -- 123
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\2 ')) -- Main / 4th / A / Saint
|
||||
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\3'))) -- Street / Johns
|
||||
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})';
|
||||
-- now 123 Twin Oaks Ln or 123 St Oaks Pl
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 123
|
||||
"addr:street" = initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\2'))) -- St / Los
|
||||
|| initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', ' \3 ')) -- Pl / Main
|
||||
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\4'))) -- Dr / Oak
|
||||
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})';
|
||||
-- now 123 E Cherry Creek Rd
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 123
|
||||
"addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\2'))) -- South
|
||||
|| initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', ' \3 ')) -- Cherry
|
||||
|| initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\4 ')) -- Creek
|
||||
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\5'))) -- Street
|
||||
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})';
|
||||
|
||||
-- basic directional 123 S Main St
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\1')), -- 123
|
||||
"addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\2'))) -- South
|
||||
|| initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', ' \3 ')) -- Main / 4th
|
||||
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\4'))) -- Street
|
||||
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{1}) ([0-9A-Z]{1,99}) ([A-Z]{2,99})';
|
||||
|
||||
-- and the ever lovable 123 Main St S
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\1')), -- 123
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\2 ')) -- Main / 4th
|
||||
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\3'))) -- Street
|
||||
|| ' ' -- space
|
||||
|| initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\4'))) -- S (South)
|
||||
where situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})';
|
||||
|
||||
-- and the even more lovable 123 Main Hill St S
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\1')), -- 123
|
||||
"addr:street" = initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\2'))) -- Main
|
||||
|| initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', ' \3 ')) -- Hill
|
||||
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\4'))) -- Street
|
||||
|| ' ' -- space
|
||||
|| initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\5'))) -- S (South)
|
||||
where situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})';
|
||||
|
||||
|
||||
-- for these apartment numbers we're gonna need to start using some functions
|
||||
-- no direction but three words in street name
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\2 \3 ')) -- La Main
|
||||
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\4'))), -- Street
|
||||
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\5')) -- Unit 4
|
||||
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)';
|
||||
-- suffix direction
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\2 ')) -- Main / 4th
|
||||
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\3'))) -- Street
|
||||
|| ' ' -- space
|
||||
|| initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\4'))), -- S (South)
|
||||
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\5')) -- Unit 4 / A
|
||||
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)';
|
||||
-- prefix direction
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123
|
||||
"addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\2'))) -- S (South)
|
||||
|| initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', ' \3 ')) -- Main / 4th
|
||||
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\4'))), -- Street
|
||||
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\5')) -- Unit 4 / A
|
||||
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)';
|
||||
-- no direction but two words in street name
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\2 ')) -- Main / 4th
|
||||
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\3'))), -- Street
|
||||
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\4')) -- Unit 4 / A / 1-A-B2
|
||||
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)';
|
||||
-- no direction but one word in street name
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\2')), -- Main / 4th
|
||||
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\3')) -- Unit 4 / A / 1-A-B2
|
||||
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)';
|
||||
|
||||
-- 123 D EXT ST is a unique case that actually needs to be 123 D Street Extension
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) EXT ST$', '\1')),
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) EXT ST$', '\2 Street Extension')) where situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+) EXT ST';
|
||||
|
||||
-- 123 B MEADOWBROOK CT is a unique case that actually needs to be 123 Meadowbrook Court, Unit B
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]) MEADOWBROOK CT$', '\1')),
|
||||
"addr:street" = 'Meadowbrook Court',
|
||||
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]) MEADOWBROOK CT$', '\2')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]) MEADOWBROOK CT';
|
||||
|
||||
-- 123 HWY 116 #C1 is a unique case that needs to be 123 Highway 116, Unit C1
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)$', '\1')),
|
||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)$', 'Highway \2')),
|
||||
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)$', '\3'))
|
||||
where "addr:housenumber" is null and situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)';
|
||||
|
||||
-- TODO: replace Mc([a-z]) with Mc(upper(\1)) when preceded by nothingness or a space
|
||||
-- capitalize Macarthur, Macfarlane, Mackinnon but not Macaw Mackey Mackl[iy]n or Macmahan
|
||||
-- investigate Blank Road
|
||||
-- consider "0" housenumbers
|
||||
-- remove leading zeroes in housenumbers
|
||||
-- remove "Ste" from unit
|
||||
-- 5330 OLD REDWOOD HWY #A B & C
|
||||
-- 27801 STEWARTS PT SKAGGS SPRGS RD
|
||||
-- 10000 FRANZ VALLEY SCHOOL RD
|
||||
-- 1003 HWY 116 N
|
||||
-- 1382 HWY 116 S #1
|
||||
-- 100 SPRING MTN SUMMIT TRL
|
||||
-- 1055 BROADWAY #C, D
|
||||
-- 1055 BROADWAY #E - H
|
||||
-- 10826 SUMMER HOME PARK RD
|
||||
-- 1323 W DRY CREEK RD #2
|
||||
-- 1340 19TH HOLE DR
|
||||
-- 14521 CANYON 2 RD
|
||||
-- 14578 CANYON 1 RD
|
||||
-- 1460 TOWN & COUNTRY DR
|
||||
-- 99 e SHILOH RD
|
||||
|
||||
--
|
||||
-- Match buildings to parcels
|
||||
--
|
||||
|
||||
|
||||
-- identify repeating parcels (indicates multiple addresses associated with buildings)
|
||||
WITH geom_counts AS (
|
||||
SELECT array_agg(gid) AS ids, COUNT(*)
|
||||
@ -151,6 +410,11 @@ WHERE
|
||||
|
||||
-- result: 44090
|
||||
|
||||
--SELECT COUNT(*) FROM sonoma_county_building_outlines WHERE "addr:housenumber" IS NOT NULL OR "addr:street" IS NOT NULL;
|
||||
-- result: 123793
|
||||
--SELECT COUNT(*) FROM sonoma_county_building_outlines WHERE "addr:housenumber" IS NULL AND "addr:street" IS NULL;
|
||||
-- result: 155217
|
||||
|
||||
-- try to assign multiple addresses from multiple parcels to single buildings
|
||||
WITH addresses AS (
|
||||
SELECT
|
||||
@ -172,9 +436,11 @@ UPDATE sonoma_county_building_outlines AS b SET
|
||||
FROM addresses AS a
|
||||
WHERE a.gid = b.gid;
|
||||
|
||||
-------------------------------------------------------TODO
|
||||
--select * from sonoma_county_building_outlines where "addr:housenumber" LIKE '%;%' OR "addr:street" LIKE '%;%';
|
||||
-- result: 0, may not be working TODO
|
||||
|
||||
-- try to identify addresses for buildings across multiple parcels
|
||||
-- todo: this may not have done anything
|
||||
WITH addresses AS (
|
||||
SELECT
|
||||
b.gid,
|
||||
@ -200,9 +466,25 @@ WHERE
|
||||
count = 1 AND -- only simple cases!
|
||||
a.gid = b.gid;
|
||||
|
||||
--select * from sonoma_county_building_outlines where "addr:housenumber" LIKE '%;%' OR "addr:street" LIKE '%;%';
|
||||
-- result: 0, may not be working TODO
|
||||
|
||||
|
||||
-- identify intersecting/conflated buildings
|
||||
|
||||
--
|
||||
-- RUN ONLY ONE
|
||||
--
|
||||
|
||||
-- IF USING Overpass -> QGIS -> Postgres Dump:
|
||||
UPDATE sonoma_county_building_outlines AS b SET conflated = FALSE;
|
||||
UPDATE sonoma_county_building_outlines AS b SET conflated = TRUE
|
||||
UPDATE sonoma_county_building_outlines AS b SET conflated = TRUE
|
||||
FROM osmquery_buildings_pgdump AS osm
|
||||
WHERE ST_Intersects(b.geom,osm.wkb_geometry)
|
||||
AND osm.building IS NOT NULL and osm.building != 'no';
|
||||
|
||||
-- IF USING a direct OSM2PGSQL import:
|
||||
UPDATE sonoma_county_building_outlines AS b SET conflated = TRUE
|
||||
FROM son_polygon AS osm
|
||||
WHERE ST_Intersects(b.geom,osm.way)
|
||||
AND osm.building IS NOT NULL and osm.building != 'no';
|
||||
@ -213,26 +495,20 @@ FROM son_polygon AS osm
|
||||
|
||||
-- first do conflated buildings
|
||||
with poly as (
|
||||
SELECT
|
||||
gid,
|
||||
"addr:housenumber",
|
||||
"addr:street",
|
||||
est_h_feet,
|
||||
storyabove,
|
||||
storybelow,
|
||||
cwwuse,
|
||||
(st_dump(loc_geom)).*
|
||||
SELECT
|
||||
gid,
|
||||
"addr:housenumber",
|
||||
"addr:street",
|
||||
"addr:unit",
|
||||
(st_dump(loc_geom)).*
|
||||
FROM sonoma_county_building_outlines
|
||||
WHERE conflated
|
||||
)
|
||||
SELECT
|
||||
poly.gid,
|
||||
poly."addr:housenumber",
|
||||
poly."addr:street",
|
||||
poly.est_h_feet,
|
||||
poly.storyabove,
|
||||
poly.storybelow,
|
||||
poly.cwwuse,
|
||||
poly."addr:street",
|
||||
poly."addr:unit",
|
||||
ST_Transform(baz.geom,4326) AS geom
|
||||
INTO simplified_conflated_buildings
|
||||
FROM (
|
||||
@ -251,29 +527,32 @@ WHERE
|
||||
AND ST_Area(st_intersection(poly.geom, baz.geom))/ST_Area(baz.geom) > 0.9;
|
||||
ALTER TABLE simplified_conflated_buildings ADD CONSTRAINT temp1_pkey PRIMARY KEY (gid);
|
||||
|
||||
|
||||
-- 233966 duplicated, deleted smaller
|
||||
-- 248900 duplicated, deleted smaller
|
||||
-- 246427 duplicated, deleted smaller
|
||||
-- 240471 duplicated, deleted smaller
|
||||
-- 277549 duplicated, deleted smaller
|
||||
-- 269953
|
||||
|
||||
|
||||
-- next do non-conflated buldings separately
|
||||
with poly as (
|
||||
SELECT
|
||||
gid,
|
||||
"addr:housenumber",
|
||||
"addr:street",
|
||||
est_h_feet,
|
||||
storyabove,
|
||||
storybelow,
|
||||
cwwuse,
|
||||
(st_dump(loc_geom)).*
|
||||
gid,
|
||||
"addr:housenumber",
|
||||
"addr:street",
|
||||
"addr:unit",
|
||||
(st_dump(loc_geom)).*
|
||||
FROM sonoma_county_building_outlines
|
||||
WHERE NOT conflated -- note: NOT
|
||||
WHERE NOT conflated --note: NOT
|
||||
)
|
||||
SELECT
|
||||
poly.gid,
|
||||
poly."addr:housenumber",
|
||||
poly."addr:street",
|
||||
poly.est_h_feet,
|
||||
poly.storyabove,
|
||||
poly.storybelow,
|
||||
poly.cwwuse,
|
||||
ST_Transform(baz.geom,4326) AS geom
|
||||
poly.gid,
|
||||
poly."addr:housenumber",
|
||||
poly."addr:street",
|
||||
poly."addr:unit",
|
||||
ST_Transform(baz.geom,4326) AS geom
|
||||
INTO simplified_buildings
|
||||
FROM (
|
||||
SELECT (ST_Dump(ST_Polygonize(distinct geom))).geom as geom
|
||||
|
Loading…
x
Reference in New Issue
Block a user