mirror of
https://github.com/zyphlar/sonoma-import.git
synced 2024-03-08 15:07:48 +00:00
Parse state and city better
This commit is contained in:
parent
7624a68152
commit
5bdc629114
@ -119,9 +119,8 @@ Please ensure you are logged in under a dedicated import account with a user nam
|
||||
### Internal Notes
|
||||
|
||||
- TODO:
|
||||
- TAZ CIDs aren't working, each CID is a full 200mb export
|
||||
- Address conflation isn't working, no buildings have numbers
|
||||
- Consider simplifying geometry, ~50 buildings are drawn twice
|
||||
- Spot check buildings without addresses
|
||||
- Double check if geometry is adequately simplified or not
|
||||
|
||||
- http://download.geofabrik.de/north-america/us/california/norcal-latest.osm.pbf
|
||||
|
||||
|
@ -15,7 +15,7 @@ ALTER TABLE sonoma_county_building_outlines
|
||||
ADD COLUMN IF NOT EXISTS conflated boolean DEFAULT FALSE,
|
||||
ADD COLUMN IF NOT EXISTS main boolean; -- is it the main building on the parcel?
|
||||
|
||||
update sonoma_county_building_outlines set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL, usecode = NULL, cid = NULL;
|
||||
update sonoma_county_building_outlines set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL, usecode = NULL, cid = NULL, conflated = FALSE, main = FALSE;
|
||||
|
||||
-- create local geometry fields and validate geometries
|
||||
UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(geom);
|
||||
@ -28,12 +28,11 @@ ALTER TABLE parcels__public_
|
||||
ADD COLUMN IF NOT EXISTS "addr:unit" text,
|
||||
ADD COLUMN IF NOT EXISTS "addr:city" text,
|
||||
ADD COLUMN IF NOT EXISTS "addr:state" text,
|
||||
ADD COLUMN IF NOT EXISTS usecode integer,
|
||||
ADD COLUMN IF NOT EXISTS loc_geom geometry(multipolygon,4326), -- local is the same in this case, except made valid
|
||||
ADD COLUMN IF NOT EXISTS building_count integer,
|
||||
ADD COLUMN IF NOT EXISTS repeating BOOLEAN DEFAULT FALSE;
|
||||
|
||||
update parcels__public_ set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL;
|
||||
update parcels__public_ set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL, building_count = NULL, repeating = FALSE;
|
||||
|
||||
-- create local geometry fields and validate geometries
|
||||
UPDATE parcels__public_ SET loc_geom = ST_MakeValid(geom);
|
||||
@ -188,13 +187,39 @@ $$ LANGUAGE plpgsql;
|
||||
-- reset our fields
|
||||
update parcels__public_ SET "addr:housenumber" = NULL,
|
||||
"addr:street" = NULL,
|
||||
"addr:unit" = NULL
|
||||
where "addr:housenumber" IS NOT NULL;
|
||||
"addr:unit" = NULL,
|
||||
"addr:city" = NULL,
|
||||
"addr:state" = NULL;
|
||||
|
||||
-- parse city and state
|
||||
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\* ([A-Za-z]+)$', '\1')),
|
||||
"addr:state" = REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\* ([A-Za-z]+)$', '\2')
|
||||
where situsfmt2 SIMILAR TO '([A-Za-z]+)\* ([A-Za-z]+)' AND "addr:city" IS NULL;
|
||||
-- with spaces
|
||||
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+)$', '\1')),
|
||||
"addr:state" = REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+)$', '\2')
|
||||
where situsfmt2 SIMILAR TO '([A-Za-z ]+)\* ([A-Za-z]+)' AND "addr:city" IS NULL;
|
||||
-- with spaces and zip
|
||||
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+) [0-9]+$', '\1')),
|
||||
"addr:state" = REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+) [0-9]+$', '\2')
|
||||
where situsfmt2 SIMILAR TO '([A-Za-z ]+)\* ([A-Za-z]+) [0-9]+' AND "addr:city" IS NULL;
|
||||
-- with no asterisk, and "Ca" suffix
|
||||
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+) CA$', '\1')),
|
||||
"addr:state" = 'CA'
|
||||
where situsfmt2 SIMILAR TO '([A-Za-z ]+) CA' AND "addr:city" IS NULL;
|
||||
-- with no asterisk, "CA" state, and zip
|
||||
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+) CA [0-9]+$', '\1')),
|
||||
"addr:state" = 'CA'
|
||||
where situsfmt2 SIMILAR TO '([A-Za-z ]+) CA [0-9]+' AND "addr:city" IS NULL;
|
||||
|
||||
|
||||
-- some "cities" we don't want, and state needs to be expanded
|
||||
update parcels__public_ SET "addr:city" = NULL where "addr:city" = 'Unincorp County';
|
||||
update parcels__public_ SET "addr:city" = NULL where "addr:city" = 'Unknown';
|
||||
update parcels__public_ SET "addr:city" = NULL where "addr:city" = 'Area Bodega Bay';
|
||||
update parcels__public_ SET "addr:city" = NULL where "addr:city" = 'Area Duncans Mills';
|
||||
update parcels__public_ SET "addr:state" = 'California' where "addr:state" = 'CA';
|
||||
|
||||
-- parse city sratr
|
||||
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\*? ([A-Za-z]+)$', '\1')),
|
||||
"addr:state" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\*? ([A-Za-z]+)$', '\2'))
|
||||
where situsfmt2 SIMILAR TO '([A-Za-z]+)\*? ([A-Za-z]+)' IS NOT NULL;
|
||||
|
||||
-- basic 123 Main with no common suffixes or numbers
|
||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+)$', '\1')),
|
||||
|
Loading…
x
Reference in New Issue
Block a user