mirror of
https://github.com/zyphlar/sonoma-import.git
synced 2024-03-08 15:07:48 +00:00
Parse state and city better
This commit is contained in:
parent
7624a68152
commit
5bdc629114
|
@@ -119,9 +119,8 @@ Please ensure you are logged in under a dedicated import account with a user nam
|
||||||
### Internal Notes
|
### Internal Notes
|
||||||
|
|
||||||
- TODO:
|
- TODO:
|
||||||
- TAZ CIDs aren't working, each CID is a full 200mb export
|
- Spot check buildings without addresses
|
||||||
- Address conflation isn't working, no buildings have numbers
|
- Double check if geometry is adequately simplified or not
|
||||||
- Consider simplifying geometry, ~50 buildings are drawn twice
|
|
||||||
|
|
||||||
- http://download.geofabrik.de/north-america/us/california/norcal-latest.osm.pbf
|
- http://download.geofabrik.de/north-america/us/california/norcal-latest.osm.pbf
|
||||||
|
|
||||||
|
|
|
@@ -15,7 +15,7 @@ ALTER TABLE sonoma_county_building_outlines
|
||||||
ADD COLUMN IF NOT EXISTS conflated boolean DEFAULT FALSE,
|
ADD COLUMN IF NOT EXISTS conflated boolean DEFAULT FALSE,
|
||||||
ADD COLUMN IF NOT EXISTS main boolean; -- is it the main building on the parcel?
|
ADD COLUMN IF NOT EXISTS main boolean; -- is it the main building on the parcel?
|
||||||
|
|
||||||
update sonoma_county_building_outlines set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL, usecode = NULL, cid = NULL;
|
update sonoma_county_building_outlines set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL, usecode = NULL, cid = NULL, conflated = FALSE, main = FALSE;
|
||||||
|
|
||||||
-- create local geometry fields and validate geometries
|
-- create local geometry fields and validate geometries
|
||||||
UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(geom);
|
UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(geom);
|
||||||
|
@@ -28,12 +28,11 @@ ALTER TABLE parcels__public_
|
||||||
ADD COLUMN IF NOT EXISTS "addr:unit" text,
|
ADD COLUMN IF NOT EXISTS "addr:unit" text,
|
||||||
ADD COLUMN IF NOT EXISTS "addr:city" text,
|
ADD COLUMN IF NOT EXISTS "addr:city" text,
|
||||||
ADD COLUMN IF NOT EXISTS "addr:state" text,
|
ADD COLUMN IF NOT EXISTS "addr:state" text,
|
||||||
ADD COLUMN IF NOT EXISTS usecode integer,
|
|
||||||
ADD COLUMN IF NOT EXISTS loc_geom geometry(multipolygon,4326), -- local is the same in this case, except made valid
|
ADD COLUMN IF NOT EXISTS loc_geom geometry(multipolygon,4326), -- local is the same in this case, except made valid
|
||||||
ADD COLUMN IF NOT EXISTS building_count integer,
|
ADD COLUMN IF NOT EXISTS building_count integer,
|
||||||
ADD COLUMN IF NOT EXISTS repeating BOOLEAN DEFAULT FALSE;
|
ADD COLUMN IF NOT EXISTS repeating BOOLEAN DEFAULT FALSE;
|
||||||
|
|
||||||
update parcels__public_ set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL;
|
update parcels__public_ set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL, building_count = NULL, repeating = FALSE;
|
||||||
|
|
||||||
-- create local geometry fields and validate geometries
|
-- create local geometry fields and validate geometries
|
||||||
UPDATE parcels__public_ SET loc_geom = ST_MakeValid(geom);
|
UPDATE parcels__public_ SET loc_geom = ST_MakeValid(geom);
|
||||||
|
@@ -188,13 +187,39 @@ $$ LANGUAGE plpgsql;
|
||||||
-- reset our fields
|
-- reset our fields
|
||||||
update parcels__public_ SET "addr:housenumber" = NULL,
|
update parcels__public_ SET "addr:housenumber" = NULL,
|
||||||
"addr:street" = NULL,
|
"addr:street" = NULL,
|
||||||
"addr:unit" = NULL
|
"addr:unit" = NULL,
|
||||||
where "addr:housenumber" IS NOT NULL;
|
"addr:city" = NULL,
|
||||||
|
"addr:state" = NULL;
|
||||||
|
|
||||||
|
-- parse city and state
|
||||||
|
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\* ([A-Za-z]+)$', '\1')),
|
||||||
|
"addr:state" = REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\* ([A-Za-z]+)$', '\2')
|
||||||
|
where situsfmt2 SIMILAR TO '([A-Za-z]+)\* ([A-Za-z]+)' AND "addr:city" IS NULL;
|
||||||
|
-- with spaces
|
||||||
|
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+)$', '\1')),
|
||||||
|
"addr:state" = REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+)$', '\2')
|
||||||
|
where situsfmt2 SIMILAR TO '([A-Za-z ]+)\* ([A-Za-z]+)' AND "addr:city" IS NULL;
|
||||||
|
-- with spaces and zip
|
||||||
|
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+) [0-9]+$', '\1')),
|
||||||
|
"addr:state" = REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+) [0-9]+$', '\2')
|
||||||
|
where situsfmt2 SIMILAR TO '([A-Za-z ]+)\* ([A-Za-z]+) [0-9]+' AND "addr:city" IS NULL;
|
||||||
|
-- with no asterisk, and "CA" suffix
|
||||||
|
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+) CA$', '\1')),
|
||||||
|
"addr:state" = 'CA'
|
||||||
|
where situsfmt2 SIMILAR TO '([A-Za-z ]+) CA' AND "addr:city" IS NULL;
|
||||||
|
-- with no asterisk, "CA" state, and zip
|
||||||
|
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+) CA [0-9]+$', '\1')),
|
||||||
|
"addr:state" = 'CA'
|
||||||
|
where situsfmt2 SIMILAR TO '([A-Za-z ]+) CA [0-9]+' AND "addr:city" IS NULL;
|
||||||
|
|
||||||
|
|
||||||
|
-- some "cities" we don't want, and state needs to be expanded
|
||||||
|
update parcels__public_ SET "addr:city" = NULL where "addr:city" = 'Unincorp County';
|
||||||
|
update parcels__public_ SET "addr:city" = NULL where "addr:city" = 'Unknown';
|
||||||
|
update parcels__public_ SET "addr:city" = NULL where "addr:city" = 'Area Bodega Bay';
|
||||||
|
update parcels__public_ SET "addr:city" = NULL where "addr:city" = 'Area Duncans Mills';
|
||||||
|
update parcels__public_ SET "addr:state" = 'California' where "addr:state" = 'CA';
|
||||||
|
|
||||||
-- parse city and state
|
|
||||||
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\*? ([A-Za-z]+)$', '\1')),
|
|
||||||
"addr:state" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\*? ([A-Za-z]+)$', '\2'))
|
|
||||||
where situsfmt2 SIMILAR TO '([A-Za-z]+)\*? ([A-Za-z]+)' IS NOT NULL;
|
|
||||||
|
|
||||||
-- basic 123 Main with no common suffixes or numbers
|
-- basic 123 Main with no common suffixes or numbers
|
||||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+)$', '\1')),
|
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+)$', '\1')),
|
||||||
|
|
Loading…
Reference in New Issue
Block a user