mirror of
				https://github.com/zyphlar/sonoma-import.git
				synced 2024-03-08 15:07:48 +00:00 
			
		
		
		
	Parse state and city better
This commit is contained in:
		
							parent
							
								
									7624a68152
								
							
						
					
					
						commit
						5bdc629114
					
				@ -119,9 +119,8 @@ Please ensure you are logged in under a dedicated import account with a user nam
 | 
				
			|||||||
### Internal Notes
 | 
					### Internal Notes
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- TODO:
 | 
					- TODO:
 | 
				
			||||||
 - TAZ CIDs aren't working, each CID is a full 200mb export
 | 
					  - Spot check buildings without addresses
 | 
				
			||||||
 - Address conflation isn't working, no buildings have numbers
 | 
					  - Double check if geometry is adequately simplified or not
 | 
				
			||||||
 - Consider simplifying geometry, ~50 buildings are drawn twice
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
- http://download.geofabrik.de/north-america/us/california/norcal-latest.osm.pbf
 | 
					- http://download.geofabrik.de/north-america/us/california/norcal-latest.osm.pbf
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -15,7 +15,7 @@ ALTER TABLE sonoma_county_building_outlines
 | 
				
			|||||||
	ADD COLUMN IF NOT EXISTS conflated boolean DEFAULT FALSE,
 | 
						ADD COLUMN IF NOT EXISTS conflated boolean DEFAULT FALSE,
 | 
				
			||||||
	ADD COLUMN IF NOT EXISTS main boolean; -- is it the main building on the parcel?
 | 
						ADD COLUMN IF NOT EXISTS main boolean; -- is it the main building on the parcel?
 | 
				
			||||||
 | 
					
 | 
				
			||||||
update sonoma_county_building_outlines set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL, usecode = NULL, cid = NULL;
 | 
					update sonoma_county_building_outlines set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL, usecode = NULL, cid = NULL, conflated = FALSE, main = FALSE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- create local geometry fields and validate geometries
 | 
					-- create local geometry fields and validate geometries
 | 
				
			||||||
UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(geom);
 | 
					UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(geom);
 | 
				
			||||||
@ -28,12 +28,11 @@ ALTER TABLE parcels__public_
 | 
				
			|||||||
	ADD COLUMN IF NOT EXISTS "addr:unit" text,
 | 
						ADD COLUMN IF NOT EXISTS "addr:unit" text,
 | 
				
			||||||
	ADD COLUMN IF NOT EXISTS "addr:city" text,
 | 
						ADD COLUMN IF NOT EXISTS "addr:city" text,
 | 
				
			||||||
	ADD COLUMN IF NOT EXISTS "addr:state" text,
 | 
						ADD COLUMN IF NOT EXISTS "addr:state" text,
 | 
				
			||||||
	ADD COLUMN IF NOT EXISTS usecode integer,
 | 
					 | 
				
			||||||
	ADD COLUMN IF NOT EXISTS loc_geom geometry(multipolygon,4326), -- local is the same in this case, except made valid
 | 
						ADD COLUMN IF NOT EXISTS loc_geom geometry(multipolygon,4326), -- local is the same in this case, except made valid
 | 
				
			||||||
	ADD COLUMN IF NOT EXISTS building_count integer,
 | 
						ADD COLUMN IF NOT EXISTS building_count integer,
 | 
				
			||||||
	ADD COLUMN IF NOT EXISTS repeating BOOLEAN DEFAULT FALSE;
 | 
						ADD COLUMN IF NOT EXISTS repeating BOOLEAN DEFAULT FALSE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
update parcels__public_ set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL;
 | 
					update parcels__public_ set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL, building_count = NULL, repeating = FALSE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- create local geometry fields and validate geometries
 | 
					-- create local geometry fields and validate geometries
 | 
				
			||||||
UPDATE parcels__public_ SET loc_geom = ST_MakeValid(geom);
 | 
					UPDATE parcels__public_ SET loc_geom = ST_MakeValid(geom);
 | 
				
			||||||
@ -188,13 +187,39 @@ $$ LANGUAGE plpgsql;
 | 
				
			|||||||
-- reset our fields
 | 
					-- reset our fields
 | 
				
			||||||
update parcels__public_ SET "addr:housenumber" = NULL,
 | 
					update parcels__public_ SET "addr:housenumber" = NULL,
 | 
				
			||||||
    "addr:street" = NULL,
 | 
					    "addr:street" = NULL,
 | 
				
			||||||
    "addr:unit" = NULL
 | 
					    "addr:unit" = NULL,
 | 
				
			||||||
    where "addr:housenumber" IS NOT NULL;
 | 
					    "addr:city" = NULL,
 | 
				
			||||||
 | 
					    "addr:state" = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- parse city and state
 | 
				
			||||||
 | 
					update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\* ([A-Za-z]+)$', '\1')),
 | 
				
			||||||
 | 
					    "addr:state" = REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\* ([A-Za-z]+)$', '\2')
 | 
				
			||||||
 | 
					    where situsfmt2 SIMILAR TO '([A-Za-z]+)\* ([A-Za-z]+)' AND "addr:city" IS NULL;
 | 
				
			||||||
 | 
					-- with spaces
 | 
				
			||||||
 | 
					update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+)$', '\1')),
 | 
				
			||||||
 | 
					    "addr:state" = REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+)$', '\2')
 | 
				
			||||||
 | 
					    where situsfmt2 SIMILAR TO '([A-Za-z ]+)\* ([A-Za-z]+)' AND "addr:city" IS NULL;
 | 
				
			||||||
 | 
					-- with spaces and zip
 | 
				
			||||||
 | 
					update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+) [0-9]+$', '\1')),
 | 
				
			||||||
 | 
					    "addr:state" = REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+) [0-9]+$', '\2')
 | 
				
			||||||
 | 
					    where situsfmt2 SIMILAR TO '([A-Za-z ]+)\* ([A-Za-z]+) [0-9]+' AND "addr:city" IS NULL;
 | 
				
			||||||
 | 
					-- with no asterisk, and "CA" suffix
 | 
				
			||||||
 | 
					update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+) CA$', '\1')),
 | 
				
			||||||
 | 
					    "addr:state" = 'CA'
 | 
				
			||||||
 | 
					    where situsfmt2 SIMILAR TO '([A-Za-z ]+) CA' AND "addr:city" IS NULL;
 | 
				
			||||||
 | 
					-- with no asterisk, "CA" state, and zip
 | 
				
			||||||
 | 
					update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+) CA [0-9]+$', '\1')),
 | 
				
			||||||
 | 
					    "addr:state" = 'CA'
 | 
				
			||||||
 | 
					    where situsfmt2 SIMILAR TO '([A-Za-z ]+) CA [0-9]+' AND "addr:city" IS NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- some "cities" we don't want, and state needs to be expanded
 | 
				
			||||||
 | 
					update parcels__public_ SET "addr:city" = NULL where "addr:city" = 'Unincorp County';
 | 
				
			||||||
 | 
					update parcels__public_ SET "addr:city" = NULL where "addr:city" = 'Unknown';
 | 
				
			||||||
 | 
					update parcels__public_ SET "addr:city" = NULL where "addr:city" = 'Area Bodega Bay';
 | 
				
			||||||
 | 
					update parcels__public_ SET "addr:city" = NULL where "addr:city" = 'Area Duncans Mills';
 | 
				
			||||||
 | 
					update parcels__public_ SET "addr:state" = 'California' where "addr:state" = 'CA';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- parse city and state
 | 
					 | 
				
			||||||
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\*? ([A-Za-z]+)$', '\1')),
 | 
					 | 
				
			||||||
    "addr:state" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\*? ([A-Za-z]+)$', '\2'))
 | 
					 | 
				
			||||||
    where situsfmt2 SIMILAR TO '([A-Za-z]+)\*? ([A-Za-z]+)' IS NOT NULL;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- basic 123 Main with no common suffixes or numbers
 | 
					-- basic 123 Main with no common suffixes or numbers
 | 
				
			||||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+)$', '\1')),
 | 
					update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+)$', '\1')),
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user