Prepping for slicing, fixing dangling addresses, adding city/state/use/gid

This commit is contained in:
Will Bradley 2021-02-04 10:43:33 -08:00
parent a9a8646c46
commit bef81198a8
8 changed files with 526 additions and 73 deletions

3
.gitignore vendored
View File

@ -14,3 +14,6 @@ data/
# VIM stuff # VIM stuff
*.swp *.swp
# Python
__pycache__

4
103240.sql Normal file
View File

@ -0,0 +1,4 @@
INSERT into spatial_ref_sys (srid, auth_name, auth_srid, proj4text, srtext)
values (103240, 'ESRI', 103240,
'+proj=lcc +ellps=GRS80 +units=us-ft +x_0=2000000.0 +y_0=500000.0 +lon_0=120d30''W +lat_0=36d30''N +lat_1=37d4''N +lat_2=38d26''N +towgs84=-0.9956000824677655,1.901299877314078,0.5215002840524426,0.02591500053005733,0.009425998542707753,0.01159900118427752,-0.00062000005129903 +no_defs ',
'PROJCS["NAD_1983_CORS96_StatePlane_California_III_FIPS_0403_Ft_US",GEOGCS["GCS_NAD_1983_CORS96",DATUM["D_NAD_1983_CORS96",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Lambert_Conformal_Conic"],PARAMETER["False_Easting",6561666.666666666],PARAMETER["False_Northing",1640416.666666667],PARAMETER["Central_Meridian",-120.5],PARAMETER["Standard_Parallel_1",37.06666666666667],PARAMETER["Standard_Parallel_2",38.43333333333333],PARAMETER["Latitude_Of_Origin",36.5],UNIT["Foot_US",0.3048006096012192],AUTHORITY["Esri",103240]]');

View File

@ -59,7 +59,10 @@ Debian (shp2pgsql is included in postgis)
- The postgresql server started/running/configured and database `openstreetmap` created, generally at localhost port 5432. - The postgresql server started/running/configured and database `openstreetmap` created, generally at localhost port 5432.
- Osmosis, for export - For export: sudo apt-get install -y gdal-bin python-lxml
- ogr2osm https://github.com/pnorman/ogr2osm
- Maybe also/instead: python3-gdal
- Do NOT install the osgeo package from pip, it's empty and will cause ogr import errors.
## Running ## Running

View File

@ -1,28 +1,41 @@
-- TODO:
-- - parse `usecode` or `usecdesc` for parcel type (residential / school / biz / etc)
-- - parse city and state as well for mailing
-- add fields for OSM tags and data processing -- add fields for OSM tags and data processing
ALTER TABLE sonoma_county_building_outlines ALTER TABLE sonoma_county_building_outlines
ADD COLUMN "addr:housenumber" text, ADD COLUMN "addr:housenumber" text,
ADD COLUMN "addr:street" text, ADD COLUMN "addr:street" text,
ADD COLUMN "addr:unit" text, ADD COLUMN "addr:unit" text,
ADD COLUMN "addr:city" text,
ADD COLUMN "addr:state" text,
ADD COLUMN usecode integer,
ADD COLUMN loc_geom geometry(multipolygon,4326), ADD COLUMN loc_geom geometry(multipolygon,4326),
ADD COLUMN conflated boolean DEFAULT FALSE, ADD COLUMN conflated boolean DEFAULT FALSE,
ADD COLUMN main boolean; -- is it the main building on the parcel? ADD COLUMN main boolean; -- is it the main building on the parcel?
update sonoma_county_building_outlines set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL;
-- create local geometry fields and validate geometries -- create local geometry fields and validate geometries
UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(geom); --UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(geom);
CREATE INDEX ON sonoma_county_building_outlines USING GIST (loc_geom); --CREATE INDEX ON sonoma_county_building_outlines USING GIST (loc_geom);
-- added fields for the parcels table -- added fields for the parcels table
ALTER TABLE parcels__public_ ALTER TABLE parcels__public_
ADD COLUMN "addr:housenumber" text, ADD COLUMN "addr:housenumber" text,
ADD COLUMN "addr:street" text, ADD COLUMN "addr:street" text,
ADD COLUMN "addr:unit" text, ADD COLUMN "addr:unit" text,
ADD COLUMN "addr:city" text,
ADD COLUMN "addr:state" text,
ADD COLUMN loc_geom geometry(multipolygon,4326), ADD COLUMN loc_geom geometry(multipolygon,4326),
ADD COLUMN building_count integer, ADD COLUMN building_count integer,
ADD COLUMN repeating BOOLEAN DEFAULT FALSE; ADD COLUMN repeating BOOLEAN DEFAULT FALSE;
update parcels__public_ set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL;
-- create local geometry fields and validate geometries -- create local geometry fields and validate geometries
UPDATE parcels__public_ SET loc_geom = ST_MakeValid(geom); --UPDATE parcels__public_ SET loc_geom = ST_MakeValid(geom);
CREATE INDEX ON parcels__public_ USING GIST (loc_geom); --CREATE INDEX ON parcels__public_ USING GIST (loc_geom);
-- parse and expand parcel street addresses -- parse and expand parcel street addresses
-- TODO: find/handle oddballs like 123A Main St and 123 Main St #4 -- TODO: find/handle oddballs like 123A Main St and 123 Main St #4
@ -60,6 +73,7 @@ BEGIN
WHEN 'CDS' THEN 'Cul-de-sac' WHEN 'CDS' THEN 'Cul-de-sac'
WHEN 'CIR' THEN 'Circle' WHEN 'CIR' THEN 'Circle'
WHEN 'CMNS' THEN 'Commons' WHEN 'CMNS' THEN 'Commons'
WHEN 'CNTR' THEN 'Center'
WHEN 'CONC' THEN 'Concession' WHEN 'CONC' THEN 'Concession'
WHEN 'CRES' THEN 'Crescent' WHEN 'CRES' THEN 'Crescent'
WHEN 'CRST' THEN 'Crest' WHEN 'CRST' THEN 'Crest'
@ -85,6 +99,7 @@ BEGIN
WHEN 'LOOP' THEN 'Loop' WHEN 'LOOP' THEN 'Loop'
WHEN 'MALL' THEN 'Mall' WHEN 'MALL' THEN 'Mall'
WHEN 'MAL' THEN 'Mall' WHEN 'MAL' THEN 'Mall'
WHEN 'MTN' THEN 'Mountain'
WHEN 'MTWY' THEN 'Motorway' WHEN 'MTWY' THEN 'Motorway'
WHEN 'OVAL' THEN 'Oval' WHEN 'OVAL' THEN 'Oval'
WHEN 'OPAS' THEN 'Overpass' WHEN 'OPAS' THEN 'Overpass'
@ -109,6 +124,8 @@ BEGIN
WHEN 'RUE' THEN 'Rue' WHEN 'RUE' THEN 'Rue'
WHEN 'RUN' THEN 'Run' WHEN 'RUN' THEN 'Run'
WHEN 'SKWY' THEN 'Skyway' WHEN 'SKWY' THEN 'Skyway'
WHEN 'SPGS' THEN 'Springs'
WHEN 'SPRGS' THEN 'Springs'
WHEN 'SPUR' THEN 'Spur' WHEN 'SPUR' THEN 'Spur'
WHEN 'SQ' THEN 'Square' WHEN 'SQ' THEN 'Square'
WHEN 'SR' THEN 'State Route' WHEN 'SR' THEN 'State Route'
@ -172,6 +189,11 @@ update parcels__public_ SET "addr:housenumber" = NULL,
"addr:unit" = NULL "addr:unit" = NULL
where "addr:housenumber" IS NOT NULL; where "addr:housenumber" IS NOT NULL;
-- parse city sratr
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\*? ([A-Za-z]+)$', '\1')),
"addr:state" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\*? ([A-Za-z]+)$', '\2'))
where situsfmt2 SIMILAR TO '([A-Za-z]+)\*? ([A-Za-z]+)' IS NOT NULL;
-- basic 123 Main with no common suffixes or numbers -- basic 123 Main with no common suffixes or numbers
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+)$', '\1')), update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+)$', '\1')),
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+)$', '\2')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]+)' "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+)$', '\2')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]+)'
@ -201,12 +223,14 @@ update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\2 ')) -- Main / 4th / A / Saint "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\2 ')) -- Main / 4th / A / Saint
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\3'))) -- Street / Johns || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\3'))) -- Street / Johns
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})'; where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})';
-- now 123 Twin Oaks Ln or 123 St Oaks Pl -- now 123 Twin Oaks Ln or 123 St Oaks Pl or 12690 Redwood Hwy So or 1300 19th Hole Dr
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 123 update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 123
"addr:street" = initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\2'))) -- St / Los "addr:street" = initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\2'))) -- St / Los / 19th
|| initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', ' \3 ')) -- Pl / Main || ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\4'))) -- Dr / Oak || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\3'))) -- Pl / Main
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})'; || ' ' -- space
|| initcap(expand_road(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\4')))) -- Dr / Oak / So
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})';
-- now 123 E Cherry Creek Rd -- now 123 E Cherry Creek Rd
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 123 update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 123
"addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\2'))) -- South "addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\2'))) -- South
@ -228,7 +252,27 @@ update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\3'))) -- Street || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\3'))) -- Street
|| ' ' -- space || ' ' -- space
|| initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\4'))) -- S (South) || initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\4'))) -- S (South)
where situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})'; where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})';
-- and 14521 CANYON 2 RD
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\2')) -- Canyon
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\3'))) -- 2
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\4'))) -- Rd
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})';
-- and 15560 UPPER CANYON 3 RD
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\2')) -- Upper
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\3'))) -- Canyon
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\4'))) -- 2
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\5'))) -- Rd
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})';
-- and the even more lovable 123 Main Hill St S -- and the even more lovable 123 Main Hill St S
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\1')), -- 123 update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\1')), -- 123
@ -239,6 +283,54 @@ update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt
|| initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\5'))) -- S (South) || initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\5'))) -- S (South)
where situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})'; where situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})';
-- now 10000 FRANZ VALLEY SCHOOL RD / 6401 MTN VIEW RANCH RD / 3762 MANOR LN WEST BRANCH / 222 RAGLE RD SOUTH RD/ 300 ROHNERT PARK EXPWY WEST
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 10000
"addr:street" = initcap(expand_road(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\2')))) -- Franz
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\3'))) -- Valley
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\4'))) -- School
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\5'))) -- Road
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})';
-- now 27801 STEWARTS PT SKAGGS SPRGS RD
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 27801
"addr:street" = initcap(expand_road(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\2')))) -- Stewarts
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\3'))) -- Point
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\4'))) -- Skaggs
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\5'))) -- Springs
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\6'))) -- Road
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})';
-- 131 LYNCH CREEK A WAY
-- 935 W SPAIN UNIT B ST
-- now 1706 B W COLLEGE AVE
-- now 2347 MARIA LUZ E CT
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\2')) -- Town & Country
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\3'))) -- Drive
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})';
-- 622 ELY S BLVD
-- 292 ELY BLVD S BLVD
-- now 1460 TOWN & COUNTRY DR
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\2')) -- Town & Country
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\3'))) -- Drive
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})';
-- for these apartment numbers we're gonna need to start using some functions -- for these apartment numbers we're gonna need to start using some functions
-- no direction but three words in street name -- no direction but three words in street name
@ -247,7 +339,7 @@ update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\4'))), -- Street || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\4'))), -- Street
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\5')) -- Unit 4 "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\5')) -- Unit 4
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)'; where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)';
-- suffix direction -- suffix direction and two words
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123 update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\2 ')) -- Main / 4th "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\2 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\3'))) -- Street || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\3'))) -- Street
@ -255,24 +347,60 @@ update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt
|| initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\4'))), -- S (South) || initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\4'))), -- S (South)
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\5')) -- Unit 4 / A "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\5')) -- Unit 4 / A
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)'; where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)';
-- prefix direction -- prefix direction and two words
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123 update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\2'))) -- S (South) "addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\2'))) -- S (South)
|| initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', ' \3 ')) -- Main / 4th || initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', ' \3 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\4'))), -- Street || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\4'))), -- Street
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\5')) -- Unit 4 / A "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\5')) -- Unit 4 / A
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)'; where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)';
-- no direction but two words in street name -- prefix direction and three words like 1323 W DRY CREEK RD #2
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\2'))) -- S (South)
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\3'))) -- Dry
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\4'))) -- Creek/Ext
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\5'))), -- Road
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\6')) -- Unit 4 / A
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)';
-- no direction and two words in street name
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123 update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\2 ')) -- Main / 4th "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\2 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\3'))), -- Street || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\3'))), -- Street
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\4')) -- Unit 4 / A / 1-A-B2 "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\4')) -- Unit 4 / A / 1-A-B2
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)'; where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)';
-- no direction but one word in street name -- no direction and one word in street name
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123 update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\2')), -- Main / 4th "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\2')), -- Main / 4th
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\3')) -- Unit 4 / A / 1-A-B2 "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\3')) -- Unit 4 / A / 1-A-B2
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)'; where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)';
-- no direction and five words in street name like 31510 STEWARTS PT SKAGGS SPRGS RD #B
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 31510
"addr:street" = initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\2'))) -- Stewarts
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\3'))) -- Point
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\4'))) -- Skaggs
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\5'))) -- Springs
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\6'))), -- Road
"addr:unit" = initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\7'))) -- Unit B
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)';
-- no direction, two words in street name, and "STE XXX" or "Ste XXX" in the unit
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)$', '\2 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)$', '\3'))), -- Street
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)$', '\4')) -- STE 4 / A / 1-A-B2
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)';
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+Ste ([0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+Ste ([0-9A-Z\-]+)$', '\2 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+Ste ([0-9A-Z\-]+)$', '\3'))), -- Street
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+Ste ([0-9A-Z\-]+)$', '\4')) -- Ste 4 / A / 1-A-B2
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+Ste ([0-9A-Z\-]+)';
-- 123 D EXT ST is a unique case that actually needs to be 123 D Street Extension -- 123 D EXT ST is a unique case that actually needs to be 123 D Street Extension
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) EXT ST$', '\1')), update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) EXT ST$', '\1')),
@ -289,26 +417,46 @@ update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)$', '\3')) "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)$', '\3'))
where "addr:housenumber" is null and situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)'; where "addr:housenumber" is null and situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)';
-- TODO: replace Mc([a-z]) with Mc(upper(\1)) when preceded by nothingness or a space -- 123 HWY 116 N #C1 is a unique case that needs to be 123 Highway 116 North, Unit C1
-- capitalize Macarthur, Macfarlane, Mackinnon but not Macaw Mackey Mackl[iy]n or Macmahan update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+) ([A-Z]+)[ ]+#([0-9A-Z]+)$', '\1')), -- 123
-- investigate Blank Road "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+) ([A-Z]+)[ ]+#([0-9A-Z]+)$', 'Highway \2')) -- Highway 116
-- consider "0" housenumbers || ' ' -- space
-- remove leading zeroes in housenumbers || initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+) ([A-Z]+)[ ]+#([0-9A-Z]+)$', '\3'))), -- North
-- remove "Ste" from unit "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+) ([A-Z]+)[ ]+#([0-9A-Z]+)$', '\4')) -- Unit C1
-- 5330 OLD REDWOOD HWY #A B & C where "addr:housenumber" is null and situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+) ([A-Z]+)[ ]+#([0-9A-Z]+)';
-- 27801 STEWARTS PT SKAGGS SPRGS RD
-- 10000 FRANZ VALLEY SCHOOL RD -- 3333 STEWART PT SKAGGS SPRING RD is a unique case that needs to be Stewarts Point-Skaggs Springs Road
-- 1003 HWY 116 N update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) STEWART PT SKAGGS SPRING RD$', '\1')),
-- 1382 HWY 116 S #1 "addr:street" = 'Stewarts Point-Skaggs Springs Road'
-- 100 SPRING MTN SUMMIT TRL where "addr:housenumber" is null and situsfmt1 SIMILAR TO '([0-9]+) STEWART PT SKAGGS SPRING RD';
-- 1055 BROADWAY #C, D
-- 1055 BROADWAY #E - H -- remove "Ste", "Kandace", "Starr" from unit
-- 10826 SUMMER HOME PARK RD update parcels__public_ SET "addr:unit" = REGEXP_REPLACE("addr:unit", 'Ste', '') where "addr:unit" LIKE '%Ste%';
-- 1323 W DRY CREEK RD #2 update parcels__public_ SET "addr:unit" = REGEXP_REPLACE("addr:unit", 'Kandace', '') where "addr:unit" LIKE '%Kandace%';
-- 1340 19TH HOLE DR update parcels__public_ SET "addr:unit" = REGEXP_REPLACE("addr:unit", 'Starr', '') where "addr:unit" LIKE '%Starr%';
-- 14521 CANYON 2 RD -- remove "Ln" from unit and move it to the street
-- 14578 CANYON 1 RD update parcels__public_ SET "addr:unit" = REGEXP_REPLACE("addr:unit", 'Ln', ''),
-- 1460 TOWN & COUNTRY DR "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+#LN$', '\2 Lane'))
where situsfmt1 LIKE '%#LN%';
-- properly categorize certain Scottish last names (we're drawing the line at Mackey and non-Scottish Mc* / Mac*)
update parcels__public_ SET "addr:street" = REGEXP_REPLACE("addr:street", 'Mcarthur', 'McArthur') where "addr:street" LIKE '%Mcarthur%';
update parcels__public_ SET "addr:street" = REGEXP_REPLACE("addr:street", 'Mcdowell', 'McDowell') where "addr:street" LIKE '%Mcdowell%';
update parcels__public_ SET "addr:street" = REGEXP_REPLACE("addr:street", 'Macarthur', 'MacArthur') where "addr:street" LIKE '%Macarthur%';
update parcels__public_ SET "addr:street" = REGEXP_REPLACE("addr:street", 'Macfarlane', 'MacFarlane') where "addr:street" LIKE '%Macfarlane%';
update parcels__public_ SET "addr:street" = REGEXP_REPLACE("addr:street", 'Mackinnon', 'MacKinnon') where "addr:street" LIKE '%Mackinnon%';
update parcels__public_ SET "addr:street" = REGEXP_REPLACE("addr:street", 'Macmahan', 'MacMahan') where "addr:street" LIKE '%Macmahan%';
-- Stewarts Point-Skaggs Springs Road is the OpenStreetMap name for this street, override
update parcels__public_ SET "addr:street" = 'Stewarts Point-Skaggs Springs Road' where situsfmt1 LIKE '%STEWART%SKAGG%';
-- FYI this dataset has "Blank Road" but that is an actual real road
-- TODO: consider "0" housenumbers
-- 900 TRANSPORT WAY #A&B
-- 21075 RIVER BLVD #1 & 2
-- 34 A&B RANDALL LN
-- 34 A & B RANDALL LN
-- 99 e SHILOH RD -- 99 e SHILOH RD
-- --
@ -345,7 +493,13 @@ FROM bcounts WHERE bcounts.gid = parcels__public_.gid;
UPDATE sonoma_county_building_outlines SET "addr:housenumber" = NULL, "addr:street" = NULL; UPDATE sonoma_county_building_outlines SET "addr:housenumber" = NULL, "addr:street" = NULL;
WITH a AS ( WITH a AS (
SELECT SELECT
b.gid, p."addr:housenumber", p."addr:street" b.gid,
p."addr:housenumber",
p."addr:street",
p."addr:unit",
p."addr:city",
p."addr:state",
p.usecode
FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON
ST_Intersects(b.loc_geom,p.loc_geom) AND ST_Intersects(b.loc_geom,p.loc_geom) AND
ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom) ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom)
@ -353,7 +507,11 @@ WITH a AS (
) )
UPDATE sonoma_county_building_outlines SET UPDATE sonoma_county_building_outlines SET
"addr:housenumber" = a."addr:housenumber", "addr:housenumber" = a."addr:housenumber",
"addr:street" = a."addr:street" "addr:street" = a."addr:street",
"addr:unit" = a."addr:unit",
"addr:city" = a."addr:city",
"addr:state" = a."addr:state",
"x_son_imp:usecode" = a.usecode
FROM a WHERE sonoma_county_building_outlines.gid = a.gid; FROM a WHERE sonoma_county_building_outlines.gid = a.gid;
--SELECT COUNT(*) FROM sonoma_county_building_outlines WHERE "addr:housenumber" IS NOT NULL OR "addr:street" IS NOT NULL; --SELECT COUNT(*) FROM sonoma_county_building_outlines WHERE "addr:housenumber" IS NOT NULL OR "addr:street" IS NOT NULL;
@ -383,7 +541,7 @@ FROM sizes WHERE sizes.bid = sonoma_county_building_outlines.gid;
-- now assign addresses to main buildings on parcels with outbuildings -- now assign addresses to main buildings on parcels with outbuildings
WITH a AS ( WITH a AS (
SELECT SELECT
b.gid, p."addr:housenumber", p."addr:street" b.gid, p."addr:housenumber", p."addr:street", p.usecode
FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON
ST_Intersects(b.loc_geom,p.loc_geom) AND ST_Intersects(b.loc_geom,p.loc_geom) AND
ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom) ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom)
@ -394,7 +552,8 @@ WITH a AS (
) )
UPDATE sonoma_county_building_outlines SET UPDATE sonoma_county_building_outlines SET
"addr:housenumber" = a."addr:housenumber", "addr:housenumber" = a."addr:housenumber",
"addr:street" = a."addr:street" "addr:street" = a."addr:street",
"usecode" = a.usecode
FROM a WHERE sonoma_county_building_outlines.gid = a.gid; FROM a WHERE sonoma_county_building_outlines.gid = a.gid;
-- get a count of outbuildings so we know how many addresses are intentionally unassigned -- get a count of outbuildings so we know how many addresses are intentionally unassigned
@ -420,7 +579,8 @@ WITH addresses AS (
SELECT SELECT
b.gid, b.gid,
array_to_string( ARRAY_AGG(DISTINCT p."addr:housenumber"), ';') AS housenumber, array_to_string( ARRAY_AGG(DISTINCT p."addr:housenumber"), ';') AS housenumber,
array_to_string( ARRAY_AGG(DISTINCT p."addr:street"), ';') AS street array_to_string( ARRAY_AGG(DISTINCT p."addr:street"), ';') AS street,
p.usecode
FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON
ST_Intersects(b.loc_geom,p.loc_geom) AND ST_Intersects(b.loc_geom,p.loc_geom) AND
ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom) ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom)
@ -432,7 +592,8 @@ WITH addresses AS (
) )
UPDATE sonoma_county_building_outlines AS b SET UPDATE sonoma_county_building_outlines AS b SET
"addr:housenumber" = housenumber, "addr:housenumber" = housenumber,
"addr:street" = street "addr:street" = street,
"usecode" = a.usecode
FROM addresses AS a FROM addresses AS a
WHERE a.gid = b.gid; WHERE a.gid = b.gid;
@ -568,3 +729,36 @@ FROM (
WHERE WHERE
ST_Intersects(poly.geom, baz.geom) ST_Intersects(poly.geom, baz.geom)
AND ST_Area(st_intersection(poly.geom, baz.geom))/ST_Area(baz.geom) > 0.9; AND ST_Area(st_intersection(poly.geom, baz.geom))/ST_Area(baz.geom) > 0.9;
------- TODO
alter table sonoma_building_outlines add column cid integer;
-- Drop TAZs that aren't near SJ
with hull as (
select ST_ConvexHull(ST_Collect(geom)) as geom from (
union select geom
from "sonoma_building_outlines"
) as geom)
delete from VTATaz
using hull
where not ST_Intersects(VTATaz.geom, hull.geom);
-- Assign cluster to each data point
update sonoma_building_outlines as t
set cid = taggedThing.key
from (
select (row_number() over (partition by sonoma_building_outlines.gid order by ST_Distance(sonoma_building_outlines.geom, VTATaz.geom))) as rn,
VTATaz.key, sonoma_building_outlines.gid
from sonoma_building_outlines
join VTATaz
on ST_Intersects(sonoma_building_outlines.geom, VTATaz.geom)
) as taggedThing
where t.gid = taggedThing.gid and rn = 1;
-- More specifically drop TAZs that don't have any SJ data in them
delete from VTATaz
where key not in (
select distinct cid from sonoma_building_outlines
);

139
reference/basemap.py Normal file
View File

@ -0,0 +1,139 @@
def filterLayer(layer):
if layer is None:
print("filterLayer: empty")
return None
print(layer.GetName())
#if layer.GetName() in ["buildingfootprint", "Site_Address_Points", "mergedbuildings", "namedparcels"]:
if layer.GetName() in ["buildingfootprint_filtered", "Site_Address_Points_filtered", "mergedbuildings_filtered", "namedparcels_filtered"]:
return layer
def mergeToRanges(ls):
""" Takes a list like ['1', '2', '3', '5', '8', 9'] and returns a list like
['1-3', '5', '8', '9'] """
if len(ls) < 2:
return ls
i = 0
while i < len(ls)-1 and \
((ls[i].isdigit() and ls[i+1].isdigit() and \
int(ls[i])+1 == int(ls[i+1])) or \
(len(ls[i]) == 1 and len(ls[i+1]) == 1 and \
ord(ls[i])+1 == ord(ls[i+1]))):
i += 1
if i < 2:
return ls[0:i+1]+mergeToRanges(ls[i+1:])
else:
return [ls[0]+'-'+ls[i]]+mergeToRanges(ls[i+1:])
# I don't actually know if the building heights are in US standard feet or
# survey feet. But the difference is less than the significant digits for the
# tallest building.
SURVEY_FEET_TO_METER = 1200.0/3937.0
def filterTags(attrs):
if attrs is None:
print("filterTags: empty")
return None
tags = {}
if "bldgelev" in attrs:
# BuildingFootprint
tags["building"] = "yes"
# Always appear, has equivalent
tags["height"] = "%.02f"%round(float(attrs["bldgheight"])*SURVEY_FEET_TO_METER, 2)
tags["ele"] = "%.02f"%round(float(attrs["bldgelev"])*SURVEY_FEET_TO_METER, 2)
# Always appear, no equivalent: FACILITYID
# Sometimes appear, no equivalent: LASTUPDATE
# Empty: LENGTH, SHAPE_AREA
if "Inc_Muni" in attrs:
# Site_Address_Points
# Always appear, has equivalent
tags["addr:city"] = attrs["Inc_Muni"]
# Sometimes appear, has equivalent
addr = attrs["Add_Number"]
if addr:
addr = addr.split(';')
m = max(map(len, addr))
addr.sort(key=lambda a: a.rjust(m))
addr = ';'.join(mergeToRanges(addr))
if attrs["AddNum_Suf"]:
addr += " " + attrs["AddNum_Suf"]
tags["addr:housenumber"] = addr
street = attrs["CompName"]
if street:
if street.startswith("St "): street = "Saint"+street[2:]
elif street.startswith("Mt "): street = "Mount"+street[2:]
elif street.startswith("East St "): street = "East Saint"+street[7:]
elif street.startswith("West St "): street = "West Saint"+street[7:]
tags["addr:street"] = street
units = attrs["Unit"]
if units:
units = units.split(';')
m = max(map(len, units))
units.sort(key=lambda a: a.rjust(m))
units = ';'.join(mergeToRanges(units))
tags["addr:unit"] = units
zipcode = attrs["Post_Code"]
if zipcode: tags["addr:postcode"] = zipcode
pt = attrs["Place_Type"]
#if pt == "BU":
#tags["office"] = "yes"
if pt == "ED":
tags["amenity"] = "school"
elif pt == "FB":
tags["amenity"] = "place_of_worship"
elif pt == "GO":
tags["office"] = "government"
elif pt == "GQ":
# Salvation army
tags["amenity"] = "social_facility"
elif pt == "HS":
tags["amenity"] = "hospital"
elif pt == "HT" and not units:
tags["tourism"] = "hotel"
elif pt == "RE":
tags["club"] = "sport"
elif pt == "RT":
tags["amenity"] = "restaurant"
elif pt == "RL":
tags["shop"] = "yes"
elif pt == "TR":
tags["public_transport"] = "platform"
# Always appear, no equivalent: OBJECTID, Site_NGUID, ESN, Lat, Long, Status, Juris_Auth, LastUpdate, LastEditor, GlobalID
# FullMailin could be used for addr:full, but it's unneeded.
# Sometimes appear, no equivalent: RCL_NGUID, StreetMast, ParcelID, CondoParce, UnitID, RSN, PSAP_ID, St_PreDirA, St_PreTyp, StreetName, St_PosTyp, St_PosTypC, St_PosTypU, St_PosDir, Feanme, FullName, Unit_Type, Building, FullUnit, FullAddres, Addtl_Loc, LSt_PreDir, LSt_Name, LSt_Type, Uninc_Comm, Post_Comm, Source, Effective, Notes
# Always the same: Client_ID, County, State, Country, Placement
# Always empty: Site_NGU00, AddNum_Pre, St_PreMod, St_PreDir, St_PreSep, St_PosMod, Floor, Room, Seat, Post_Code4, APN, LStPostDir, AddCode, AddDataURI, Nbrhd_Comm, MSAGComm, LandmkName, Mile_Post, Elev, Expire
if "Inc_Muni" in attrs and "bldgelev" in attrs:
# Merged address/buildings
# other Place_Type are Common Area (multi-use), Miscellaneous
tags["building"] = {"BU": "commercial",
"ED": "school",
"FB": "religious",
"GO": "government",
"HS": "hospital",
"HT": "hotel",
"MH": "static_caravan",
"Condominium": "residential",
"MF": "residential",
"RL": "retail",
"RT": "retail",
"SF": "house"}.get(attrs["Place_Type"], "yes")
if "Addtl_Loc" in attrs and "Inc_Muni" not in attrs:
# Named parcels
tags["landuse"] = "residential"
tags["name"] = attrs["Addtl_Loc"].title()
return tags

View File

@ -79,6 +79,9 @@ for intersects in false true; do
intersectsQuery="not intersectsExisting" intersectsQuery="not intersectsExisting"
fi fi
# The purpose of the out/*/buildings*.osm files is to publicly host, split, ready for tasking
# https://codeforsanjose.github.io/OSM-SouthBay/SJ_Buildings/out/clean/buildings_1323.osm
ogr2ogr -sql "select 'https://codeforsanjose.github.io/OSM-SouthBay/SJ_Buildings/out/${outdir}/buildings_' || key || '.osm' as import_url, ST_SimplifyPreserveTopology(geom, 4) from VTATaz" \ ogr2ogr -sql "select 'https://codeforsanjose.github.io/OSM-SouthBay/SJ_Buildings/out/${outdir}/buildings_' || key || '.osm' as import_url, ST_SimplifyPreserveTopology(geom, 4) from VTATaz" \
-t_srs EPSG:4326 \ -t_srs EPSG:4326 \
"out/grouped_${outdir}_buildings_zones.geojson" \ "out/grouped_${outdir}_buildings_zones.geojson" \

34
trial.py Normal file
View File

@ -0,0 +1,34 @@
def filterLayer(layer):
if layer is None:
print("filterLayer: empty")
return None
print(layer.GetName())
#if layer.GetName() in ["buildingfootprint", "Site_Address_Points", "mergedbuildings", "namedparcels"]:
if layer.GetName() in ["sonoma_county_building_outlines_filtered"]: # simplified_conflated_buildings # _filtered
return layer
def filterTags(attrs):
if attrs is None:
print("filterTags: empty")
return None
tags = { "building": "yes" }
if "gid" in attrs and attrs["gid"] != "":
tags["x_son_p:gid"] = attrs["gid"]
if "addr:state" in attrs and attrs["addr:state"] != "":
tags["addr:state"] = attrs["addr:state"]
if "addr:city" in attrs and attrs["addr:city"] != "":
tags["addr:city"] = attrs["addr:city"]
if "addr:street" in attrs and attrs["addr:street"] != "":
tags["addr:street"] = attrs["addr:street"]
if "addr:housenumber" in attrs and attrs["addr:housenumber"] != "":
tags["addr:housenumber"] = attrs["addr:housenumber"]
if "addr:unit" in attrs and attrs["addr:unit"] != "":
tags["addr:unit"] = attrs["addr:unit"]
if "x_son_imp:usecode" in attrs and attrs["x_son_imp:usecode"] != "":
tags["sonoma:x_son_imp:usecode"] = attrs["x_son_imp:usecode"] #TODO: proper x_son_imp:usecode
return tags

73
trial.sh Executable file
View File

@ -0,0 +1,73 @@
# Before running, download VTA TAZ data from Google Drive here:
# https://drive.google.com/file/d/0B098fXDVjQOhVHBFS0kwcDNGRlU/view
# and place into a folder named "data"
# (might need to rename VTATaz.dbf)
export DBNAME=openstreetmap
export OGR2OSM=../ogr2osm/ogr2osm.py
export PGUSER=openstreetmap
export PGPASSWORD=openstreetmap
export PGHOST=localhost
export PGPORT=5432
# DB setup
psql --echo-all --command="create extension if not exists hstore;" "${DBNAME}" -h $PGHOST -U $PGUSER
psql --echo-all --command="create extension if not exists postgis;" "${DBNAME}" -h $PGHOST -U $PGUSER
# Add ESRI:103240 to PostGIS for TAZ
# from https://github.com/Esri/projection-engine-db-doc/
psql --echo-all --file="103240.sql" "${DBNAME}" -h $PGHOST -U $PGUSER
echo "Importing TAZ"
shp2pgsql -d -D -s 103240 -I "data/VTATaz" | psql -d "${DBNAME}" -h $PGHOST -U $PGUSER >/dev/null
# Conflate addresses to buildings
psql -v "ON_ERROR_STOP=true" --echo-queries --file="conflation.sql" "${DBNAME}" -h $PGHOST -U $PGUSER
# Split into tasks
mkdir "out"
mkdir "out/intersecting"
mkdir "out/clean"
for intersects in false true; do
if ${intersects}; then
outdir="intersecting"
intersectsQuery="conflated"
else
outdir="clean"
intersectsQuery="not conflated"
fi
# The purpose of the out/*/buildings*.osm files is to publicly host, split, ready for tasking
# https://codeforsanjose.github.io/OSM-SouthBay/SJ_Buildings/out/clean/buildings_1323.osm
ogr2ogr -sql "select 'https://github.com/zyphlar/sonoma-import/raw/main/out/${outdir}/buildings_' || key || '.osm' as import_url, ST_SimplifyPreserveTopology(geom, 4) from VTATaz" \
-t_srs EPSG:4326 \
"out/grouped_${outdir}_buildings_zones.geojson" \
"PG:dbname=${DBNAME} host=${PGHOST} user=${PGUSER} password=${PGPASSWORD}"
sed -i 's/ //g' "out/grouped_${outdir}_buildings_zones.geojson"
# TAZ IDs from 965 to 1050 are Sonoma County broken up unto convenient polygons
for cid in {965..1050}; do
# Skip empty TAZs
if [ $(psql --command="copy (select count(*) from VTATaz where key=${cid}) to stdout csv" ${DBNAME} -h $PGHOST -U $PGUSER) = 0 ]; then
continue
fi
output="out/${outdir}/buildings_${cid}.osm"
# Filter export data to each CID
for layer in "sonoma_county_building_outlines"; do
psql -h $PGHOST -U $PGUSER -v "ON_ERROR_STOP=true" --echo-queries --command="create or replace view \"${layer}_filtered\" as select * from \"${layer}\" where ${intersectsQuery};" "${DBNAME}"
done
# Export to OSM
python3 "${OGR2OSM}" "PG:dbname=${DBNAME} host=${PGHOST} user=${PGUSER} password=${PGPASSWORD}" -f -t trial.py --no-memory-copy -o "${output}"
# Add sample region outline
#sed -i '3i<bounds minlat="37.2440898883458" minlon="-121.875007225253" maxlat="37.25775329679" maxlon="-121.855829662555" />' "${output}"
done
done