Prepping for slicing, fixing dangling addresses, adding city/state/use/gid

This commit is contained in:
Will Bradley 2021-02-04 10:43:33 -08:00
parent a9a8646c46
commit bef81198a8
8 changed files with 526 additions and 73 deletions

3
.gitignore vendored
View File

@ -14,3 +14,6 @@ data/
# VIM stuff
*.swp
# Python
__pycache__

4
103240.sql Normal file
View File

@ -0,0 +1,4 @@
INSERT into spatial_ref_sys (srid, auth_name, auth_srid, proj4text, srtext)
values (103240, 'ESRI', 103240,
'+proj=lcc +ellps=GRS80 +units=us-ft +x_0=2000000.0 +y_0=500000.0 +lon_0=120d30''W +lat_0=36d30''N +lat_1=37d4''N +lat_2=38d26''N +towgs84=-0.9956000824677655,1.901299877314078,0.5215002840524426,0.02591500053005733,0.009425998542707753,0.01159900118427752,-0.00062000005129903 +no_defs ',
'PROJCS["NAD_1983_CORS96_StatePlane_California_III_FIPS_0403_Ft_US",GEOGCS["GCS_NAD_1983_CORS96",DATUM["D_NAD_1983_CORS96",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Lambert_Conformal_Conic"],PARAMETER["False_Easting",6561666.666666666],PARAMETER["False_Northing",1640416.666666667],PARAMETER["Central_Meridian",-120.5],PARAMETER["Standard_Parallel_1",37.06666666666667],PARAMETER["Standard_Parallel_2",38.43333333333333],PARAMETER["Latitude_Of_Origin",36.5],UNIT["Foot_US",0.3048006096012192],AUTHORITY["Esri",103240]]');

View File

@ -59,7 +59,10 @@ Debian (shp2pgsql is included in postgis)
- The postgresql server started/running/configured and database `openstreetmap` created, generally at localhost port 5432.
- Osmosis, for export
- For export: sudo apt-get install -y gdal-bin python-lxml
- ogr2osm https://github.com/pnorman/ogr2osm
- Maybe also/instead: python3-gdal
- Do NOT install the osgeo package from pip, it's empty and will cause ogr import errors.
## Running

View File

@ -1,28 +1,41 @@
-- TODO:
-- - parse `usecode` or `usecdesc` for parcel type (residential / school / biz / etc)
-- - parse city and state as well for mailing
-- add fields for OSM tags and data processing
ALTER TABLE sonoma_county_building_outlines
ADD COLUMN "addr:housenumber" text,
ADD COLUMN "addr:street" text,
ADD COLUMN "addr:unit" text,
ADD COLUMN "addr:city" text,
ADD COLUMN "addr:state" text,
ADD COLUMN usecode integer,
ADD COLUMN loc_geom geometry(multipolygon,4326),
ADD COLUMN conflated boolean DEFAULT FALSE,
ADD COLUMN main boolean; -- is it the main building on the parcel?
update sonoma_county_building_outlines set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL;
-- create local geometry fields and validate geometries
UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(geom);
CREATE INDEX ON sonoma_county_building_outlines USING GIST (loc_geom);
--UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(geom);
--CREATE INDEX ON sonoma_county_building_outlines USING GIST (loc_geom);
-- added fields for the parcels table
ALTER TABLE parcels__public_
ADD COLUMN "addr:housenumber" text,
ADD COLUMN "addr:street" text,
ADD COLUMN "addr:unit" text,
ADD COLUMN "addr:city" text,
ADD COLUMN "addr:state" text,
ADD COLUMN loc_geom geometry(multipolygon,4326),
ADD COLUMN building_count integer,
ADD COLUMN repeating BOOLEAN DEFAULT FALSE;
update parcels__public_ set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL;
-- create local geometry fields and validate geometries
UPDATE parcels__public_ SET loc_geom = ST_MakeValid(geom);
CREATE INDEX ON parcels__public_ USING GIST (loc_geom);
--UPDATE parcels__public_ SET loc_geom = ST_MakeValid(geom);
--CREATE INDEX ON parcels__public_ USING GIST (loc_geom);
-- parse and expand parcel street addresses
-- TODO: find/handle oddballs like 123A Main St and 123 Main St #4
@ -60,6 +73,7 @@ BEGIN
WHEN 'CDS' THEN 'Cul-de-sac'
WHEN 'CIR' THEN 'Circle'
WHEN 'CMNS' THEN 'Commons'
WHEN 'CNTR' THEN 'Center'
WHEN 'CONC' THEN 'Concession'
WHEN 'CRES' THEN 'Crescent'
WHEN 'CRST' THEN 'Crest'
@ -85,7 +99,8 @@ BEGIN
WHEN 'LOOP' THEN 'Loop'
WHEN 'MALL' THEN 'Mall'
WHEN 'MAL' THEN 'Mall'
WHEN 'MTWY' THEN 'Motorway'
WHEN 'MTN' THEN 'Mountain'
WHEN 'MTWY' THEN 'Motorway'
WHEN 'OVAL' THEN 'Oval'
WHEN 'OPAS' THEN 'Overpass'
WHEN 'OVPS' THEN 'Overpass'
@ -109,7 +124,9 @@ BEGIN
WHEN 'RUE' THEN 'Rue'
WHEN 'RUN' THEN 'Run'
WHEN 'SKWY' THEN 'Skyway'
WHEN 'SPUR' THEN 'Spur'
WHEN 'SPGS' THEN 'Springs'
WHEN 'SPRGS' THEN 'Springs'
WHEN 'SPUR' THEN 'Spur'
WHEN 'SQ' THEN 'Square'
WHEN 'SR' THEN 'State Route'
WHEN 'STCT' THEN 'Street Court'
@ -172,6 +189,11 @@ update parcels__public_ SET "addr:housenumber" = NULL,
"addr:unit" = NULL
where "addr:housenumber" IS NOT NULL;
-- parse city sratr
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\*? ([A-Za-z]+)$', '\1')),
"addr:state" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\*? ([A-Za-z]+)$', '\2'))
where situsfmt2 SIMILAR TO '([A-Za-z]+)\*? ([A-Za-z]+)' IS NOT NULL;
-- basic 123 Main with no common suffixes or numbers
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+)$', '\1')),
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+)$', '\2')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]+)'
@ -201,12 +223,14 @@ update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\2 ')) -- Main / 4th / A / Saint
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\3'))) -- Street / Johns
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})';
-- now 123 Twin Oaks Ln or 123 St Oaks Pl
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 123
"addr:street" = initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\2'))) -- St / Los
|| initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', ' \3 ')) -- Pl / Main
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\4'))) -- Dr / Oak
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})';
-- now 123 Twin Oaks Ln or 123 St Oaks Pl or 12690 Redwood Hwy So or 1300 19th Hole Dr
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 123
"addr:street" = initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\2'))) -- St / Los / 19th
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\3'))) -- Pl / Main
|| ' ' -- space
|| initcap(expand_road(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\4')))) -- Dr / Oak / So
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})';
-- now 123 E Cherry Creek Rd
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 123
"addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\2'))) -- South
@ -227,8 +251,28 @@ update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\2 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\3'))) -- Street
|| ' ' -- space
|| initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\4'))) -- S (South)
where situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})';
|| initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\4'))) -- S (South)
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})';
-- and 14521 CANYON 2 RD
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\2')) -- Canyon
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\3'))) -- 2
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\4'))) -- Rd
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})';
-- and 15560 UPPER CANYON 3 RD
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\2')) -- Upper
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\3'))) -- Canyon
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\4'))) -- 2
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\5'))) -- Rd
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})';
-- and the even more lovable 123 Main Hill St S
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\1')), -- 123
@ -239,40 +283,124 @@ update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt
|| initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\5'))) -- S (South)
where situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})';
-- now 10000 FRANZ VALLEY SCHOOL RD / 6401 MTN VIEW RANCH RD / 3762 MANOR LN WEST BRANCH / 222 RAGLE RD SOUTH RD/ 300 ROHNERT PARK EXPWY WEST
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 10000
"addr:street" = initcap(expand_road(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\2')))) -- Franz
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\3'))) -- Valley
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\4'))) -- School
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\5'))) -- Road
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})';
-- now 27801 STEWARTS PT SKAGGS SPRGS RD
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 27801
"addr:street" = initcap(expand_road(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\2')))) -- Stewarts
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\3'))) -- Point
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\4'))) -- Skaggs
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\5'))) -- Springs
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\6'))) -- Road
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})';
-- 131 LYNCH CREEK A WAY
-- 935 W SPAIN UNIT B ST
-- now 1706 B W COLLEGE AVE
-- now 2347 MARIA LUZ E CT
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\2')) -- Town & Country
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\3'))) -- Drive
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})';
-- 622 ELY S BLVD
-- 292 ELY BLVD S BLVD
-- now 1460 TOWN & COUNTRY DR
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\2')) -- Town & Country
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\3'))) -- Drive
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})';
-- for these apartment numbers we're gonna need to start using some functions
-- no direction but three words in street name
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\2 \3 ')) -- La Main
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\4'))), -- Street
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\5')) -- Unit 4
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)';
-- suffix direction
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\2 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\3'))) -- Street
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\2 \3 ')) -- La Main
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\4'))), -- Street
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\5')) -- Unit 4
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)';
-- suffix direction and two words
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\2 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\3'))) -- Street
|| ' ' -- space
|| initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\4'))), -- S (South)
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\5')) -- Unit 4 / A
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)';
-- prefix direction
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\2'))) -- S (South)
|| initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', ' \3 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\4'))), -- Street
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\5')) -- Unit 4 / A
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)';
-- no direction but two words in street name
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\2 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\3'))), -- Street
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\4')) -- Unit 4 / A / 1-A-B2
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)';
-- no direction but one word in street name
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\2')), -- Main / 4th
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\3')) -- Unit 4 / A / 1-A-B2
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)';
|| initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\4'))), -- S (South)
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\5')) -- Unit 4 / A
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)';
-- prefix direction and two words
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\2'))) -- S (South)
|| initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', ' \3 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\4'))), -- Street
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\5')) -- Unit 4 / A
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)';
-- prefix direction and three words like 1323 W DRY CREEK RD #2
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\2'))) -- S (South)
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\3'))) -- Dry
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\4'))) -- Creek/Ext
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\5'))), -- Road
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\6')) -- Unit 4 / A
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)';
-- no direction and two words in street name
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\2 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\3'))), -- Street
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\4')) -- Unit 4 / A / 1-A-B2
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)';
-- no direction and one word in street name
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\2')), -- Main / 4th
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\3')) -- Unit 4 / A / 1-A-B2
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)';
-- no direction and five words in street name like 31510 STEWARTS PT SKAGGS SPRGS RD #B
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 31510
"addr:street" = initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\2'))) -- Stewarts
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\3'))) -- Point
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\4'))) -- Skaggs
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\5'))) -- Springs
|| ' ' -- space
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\6'))), -- Road
"addr:unit" = initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\7'))) -- Unit B
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)';
-- no direction, two words in street name, and "STE XXX" or "Ste XXX" in the unit
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)$', '\2 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)$', '\3'))), -- Street
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)$', '\4')) -- STE 4 / A / 1-A-B2
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)';
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+Ste ([0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+Ste ([0-9A-Z\-]+)$', '\2 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+Ste ([0-9A-Z\-]+)$', '\3'))), -- Street
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+Ste ([0-9A-Z\-]+)$', '\4')) -- Ste 4 / A / 1-A-B2
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+Ste ([0-9A-Z\-]+)';
-- 123 D EXT ST is a unique case that actually needs to be 123 D Street Extension
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) EXT ST$', '\1')),
@ -287,28 +415,48 @@ update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)$', '\1')),
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)$', 'Highway \2')),
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)$', '\3'))
where "addr:housenumber" is null and situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)';
where "addr:housenumber" is null and situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)';
-- TODO: replace Mc([a-z]) with Mc(upper(\1)) when preceded by nothingness or a space
-- capitalize Macarthur, Macfarlane, Mackinnon but not Macaw Mackey Mackl[iy]n or Macmahan
-- investigate Blank Road
-- consider "0" housenumbers
-- remove leading zeroes in housenumbers
-- remove "Ste" from unit
-- 5330 OLD REDWOOD HWY #A B & C
-- 27801 STEWARTS PT SKAGGS SPRGS RD
-- 10000 FRANZ VALLEY SCHOOL RD
-- 1003 HWY 116 N
-- 1382 HWY 116 S #1
-- 100 SPRING MTN SUMMIT TRL
-- 1055 BROADWAY #C, D
-- 1055 BROADWAY #E - H
-- 10826 SUMMER HOME PARK RD
-- 1323 W DRY CREEK RD #2
-- 1340 19TH HOLE DR
-- 14521 CANYON 2 RD
-- 14578 CANYON 1 RD
-- 1460 TOWN & COUNTRY DR
-- 123 HWY 116 N #C1 is a unique case that needs to be 123 Highway 116 North, Unit C1
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+) ([A-Z]+)[ ]+#([0-9A-Z]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+) ([A-Z]+)[ ]+#([0-9A-Z]+)$', 'Highway \2')) -- Highway 116
|| ' ' -- space
|| initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+) ([A-Z]+)[ ]+#([0-9A-Z]+)$', '\3'))), -- North
"addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+) ([A-Z]+)[ ]+#([0-9A-Z]+)$', '\4')) -- Unit C1
where "addr:housenumber" is null and situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+) ([A-Z]+)[ ]+#([0-9A-Z]+)';
-- 3333 STEWART PT SKAGGS SPRING RD is a unique case that needs to be Stewarts Point-Skaggs Springs Road
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) STEWART PT SKAGGS SPRING RD$', '\1')),
"addr:street" = 'Stewarts Point-Skaggs Springs Road'
where "addr:housenumber" is null and situsfmt1 SIMILAR TO '([0-9]+) STEWART PT SKAGGS SPRING RD';
-- remove "Ste", "Kandace", "Starr" from unit
update parcels__public_ SET "addr:unit" = REGEXP_REPLACE("addr:unit", 'Ste', '') where "addr:unit" LIKE '%Ste%';
update parcels__public_ SET "addr:unit" = REGEXP_REPLACE("addr:unit", 'Kandace', '') where "addr:unit" LIKE '%Kandace%';
update parcels__public_ SET "addr:unit" = REGEXP_REPLACE("addr:unit", 'Starr', '') where "addr:unit" LIKE '%Starr%';
-- remove "Ln" from unit and move it to the street
update parcels__public_ SET "addr:unit" = REGEXP_REPLACE("addr:unit", 'Ln', ''),
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+#LN$', '\2 Lane'))
where situsfmt1 LIKE '%#LN%';
-- properly categorize certain Scottish last names (we're drawing the line at Mackey and non-Scottish Mc* / Mac*)
update parcels__public_ SET "addr:street" = REGEXP_REPLACE("addr:street", 'Mcarthur', 'McArthur') where "addr:street" LIKE '%Mcarthur%';
update parcels__public_ SET "addr:street" = REGEXP_REPLACE("addr:street", 'Mcdowell', 'McDowell') where "addr:street" LIKE '%Mcdowell%';
update parcels__public_ SET "addr:street" = REGEXP_REPLACE("addr:street", 'Macarthur', 'MacArthur') where "addr:street" LIKE '%Macarthur%';
update parcels__public_ SET "addr:street" = REGEXP_REPLACE("addr:street", 'Macfarlane', 'MacFarlane') where "addr:street" LIKE '%Macfarlane%';
update parcels__public_ SET "addr:street" = REGEXP_REPLACE("addr:street", 'Mackinnon', 'MacKinnon') where "addr:street" LIKE '%Mackinnon%';
update parcels__public_ SET "addr:street" = REGEXP_REPLACE("addr:street", 'Macmahan', 'MacMahan') where "addr:street" LIKE '%Macmahan%';
-- Stewarts Point-Skaggs Springs Road is the OpenStreetMap name for this street, override
update parcels__public_ SET "addr:street" = 'Stewarts Point-Skaggs Springs Road' where situsfmt1 LIKE '%STEWART%SKAGG%';
-- FYI this dataset has "Blank Road" but that is an actual real road
-- TODO: consider "0" housenumbers
-- 900 TRANSPORT WAY #A&B
-- 21075 RIVER BLVD #1 & 2
-- 34 A&B RANDALL LN
-- 34 A & B RANDALL LN
-- 99 e SHILOH RD
--
@ -345,7 +493,13 @@ FROM bcounts WHERE bcounts.gid = parcels__public_.gid;
UPDATE sonoma_county_building_outlines SET "addr:housenumber" = NULL, "addr:street" = NULL;
WITH a AS (
SELECT
b.gid, p."addr:housenumber", p."addr:street"
b.gid,
p."addr:housenumber",
p."addr:street",
p."addr:unit",
p."addr:city",
p."addr:state",
p.usecode
FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON
ST_Intersects(b.loc_geom,p.loc_geom) AND
ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom)
@ -353,7 +507,11 @@ WITH a AS (
)
UPDATE sonoma_county_building_outlines SET
"addr:housenumber" = a."addr:housenumber",
"addr:street" = a."addr:street"
"addr:street" = a."addr:street",
"addr:unit" = a."addr:unit",
"addr:city" = a."addr:city",
"addr:state" = a."addr:state",
"x_son_imp:usecode" = a.usecode
FROM a WHERE sonoma_county_building_outlines.gid = a.gid;
--SELECT COUNT(*) FROM sonoma_county_building_outlines WHERE "addr:housenumber" IS NOT NULL OR "addr:street" IS NOT NULL;
@ -383,7 +541,7 @@ FROM sizes WHERE sizes.bid = sonoma_county_building_outlines.gid;
-- now assign addresses to main buildings on parcels with outbuildings
WITH a AS (
SELECT
b.gid, p."addr:housenumber", p."addr:street"
b.gid, p."addr:housenumber", p."addr:street", p.usecode
FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON
ST_Intersects(b.loc_geom,p.loc_geom) AND
ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom)
@ -394,7 +552,8 @@ WITH a AS (
)
UPDATE sonoma_county_building_outlines SET
"addr:housenumber" = a."addr:housenumber",
"addr:street" = a."addr:street"
"addr:street" = a."addr:street",
"usecode" = a.usecode
FROM a WHERE sonoma_county_building_outlines.gid = a.gid;
-- get a count of outbuildings so we know how many addresses are intentionally unassigned
@ -420,7 +579,8 @@ WITH addresses AS (
SELECT
b.gid,
array_to_string( ARRAY_AGG(DISTINCT p."addr:housenumber"), ';') AS housenumber,
array_to_string( ARRAY_AGG(DISTINCT p."addr:street"), ';') AS street
array_to_string( ARRAY_AGG(DISTINCT p."addr:street"), ';') AS street,
p.usecode
FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON
ST_Intersects(b.loc_geom,p.loc_geom) AND
ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom)
@ -432,7 +592,8 @@ WITH addresses AS (
)
UPDATE sonoma_county_building_outlines AS b SET
"addr:housenumber" = housenumber,
"addr:street" = street
"addr:street" = street,
"usecode" = a.usecode
FROM addresses AS a
WHERE a.gid = b.gid;
@ -568,3 +729,36 @@ FROM (
WHERE
ST_Intersects(poly.geom, baz.geom)
AND ST_Area(st_intersection(poly.geom, baz.geom))/ST_Area(baz.geom) > 0.9;
------- TODO
alter table sonoma_building_outlines add column cid integer;
-- Drop TAZs that aren't near SJ
with hull as (
select ST_ConvexHull(ST_Collect(geom)) as geom from (
union select geom
from "sonoma_building_outlines"
) as geom)
delete from VTATaz
using hull
where not ST_Intersects(VTATaz.geom, hull.geom);
-- Assign cluster to each data point
update sonoma_building_outlines as t
set cid = taggedThing.key
from (
select (row_number() over (partition by sonoma_building_outlines.gid order by ST_Distance(sonoma_building_outlines.geom, VTATaz.geom))) as rn,
VTATaz.key, sonoma_building_outlines.gid
from sonoma_building_outlines
join VTATaz
on ST_Intersects(sonoma_building_outlines.geom, VTATaz.geom)
) as taggedThing
where t.gid = taggedThing.gid and rn = 1;
-- More specifically drop TAZs that don't have any SJ data in them
delete from VTATaz
where key not in (
select distinct cid from sonoma_building_outlines
);

139
reference/basemap.py Normal file
View File

@ -0,0 +1,139 @@
def filterLayer(layer):
if layer is None:
print("filterLayer: empty")
return None
print(layer.GetName())
#if layer.GetName() in ["buildingfootprint", "Site_Address_Points", "mergedbuildings", "namedparcels"]:
if layer.GetName() in ["buildingfootprint_filtered", "Site_Address_Points_filtered", "mergedbuildings_filtered", "namedparcels_filtered"]:
return layer
def mergeToRanges(ls):
""" Takes a list like ['1', '2', '3', '5', '8', 9'] and returns a list like
['1-3', '5', '8', '9'] """
if len(ls) < 2:
return ls
i = 0
while i < len(ls)-1 and \
((ls[i].isdigit() and ls[i+1].isdigit() and \
int(ls[i])+1 == int(ls[i+1])) or \
(len(ls[i]) == 1 and len(ls[i+1]) == 1 and \
ord(ls[i])+1 == ord(ls[i+1]))):
i += 1
if i < 2:
return ls[0:i+1]+mergeToRanges(ls[i+1:])
else:
return [ls[0]+'-'+ls[i]]+mergeToRanges(ls[i+1:])
# I don't actually know if the building heights are in US standard feet or
# survey feet. But the difference is less than the significant digits for the
# tallest building.
SURVEY_FEET_TO_METER = 1200.0/3937.0
def filterTags(attrs):
if attrs is None:
print("filterTags: empty")
return None
tags = {}
if "bldgelev" in attrs:
# BuildingFootprint
tags["building"] = "yes"
# Always appear, has equivalent
tags["height"] = "%.02f"%round(float(attrs["bldgheight"])*SURVEY_FEET_TO_METER, 2)
tags["ele"] = "%.02f"%round(float(attrs["bldgelev"])*SURVEY_FEET_TO_METER, 2)
# Always appear, no equivalent: FACILITYID
# Sometimes appear, no equivalent: LASTUPDATE
# Empty: LENGTH, SHAPE_AREA
if "Inc_Muni" in attrs:
# Site_Address_Points
# Always appear, has equivalent
tags["addr:city"] = attrs["Inc_Muni"]
# Sometimes appear, has equivalent
addr = attrs["Add_Number"]
if addr:
addr = addr.split(';')
m = max(map(len, addr))
addr.sort(key=lambda a: a.rjust(m))
addr = ';'.join(mergeToRanges(addr))
if attrs["AddNum_Suf"]:
addr += " " + attrs["AddNum_Suf"]
tags["addr:housenumber"] = addr
street = attrs["CompName"]
if street:
if street.startswith("St "): street = "Saint"+street[2:]
elif street.startswith("Mt "): street = "Mount"+street[2:]
elif street.startswith("East St "): street = "East Saint"+street[7:]
elif street.startswith("West St "): street = "West Saint"+street[7:]
tags["addr:street"] = street
units = attrs["Unit"]
if units:
units = units.split(';')
m = max(map(len, units))
units.sort(key=lambda a: a.rjust(m))
units = ';'.join(mergeToRanges(units))
tags["addr:unit"] = units
zipcode = attrs["Post_Code"]
if zipcode: tags["addr:postcode"] = zipcode
pt = attrs["Place_Type"]
#if pt == "BU":
#tags["office"] = "yes"
if pt == "ED":
tags["amenity"] = "school"
elif pt == "FB":
tags["amenity"] = "place_of_worship"
elif pt == "GO":
tags["office"] = "government"
elif pt == "GQ":
# Salvation army
tags["amenity"] = "social_facility"
elif pt == "HS":
tags["amenity"] = "hospital"
elif pt == "HT" and not units:
tags["tourism"] = "hotel"
elif pt == "RE":
tags["club"] = "sport"
elif pt == "RT":
tags["amenity"] = "restaurant"
elif pt == "RL":
tags["shop"] = "yes"
elif pt == "TR":
tags["public_transport"] = "platform"
# Always appear, no equivalent: OBJECTID, Site_NGUID, ESN, Lat, Long, Status, Juris_Auth, LastUpdate, LastEditor, GlobalID
# FullMailin could be used for addr:full, but it's unneeded.
# Sometimes appear, no equivalent: RCL_NGUID, StreetMast, ParcelID, CondoParce, UnitID, RSN, PSAP_ID, St_PreDirA, St_PreTyp, StreetName, St_PosTyp, St_PosTypC, St_PosTypU, St_PosDir, Feanme, FullName, Unit_Type, Building, FullUnit, FullAddres, Addtl_Loc, LSt_PreDir, LSt_Name, LSt_Type, Uninc_Comm, Post_Comm, Source, Effective, Notes
# Always the same: Client_ID, County, State, Country, Placement
# Always empty: Site_NGU00, AddNum_Pre, St_PreMod, St_PreDir, St_PreSep, St_PosMod, Floor, Room, Seat, Post_Code4, APN, LStPostDir, AddCode, AddDataURI, Nbrhd_Comm, MSAGComm, LandmkName, Mile_Post, Elev, Expire
if "Inc_Muni" in attrs and "bldgelev" in attrs:
# Merged address/buildings
# other Place_Type are Common Area (multi-use), Miscellaneous
tags["building"] = {"BU": "commercial",
"ED": "school",
"FB": "religious",
"GO": "government",
"HS": "hospital",
"HT": "hotel",
"MH": "static_caravan",
"Condominium": "residential",
"MF": "residential",
"RL": "retail",
"RT": "retail",
"SF": "house"}.get(attrs["Place_Type"], "yes")
if "Addtl_Loc" in attrs and "Inc_Muni" not in attrs:
# Named parcels
tags["landuse"] = "residential"
tags["name"] = attrs["Addtl_Loc"].title()
return tags

View File

@ -78,7 +78,10 @@ for intersects in false true; do
outdir="clean"
intersectsQuery="not intersectsExisting"
fi
# The purpose of the out/*/buildings*.osm files is to publicly host, split, ready for tasking
# https://codeforsanjose.github.io/OSM-SouthBay/SJ_Buildings/out/clean/buildings_1323.osm
ogr2ogr -sql "select 'https://codeforsanjose.github.io/OSM-SouthBay/SJ_Buildings/out/${outdir}/buildings_' || key || '.osm' as import_url, ST_SimplifyPreserveTopology(geom, 4) from VTATaz" \
-t_srs EPSG:4326 \
"out/grouped_${outdir}_buildings_zones.geojson" \

34
trial.py Normal file
View File

@ -0,0 +1,34 @@
def filterLayer(layer):
if layer is None:
print("filterLayer: empty")
return None
print(layer.GetName())
#if layer.GetName() in ["buildingfootprint", "Site_Address_Points", "mergedbuildings", "namedparcels"]:
if layer.GetName() in ["sonoma_county_building_outlines_filtered"]: # simplified_conflated_buildings # _filtered
return layer
def filterTags(attrs):
if attrs is None:
print("filterTags: empty")
return None
tags = { "building": "yes" }
if "gid" in attrs and attrs["gid"] != "":
tags["x_son_p:gid"] = attrs["gid"]
if "addr:state" in attrs and attrs["addr:state"] != "":
tags["addr:state"] = attrs["addr:state"]
if "addr:city" in attrs and attrs["addr:city"] != "":
tags["addr:city"] = attrs["addr:city"]
if "addr:street" in attrs and attrs["addr:street"] != "":
tags["addr:street"] = attrs["addr:street"]
if "addr:housenumber" in attrs and attrs["addr:housenumber"] != "":
tags["addr:housenumber"] = attrs["addr:housenumber"]
if "addr:unit" in attrs and attrs["addr:unit"] != "":
tags["addr:unit"] = attrs["addr:unit"]
if "x_son_imp:usecode" in attrs and attrs["x_son_imp:usecode"] != "":
tags["sonoma:x_son_imp:usecode"] = attrs["x_son_imp:usecode"] #TODO: proper x_son_imp:usecode
return tags

73
trial.sh Executable file
View File

@ -0,0 +1,73 @@
# Before running, download VTA TAZ data from Google Drive here:
# https://drive.google.com/file/d/0B098fXDVjQOhVHBFS0kwcDNGRlU/view
# and place into a folder named "data"
# (might need to rename VTATaz.dbf)
export DBNAME=openstreetmap
export OGR2OSM=../ogr2osm/ogr2osm.py
export PGUSER=openstreetmap
export PGPASSWORD=openstreetmap
export PGHOST=localhost
export PGPORT=5432
# DB setup
psql --echo-all --command="create extension if not exists hstore;" "${DBNAME}" -h $PGHOST -U $PGUSER
psql --echo-all --command="create extension if not exists postgis;" "${DBNAME}" -h $PGHOST -U $PGUSER
# Add ESRI:103240 to PostGIS for TAZ
# from https://github.com/Esri/projection-engine-db-doc/
psql --echo-all --file="103240.sql" "${DBNAME}" -h $PGHOST -U $PGUSER
echo "Importing TAZ"
shp2pgsql -d -D -s 103240 -I "data/VTATaz" | psql -d "${DBNAME}" -h $PGHOST -U $PGUSER >/dev/null
# Conflate addresses to buildings
psql -v "ON_ERROR_STOP=true" --echo-queries --file="conflation.sql" "${DBNAME}" -h $PGHOST -U $PGUSER
# Split into tasks
mkdir "out"
mkdir "out/intersecting"
mkdir "out/clean"
for intersects in false true; do
if ${intersects}; then
outdir="intersecting"
intersectsQuery="conflated"
else
outdir="clean"
intersectsQuery="not conflated"
fi
# The purpose of the out/*/buildings*.osm files is to publicly host, split, ready for tasking
# https://codeforsanjose.github.io/OSM-SouthBay/SJ_Buildings/out/clean/buildings_1323.osm
ogr2ogr -sql "select 'https://github.com/zyphlar/sonoma-import/raw/main/out/${outdir}/buildings_' || key || '.osm' as import_url, ST_SimplifyPreserveTopology(geom, 4) from VTATaz" \
-t_srs EPSG:4326 \
"out/grouped_${outdir}_buildings_zones.geojson" \
"PG:dbname=${DBNAME} host=${PGHOST} user=${PGUSER} password=${PGPASSWORD}"
sed -i 's/ //g' "out/grouped_${outdir}_buildings_zones.geojson"
# TAZ IDs from 965 to 1050 are Sonoma County broken up unto convenient polygons
for cid in {965..1050}; do
# Skip empty TAZs
if [ $(psql --command="copy (select count(*) from VTATaz where key=${cid}) to stdout csv" ${DBNAME} -h $PGHOST -U $PGUSER) = 0 ]; then
continue
fi
output="out/${outdir}/buildings_${cid}.osm"
# Filter export data to each CID
for layer in "sonoma_county_building_outlines"; do
psql -h $PGHOST -U $PGUSER -v "ON_ERROR_STOP=true" --echo-queries --command="create or replace view \"${layer}_filtered\" as select * from \"${layer}\" where ${intersectsQuery};" "${DBNAME}"
done
# Export to OSM
python3 "${OGR2OSM}" "PG:dbname=${DBNAME} host=${PGHOST} user=${PGUSER} password=${PGPASSWORD}" -f -t trial.py --no-memory-copy -o "${output}"
# Add sample region outline
#sed -i '3i<bounds minlat="37.2440898883458" minlon="-121.875007225253" maxlat="37.25775329679" maxlon="-121.855829662555" />' "${output}"
done
done