From bef81198a8cc517abc43b6fb99830fb6eec96f53 Mon Sep 17 00:00:00 2001 From: Will Bradley Date: Thu, 4 Feb 2021 10:43:33 -0800 Subject: [PATCH] Prepping for slicing, fixing dangling addresses, adding city/state/use/gid --- .gitignore | 3 + 103240.sql | 4 + README.md | 5 +- conflation.sql | 336 ++++++++++++++++++++++++------- reference/basemap.py | 139 +++++++++++++ export.sh => reference/export.sh | 5 +- trial.py | 34 ++++ trial.sh | 73 +++++++ 8 files changed, 526 insertions(+), 73 deletions(-) create mode 100644 103240.sql create mode 100644 reference/basemap.py rename export.sh => reference/export.sh (95%) create mode 100644 trial.py create mode 100755 trial.sh diff --git a/.gitignore b/.gitignore index 957a22c..7b60ad2 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,6 @@ data/ # VIM stuff *.swp + +# Python +__pycache__ diff --git a/103240.sql b/103240.sql new file mode 100644 index 0000000..9701aae --- /dev/null +++ b/103240.sql @@ -0,0 +1,4 @@ +INSERT into spatial_ref_sys (srid, auth_name, auth_srid, proj4text, srtext) +values (103240, 'ESRI', 103240, +'+proj=lcc +ellps=GRS80 +units=us-ft +x_0=2000000.0 +y_0=500000.0 +lon_0=120d30''W +lat_0=36d30''N +lat_1=37d4''N +lat_2=38d26''N +towgs84=-0.9956000824677655,1.901299877314078,0.5215002840524426,0.02591500053005733,0.009425998542707753,0.01159900118427752,-0.00062000005129903 +no_defs ', +'PROJCS["NAD_1983_CORS96_StatePlane_California_III_FIPS_0403_Ft_US",GEOGCS["GCS_NAD_1983_CORS96",DATUM["D_NAD_1983_CORS96",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Lambert_Conformal_Conic"],PARAMETER["False_Easting",6561666.666666666],PARAMETER["False_Northing",1640416.666666667],PARAMETER["Central_Meridian",-120.5],PARAMETER["Standard_Parallel_1",37.06666666666667],PARAMETER["Standard_Parallel_2",38.43333333333333],PARAMETER["Latitude_Of_Origin",36.5],UNIT["Foot_US",0.3048006096012192],AUTHORITY["Esri",103240]]'); diff --git a/README.md b/README.md index 8b12954..84160b8 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,10 @@ Debian (shp2pgsql is included in postgis) - The postgresql server started/running/configured and database `openstreetmap` created, generally at localhost port 5432. -- Osmosis, for export +- For export: sudo apt-get install -y gdal-bin python-lxml + - ogr2osm https://github.com/pnorman/ogr2osm + - Maybe also/instead: python3-gdal + - Do NOT install the osgeo package from pip, it's empty and will cause ogr import errors. ## Running diff --git a/conflation.sql b/conflation.sql index ad4b401..6a95f99 100644 --- a/conflation.sql +++ b/conflation.sql @@ -1,28 +1,41 @@ +-- TODO: +-- - parse `usecode` or `usecdesc` for parcel type (residential / school / biz / etc) +-- - parse city and state as well for mailing + -- add fields for OSM tags and data processing ALTER TABLE sonoma_county_building_outlines ADD COLUMN "addr:housenumber" text, ADD COLUMN "addr:street" text, ADD COLUMN "addr:unit" text, + ADD COLUMN "addr:city" text, + ADD COLUMN "addr:state" text, + ADD COLUMN usecode integer, ADD COLUMN loc_geom geometry(multipolygon,4326), ADD COLUMN conflated boolean DEFAULT FALSE, ADD COLUMN main boolean; -- is it the main building on the parcel? +update sonoma_county_building_outlines set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL; + -- create local geometry fields and validate geometries -UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(geom); -CREATE INDEX ON sonoma_county_building_outlines USING GIST (loc_geom); +--UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(geom); +--CREATE INDEX ON sonoma_county_building_outlines USING GIST (loc_geom); -- added fields for the parcels table ALTER TABLE parcels__public_ ADD COLUMN "addr:housenumber" text, ADD COLUMN "addr:street" text, ADD COLUMN "addr:unit" text, + ADD COLUMN "addr:city" text, + ADD COLUMN "addr:state" text, ADD COLUMN loc_geom geometry(multipolygon,4326), ADD COLUMN building_count integer, ADD COLUMN repeating BOOLEAN DEFAULT FALSE; +update parcels__public_ set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL; + -- create local geometry fields and validate geometries -UPDATE parcels__public_ SET loc_geom = ST_MakeValid(geom); -CREATE INDEX ON parcels__public_ USING GIST (loc_geom); +--UPDATE parcels__public_ SET loc_geom = ST_MakeValid(geom); +--CREATE INDEX ON parcels__public_ USING GIST (loc_geom); -- parse and expand parcel street addresses -- TODO: find/handle oddballs like 123A Main St and 123 Main St #4 @@ -60,6 +73,7 @@ BEGIN WHEN 'CDS' THEN 'Cul-de-sac' WHEN 'CIR' THEN 'Circle' WHEN 'CMNS' THEN 'Commons' + WHEN 'CNTR' THEN 'Center' WHEN 'CONC' THEN 'Concession' WHEN 'CRES' THEN 'Crescent' WHEN 'CRST' THEN 'Crest' @@ -85,7 +99,8 @@ BEGIN WHEN 'LOOP' THEN 'Loop' WHEN 'MALL' THEN 'Mall' WHEN 'MAL' THEN 'Mall' - WHEN 'MTWY' THEN 'Motorway' + WHEN 'MTN' THEN 'Mountain' + WHEN 'MTWY' THEN 'Motorway' WHEN 'OVAL' THEN 'Oval' WHEN 'OPAS' THEN 'Overpass' WHEN 'OVPS' THEN 'Overpass' @@ -109,7 +124,9 @@ BEGIN WHEN 'RUE' THEN 'Rue' WHEN 'RUN' THEN 'Run' WHEN 'SKWY' THEN 'Skyway' - WHEN 'SPUR' THEN 'Spur' + WHEN 'SPGS' THEN 'Springs' + WHEN 'SPRGS' THEN 'Springs' + WHEN 'SPUR' THEN 'Spur' WHEN 'SQ' THEN 'Square' WHEN 'SR' THEN 'State Route' WHEN 'STCT' THEN 'Street Court' @@ -172,6 +189,11 @@ update parcels__public_ SET "addr:housenumber" = NULL, "addr:unit" = NULL where "addr:housenumber" IS NOT NULL; +-- parse city sratr +update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\*? ([A-Za-z]+)$', '\1')), + "addr:state" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\*? ([A-Za-z]+)$', '\2')) + where situsfmt2 SIMILAR TO '([A-Za-z]+)\*? ([A-Za-z]+)' IS NOT NULL; + -- basic 123 Main with no common suffixes or numbers update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+)$', '\1')), "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]+)$', '\2')) where situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]+)' @@ -201,12 +223,14 @@ update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\2 ')) -- Main / 4th / A / Saint || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})$', '\3'))) -- Street / Johns where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{1,99}) ([A-Z]{2,99})'; --- now 123 Twin Oaks Ln or 123 St Oaks Pl -update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 123 - "addr:street" = initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\2'))) -- St / Los - || initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', ' \3 ')) -- Pl / Main - || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\4'))) -- Dr / Oak - where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})'; +-- now 123 Twin Oaks Ln or 123 St Oaks Pl or 12690 Redwood Hwy So or 1300 19th Hole Dr +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 123 + "addr:street" = initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\2'))) -- St / Los / 19th + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\3'))) -- Pl / Main + || ' ' -- space + || initcap(expand_road(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\4')))) -- Dr / Oak / So + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})'; -- now 123 E Cherry Creek Rd update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 123 "addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{2,99})$', '\2'))) -- South @@ -227,8 +251,28 @@ update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\2 ')) -- Main / 4th || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\3'))) -- Street || ' ' -- space - || initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\4'))) -- S (South) - where situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})'; + || initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})$', '\4'))) -- S (South) + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})'; + +-- and 14521 CANYON 2 RD +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\1')), -- 123 + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\2')) -- Canyon + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\3'))) -- 2 + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\4'))) -- Rd + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})'; + +-- and 15560 UPPER CANYON 3 RD +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\1')), -- 123 + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\2')) -- Upper + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\3'))) -- Canyon + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\4'))) -- 2 + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})$', '\5'))) -- Rd + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([0-9]{1}) ([A-Z]{2})'; -- and the even more lovable 123 Main Hill St S update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\1')), -- 123 @@ -239,40 +283,124 @@ update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt || initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})$', '\5'))) -- S (South) where situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([0-9A-Z]{2,99}) ([A-Z]{1})'; +-- now 10000 FRANZ VALLEY SCHOOL RD / 6401 MTN VIEW RANCH RD / 3762 MANOR LN WEST BRANCH / 222 RAGLE RD SOUTH RD/ 300 ROHNERT PARK EXPWY WEST +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 10000 + "addr:street" = initcap(expand_road(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\2')))) -- Franz + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\3'))) -- Valley + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\4'))) -- School + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\5'))) -- Road + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})'; + +-- now 27801 STEWARTS PT SKAGGS SPRGS RD +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\1')), -- 27801 + "addr:street" = initcap(expand_road(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\2')))) -- Stewarts + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\3'))) -- Point + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\4'))) -- Skaggs + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\5'))) -- Springs + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})$', '\6'))) -- Road + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})'; + +-- 131 LYNCH CREEK A WAY + +-- 935 W SPAIN UNIT B ST + +-- now 1706 B W COLLEGE AVE + +-- now 2347 MARIA LUZ E CT +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\1')), -- 123 + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\2')) -- Town & Country + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\3'))) -- Drive + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})'; + + +-- 622 ELY S BLVD + +-- 292 ELY BLVD S BLVD + +-- now 1460 TOWN & COUNTRY DR +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\1')), -- 123 + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\2')) -- Town & Country + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\3'))) -- Drive + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})'; -- for these apartment numbers we're gonna need to start using some functions -- no direction but three words in street name -update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123 - "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\2 \3 ')) -- La Main - || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\4'))), -- Street - "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\5')) -- Unit 4 - where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)'; --- suffix direction -update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123 - "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\2 ')) -- Main / 4th - || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\3'))) -- Street +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123 + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\2 \3 ')) -- La Main + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\4'))), -- Street + "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\5')) -- Unit 4 + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{4,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)'; +-- suffix direction and two words +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123 + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\2 ')) -- Main / 4th + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\3'))) -- Street || ' ' -- space - || initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\4'))), -- S (South) - "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)$', '\5')) -- Unit 4 / A - where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([0-9A-Z\-]+)'; --- prefix direction -update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123 - "addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\2'))) -- S (South) - || initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', ' \3 ')) -- Main / 4th - || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\4'))), -- Street - "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)$', '\5')) -- Unit 4 / A - where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([0-9A-Z\-]+)'; --- no direction but two words in street name -update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123 - "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\2 ')) -- Main / 4th - || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\3'))), -- Street - "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\4')) -- Unit 4 / A / 1-A-B2 - where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([0-9A-Z\-]+)'; --- no direction but one word in street name -update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\1')), -- 123 - "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\2')), -- Main / 4th - "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)$', '\3')) -- Unit 4 / A / 1-A-B2 - where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+)[ ]+[#]+([0-9A-Z\-]+)'; + || initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\4'))), -- S (South) + "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)$', '\5')) -- Unit 4 / A + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]{3,99}) ([A-Z]{2,99}) ([A-Z]{1})[ ]+[#]+([ 0-9A-Z\-]+)'; +-- prefix direction and two words +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123 + "addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\2'))) -- S (South) + || initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', ' \3 ')) -- Main / 4th + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\4'))), -- Street + "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\5')) -- Unit 4 / A + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)'; +-- prefix direction and three words like 1323 W DRY CREEK RD #2 +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123 + "addr:street" = initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\2'))) -- S (South) + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\3'))) -- Dry + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\4'))) -- Creek/Ext + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\5'))), -- Road + "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\6')) -- Unit 4 / A + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{1}) ([0-9A-Z]{3,99}) ([0-9A-Z]{3,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)'; +-- no direction and two words in street name +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123 + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\2 ')) -- Main / 4th + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\3'))), -- Street + "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\4')) -- Unit 4 / A / 1-A-B2 + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)'; +-- no direction and one word in street name +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 123 + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\2')), -- Main / 4th + "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)$', '\3')) -- Unit 4 / A / 1-A-B2 + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+)[ ]+[#]+([ 0-9A-Z\-]+)'; +-- no direction and five words in street name like 31510 STEWARTS PT SKAGGS SPRGS RD #B +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\1')), -- 31510 + "addr:street" = initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\2'))) -- Stewarts + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\3'))) -- Point + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\4'))) -- Skaggs + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\5'))) -- Springs + || ' ' -- space + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\6'))), -- Road + "addr:unit" = initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)$', '\7'))) -- Unit B + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)'; + +-- no direction, two words in street name, and "STE XXX" or "Ste XXX" in the unit +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)$', '\1')), -- 123 + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)$', '\2 ')) -- Main / 4th + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)$', '\3'))), -- Street + "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)$', '\4')) -- STE 4 / A / 1-A-B2 + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)'; +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+Ste ([0-9A-Z\-]+)$', '\1')), -- 123 + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+Ste ([0-9A-Z\-]+)$', '\2 ')) -- Main / 4th + || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+Ste ([0-9A-Z\-]+)$', '\3'))), -- Street + "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+Ste ([0-9A-Z\-]+)$', '\4')) -- Ste 4 / A / 1-A-B2 + where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+Ste ([0-9A-Z\-]+)'; -- 123 D EXT ST is a unique case that actually needs to be 123 D Street Extension update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) EXT ST$', '\1')), @@ -287,28 +415,48 @@ update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)$', '\1')), "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)$', 'Highway \2')), "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)$', '\3')) - where "addr:housenumber" is null and situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)'; + where "addr:housenumber" is null and situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+)[ ]+#([0-9A-Z]+)'; --- TODO: replace Mc([a-z]) with Mc(upper(\1)) when preceded by nothingness or a space --- capitalize Macarthur, Macfarlane, Mackinnon but not Macaw Mackey Mackl[iy]n or Macmahan --- investigate Blank Road --- consider "0" housenumbers --- remove leading zeroes in housenumbers --- remove "Ste" from unit --- 5330 OLD REDWOOD HWY #A B & C --- 27801 STEWARTS PT SKAGGS SPRGS RD --- 10000 FRANZ VALLEY SCHOOL RD --- 1003 HWY 116 N --- 1382 HWY 116 S #1 --- 100 SPRING MTN SUMMIT TRL --- 1055 BROADWAY #C, D --- 1055 BROADWAY #E - H --- 10826 SUMMER HOME PARK RD --- 1323 W DRY CREEK RD #2 --- 1340 19TH HOLE DR --- 14521 CANYON 2 RD --- 14578 CANYON 1 RD --- 1460 TOWN & COUNTRY DR +-- 123 HWY 116 N #C1 is a unique case that needs to be 123 Highway 116 North, Unit C1 +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+) ([A-Z]+)[ ]+#([0-9A-Z]+)$', '\1')), -- 123 + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+) ([A-Z]+)[ ]+#([0-9A-Z]+)$', 'Highway \2')) -- Highway 116 + || ' ' -- space + || initcap(expand_direction(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+) ([A-Z]+)[ ]+#([0-9A-Z]+)$', '\3'))), -- North + "addr:unit" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+) ([A-Z]+)[ ]+#([0-9A-Z]+)$', '\4')) -- Unit C1 + where "addr:housenumber" is null and situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+) ([A-Z]+)[ ]+#([0-9A-Z]+)'; + +-- 3333 STEWART PT SKAGGS SPRING RD is a unique case that needs to be Stewarts Point-Skaggs Springs Road +update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) STEWART PT SKAGGS SPRING RD$', '\1')), + "addr:street" = 'Stewarts Point-Skaggs Springs Road' + where "addr:housenumber" is null and situsfmt1 SIMILAR TO '([0-9]+) STEWART PT SKAGGS SPRING RD'; + +-- remove "Ste", "Kandace", "Starr" from unit +update parcels__public_ SET "addr:unit" = REGEXP_REPLACE("addr:unit", 'Ste', '') where "addr:unit" LIKE '%Ste%'; +update parcels__public_ SET "addr:unit" = REGEXP_REPLACE("addr:unit", 'Kandace', '') where "addr:unit" LIKE '%Kandace%'; +update parcels__public_ SET "addr:unit" = REGEXP_REPLACE("addr:unit", 'Starr', '') where "addr:unit" LIKE '%Starr%'; +-- remove "Ln" from unit and move it to the street +update parcels__public_ SET "addr:unit" = REGEXP_REPLACE("addr:unit", 'Ln', ''), + "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+)[ ]+#LN$', '\2 Lane')) + where situsfmt1 LIKE '%#LN%'; + +-- properly categorize certain Scottish last names (we're drawing the line at Mackey and non-Scottish Mc* / Mac*) +update parcels__public_ SET "addr:street" = REGEXP_REPLACE("addr:street", 'Mcarthur', 'McArthur') where "addr:street" LIKE '%Mcarthur%'; +update parcels__public_ SET "addr:street" = REGEXP_REPLACE("addr:street", 'Mcdowell', 'McDowell') where "addr:street" LIKE '%Mcdowell%'; +update parcels__public_ SET "addr:street" = REGEXP_REPLACE("addr:street", 'Macarthur', 'MacArthur') where "addr:street" LIKE '%Macarthur%'; +update parcels__public_ SET "addr:street" = REGEXP_REPLACE("addr:street", 'Macfarlane', 'MacFarlane') where "addr:street" LIKE '%Macfarlane%'; +update parcels__public_ SET "addr:street" = REGEXP_REPLACE("addr:street", 'Mackinnon', 'MacKinnon') where "addr:street" LIKE '%Mackinnon%'; +update parcels__public_ SET "addr:street" = REGEXP_REPLACE("addr:street", 'Macmahan', 'MacMahan') where "addr:street" LIKE '%Macmahan%'; + +-- Stewarts Point-Skaggs Springs Road is the OpenStreetMap name for this street, override +update parcels__public_ SET "addr:street" = 'Stewarts Point-Skaggs Springs Road' where situsfmt1 LIKE '%STEWART%SKAGG%'; + +-- FYI this dataset has "Blank Road" but that is an actual real road +-- TODO: consider "0" housenumbers + +-- 900 TRANSPORT WAY #A&B +-- 21075 RIVER BLVD #1 & 2 +-- 34 A&B RANDALL LN +-- 34 A & B RANDALL LN -- 99 e SHILOH RD -- @@ -345,7 +493,13 @@ FROM bcounts WHERE bcounts.gid = parcels__public_.gid; UPDATE sonoma_county_building_outlines SET "addr:housenumber" = NULL, "addr:street" = NULL; WITH a AS ( SELECT - b.gid, p."addr:housenumber", p."addr:street" + b.gid, + p."addr:housenumber", + p."addr:street", + p."addr:unit", + p."addr:city", + p."addr:state", + p.usecode FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON ST_Intersects(b.loc_geom,p.loc_geom) AND ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom) @@ -353,7 +507,11 @@ WITH a AS ( ) UPDATE sonoma_county_building_outlines SET "addr:housenumber" = a."addr:housenumber", - "addr:street" = a."addr:street" + "addr:street" = a."addr:street", + "addr:unit" = a."addr:unit", + "addr:city" = a."addr:city", + "addr:state" = a."addr:state", + "x_son_imp:usecode" = a.usecode FROM a WHERE sonoma_county_building_outlines.gid = a.gid; --SELECT COUNT(*) FROM sonoma_county_building_outlines WHERE "addr:housenumber" IS NOT NULL OR "addr:street" IS NOT NULL; @@ -383,7 +541,7 @@ FROM sizes WHERE sizes.bid = sonoma_county_building_outlines.gid; -- now assign addresses to main buildings on parcels with outbuildings WITH a AS ( SELECT - b.gid, p."addr:housenumber", p."addr:street" + b.gid, p."addr:housenumber", p."addr:street", p.usecode FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON ST_Intersects(b.loc_geom,p.loc_geom) AND ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom) @@ -394,7 +552,8 @@ WITH a AS ( ) UPDATE sonoma_county_building_outlines SET "addr:housenumber" = a."addr:housenumber", - "addr:street" = a."addr:street" + "addr:street" = a."addr:street", + "usecode" = a.usecode FROM a WHERE sonoma_county_building_outlines.gid = a.gid; -- get a count of outbuildings so we know how many addresses are intentionally unassigned @@ -420,7 +579,8 @@ WITH addresses AS ( SELECT b.gid, array_to_string( ARRAY_AGG(DISTINCT p."addr:housenumber"), ';') AS housenumber, - array_to_string( ARRAY_AGG(DISTINCT p."addr:street"), ';') AS street + array_to_string( ARRAY_AGG(DISTINCT p."addr:street"), ';') AS street, + p.usecode FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON ST_Intersects(b.loc_geom,p.loc_geom) AND ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom) @@ -432,7 +592,8 @@ WITH addresses AS ( ) UPDATE sonoma_county_building_outlines AS b SET "addr:housenumber" = housenumber, - "addr:street" = street + "addr:street" = street, + "usecode" = a.usecode FROM addresses AS a WHERE a.gid = b.gid; @@ -568,3 +729,36 @@ FROM ( WHERE ST_Intersects(poly.geom, baz.geom) AND ST_Area(st_intersection(poly.geom, baz.geom))/ST_Area(baz.geom) > 0.9; + + + +------- TODO + +alter table sonoma_building_outlines add column cid integer; + +-- Drop TAZs that aren't near SJ +with hull as ( + select ST_ConvexHull(ST_Collect(geom)) as geom from ( + union select geom + from "sonoma_building_outlines" + ) as geom) + delete from VTATaz + using hull + where not ST_Intersects(VTATaz.geom, hull.geom); + +-- Assign cluster to each data point +update sonoma_building_outlines as t + set cid = taggedThing.key + from ( + select (row_number() over (partition by sonoma_building_outlines.gid order by ST_Distance(sonoma_building_outlines.geom, VTATaz.geom))) as rn, + VTATaz.key, sonoma_building_outlines.gid + from sonoma_building_outlines + join VTATaz + on ST_Intersects(sonoma_building_outlines.geom, VTATaz.geom) + ) as taggedThing + where t.gid = taggedThing.gid and rn = 1; +-- More specifically drop TAZs that don't have any SJ data in them +delete from VTATaz + where key not in ( + select distinct cid from sonoma_building_outlines + ); diff --git a/reference/basemap.py b/reference/basemap.py new file mode 100644 index 0000000..8246bba --- /dev/null +++ b/reference/basemap.py @@ -0,0 +1,139 @@ +def filterLayer(layer): + if layer is None: + print("filterLayer: empty") + return None + + print(layer.GetName()) + + #if layer.GetName() in ["buildingfootprint", "Site_Address_Points", "mergedbuildings", "namedparcels"]: + if layer.GetName() in ["buildingfootprint_filtered", "Site_Address_Points_filtered", "mergedbuildings_filtered", "namedparcels_filtered"]: + return layer + +def mergeToRanges(ls): + """ Takes a list like ['1', '2', '3', '5', '8', 9'] and returns a list like + ['1-3', '5', '8', '9'] """ + if len(ls) < 2: + return ls + i = 0 + while i < len(ls)-1 and \ + ((ls[i].isdigit() and ls[i+1].isdigit() and \ + int(ls[i])+1 == int(ls[i+1])) or \ + (len(ls[i]) == 1 and len(ls[i+1]) == 1 and \ + ord(ls[i])+1 == ord(ls[i+1]))): + i += 1 + if i < 2: + return ls[0:i+1]+mergeToRanges(ls[i+1:]) + else: + return [ls[0]+'-'+ls[i]]+mergeToRanges(ls[i+1:]) + +# I don't actually know if the building heights are in US standard feet or +# survey feet. But the difference is less than the significant digits for the +# tallest building. +SURVEY_FEET_TO_METER = 1200.0/3937.0 + +def filterTags(attrs): + if attrs is None: + print("filterTags: empty") + return None + + tags = {} + + if "bldgelev" in attrs: + # BuildingFootprint + tags["building"] = "yes" + # Always appear, has equivalent + tags["height"] = "%.02f"%round(float(attrs["bldgheight"])*SURVEY_FEET_TO_METER, 2) + tags["ele"] = "%.02f"%round(float(attrs["bldgelev"])*SURVEY_FEET_TO_METER, 2) + + # Always appear, no equivalent: FACILITYID + # Sometimes appear, no equivalent: LASTUPDATE + # Empty: LENGTH, SHAPE_AREA + + if "Inc_Muni" in attrs: + # Site_Address_Points + # Always appear, has equivalent + tags["addr:city"] = attrs["Inc_Muni"] + + # Sometimes appear, has equivalent + addr = attrs["Add_Number"] + if addr: + addr = addr.split(';') + m = max(map(len, addr)) + addr.sort(key=lambda a: a.rjust(m)) + addr = ';'.join(mergeToRanges(addr)) + if attrs["AddNum_Suf"]: + addr += " " + attrs["AddNum_Suf"] + tags["addr:housenumber"] = addr + + street = attrs["CompName"] + if street: + if street.startswith("St "): street = "Saint"+street[2:] + elif street.startswith("Mt "): street = "Mount"+street[2:] + elif street.startswith("East St "): street = "East Saint"+street[7:] + elif street.startswith("West St "): street = "West Saint"+street[7:] + tags["addr:street"] = street + + units = attrs["Unit"] + if units: + units = units.split(';') + m = max(map(len, units)) + units.sort(key=lambda a: a.rjust(m)) + units = ';'.join(mergeToRanges(units)) + tags["addr:unit"] = units + + zipcode = attrs["Post_Code"] + if zipcode: tags["addr:postcode"] = zipcode + + pt = attrs["Place_Type"] + #if pt == "BU": + #tags["office"] = "yes" + if pt == "ED": + tags["amenity"] = "school" + elif pt == "FB": + tags["amenity"] = "place_of_worship" + elif pt == "GO": + tags["office"] = "government" + elif pt == "GQ": + # Salvation army + tags["amenity"] = "social_facility" + elif pt == "HS": + tags["amenity"] = "hospital" + elif pt == "HT" and not units: + tags["tourism"] = "hotel" + elif pt == "RE": + tags["club"] = "sport" + elif pt == "RT": + tags["amenity"] = "restaurant" + elif pt == "RL": + tags["shop"] = "yes" + elif pt == "TR": + tags["public_transport"] = "platform" + + # Always appear, no equivalent: OBJECTID, Site_NGUID, ESN, Lat, Long, Status, Juris_Auth, LastUpdate, LastEditor, GlobalID + # FullMailin could be used for addr:full, but it's unneeded. + # Sometimes appear, no equivalent: RCL_NGUID, StreetMast, ParcelID, CondoParce, UnitID, RSN, PSAP_ID, St_PreDirA, St_PreTyp, StreetName, St_PosTyp, St_PosTypC, St_PosTypU, St_PosDir, Feanme, FullName, Unit_Type, Building, FullUnit, FullAddres, Addtl_Loc, LSt_PreDir, LSt_Name, LSt_Type, Uninc_Comm, Post_Comm, Source, Effective, Notes + # Always the same: Client_ID, County, State, Country, Placement + # Always empty: Site_NGU00, AddNum_Pre, St_PreMod, St_PreDir, St_PreSep, St_PosMod, Floor, Room, Seat, Post_Code4, APN, LStPostDir, AddCode, AddDataURI, Nbrhd_Comm, MSAGComm, LandmkName, Mile_Post, Elev, Expire + + if "Inc_Muni" in attrs and "bldgelev" in attrs: + # Merged address/buildings + # other Place_Type are Common Area (multi-use), Miscellaneous + tags["building"] = {"BU": "commercial", + "ED": "school", + "FB": "religious", + "GO": "government", + "HS": "hospital", + "HT": "hotel", + "MH": "static_caravan", + "Condominium": "residential", + "MF": "residential", + "RL": "retail", + "RT": "retail", + "SF": "house"}.get(attrs["Place_Type"], "yes") + + if "Addtl_Loc" in attrs and "Inc_Muni" not in attrs: + # Named parcels + tags["landuse"] = "residential" + tags["name"] = attrs["Addtl_Loc"].title() + + return tags diff --git a/export.sh b/reference/export.sh similarity index 95% rename from export.sh rename to reference/export.sh index 8ff811b..7882de8 100644 --- a/export.sh +++ b/reference/export.sh @@ -78,7 +78,10 @@ for intersects in false true; do outdir="clean" intersectsQuery="not intersectsExisting" fi - + + # The purpose of the out/*/buildings*.osm files is to publicly host, split, ready for tasking + # https://codeforsanjose.github.io/OSM-SouthBay/SJ_Buildings/out/clean/buildings_1323.osm + ogr2ogr -sql "select 'https://codeforsanjose.github.io/OSM-SouthBay/SJ_Buildings/out/${outdir}/buildings_' || key || '.osm' as import_url, ST_SimplifyPreserveTopology(geom, 4) from VTATaz" \ -t_srs EPSG:4326 \ "out/grouped_${outdir}_buildings_zones.geojson" \ diff --git a/trial.py b/trial.py new file mode 100644 index 0000000..cfeab98 --- /dev/null +++ b/trial.py @@ -0,0 +1,34 @@ +def filterLayer(layer): + if layer is None: + print("filterLayer: empty") + return None + + print(layer.GetName()) + + #if layer.GetName() in ["buildingfootprint", "Site_Address_Points", "mergedbuildings", "namedparcels"]: + if layer.GetName() in ["sonoma_county_building_outlines_filtered"]: # simplified_conflated_buildings # _filtered + return layer + +def filterTags(attrs): + if attrs is None: + print("filterTags: empty") + return None + + tags = { "building": "yes" } + + if "gid" in attrs and attrs["gid"] != "": + tags["x_son_p:gid"] = attrs["gid"] + if "addr:state" in attrs and attrs["addr:state"] != "": + tags["addr:state"] = attrs["addr:state"] + if "addr:city" in attrs and attrs["addr:city"] != "": + tags["addr:city"] = attrs["addr:city"] + if "addr:street" in attrs and attrs["addr:street"] != "": + tags["addr:street"] = attrs["addr:street"] + if "addr:housenumber" in attrs and attrs["addr:housenumber"] != "": + tags["addr:housenumber"] = attrs["addr:housenumber"] + if "addr:unit" in attrs and attrs["addr:unit"] != "": + tags["addr:unit"] = attrs["addr:unit"] + if "x_son_imp:usecode" in attrs and attrs["x_son_imp:usecode"] != "": + tags["sonoma:x_son_imp:usecode"] = attrs["x_son_imp:usecode"] #TODO: proper x_son_imp:usecode + + return tags diff --git a/trial.sh b/trial.sh new file mode 100755 index 0000000..0b2116a --- /dev/null +++ b/trial.sh @@ -0,0 +1,73 @@ +# Before running, download VTA TAZ data from Google Drive here: +# https://drive.google.com/file/d/0B098fXDVjQOhVHBFS0kwcDNGRlU/view +# and place into a folder named "data" +# (might need to rename VTATaz.dbf) + +export DBNAME=openstreetmap +export OGR2OSM=../ogr2osm/ogr2osm.py +export PGUSER=openstreetmap +export PGPASSWORD=openstreetmap +export PGHOST=localhost +export PGPORT=5432 + +# DB setup +psql --echo-all --command="create extension if not exists hstore;" "${DBNAME}" -h $PGHOST -U $PGUSER +psql --echo-all --command="create extension if not exists postgis;" "${DBNAME}" -h $PGHOST -U $PGUSER + +# Add ESRI:103240 to PostGIS for TAZ +# from https://github.com/Esri/projection-engine-db-doc/ +psql --echo-all --file="103240.sql" "${DBNAME}" -h $PGHOST -U $PGUSER + +echo "Importing TAZ" +shp2pgsql -d -D -s 103240 -I "data/VTATaz" | psql -d "${DBNAME}" -h $PGHOST -U $PGUSER >/dev/null + +# Conflate addresses to buildings +psql -v "ON_ERROR_STOP=true" --echo-queries --file="conflation.sql" "${DBNAME}" -h $PGHOST -U $PGUSER + + + +# Split into tasks +mkdir "out" +mkdir "out/intersecting" +mkdir "out/clean" + +for intersects in false true; do + if ${intersects}; then + outdir="intersecting" + intersectsQuery="conflated" + else + outdir="clean" + intersectsQuery="not conflated" + fi + + # The purpose of the out/*/buildings*.osm files is to publicly host, split, ready for tasking + # https://codeforsanjose.github.io/OSM-SouthBay/SJ_Buildings/out/clean/buildings_1323.osm + + ogr2ogr -sql "select 'https://github.com/zyphlar/sonoma-import/raw/main/out/${outdir}/buildings_' || key || '.osm' as import_url, ST_SimplifyPreserveTopology(geom, 4) from VTATaz" \ + -t_srs EPSG:4326 \ + "out/grouped_${outdir}_buildings_zones.geojson" \ + "PG:dbname=${DBNAME} host=${PGHOST} user=${PGUSER} password=${PGPASSWORD}" + sed -i 's/ //g' "out/grouped_${outdir}_buildings_zones.geojson" + + # TAZ IDs from 965 to 1050 are Sonoma County broken up unto convenient polygons + for cid in {965..1050}; do + # Skip empty TAZs + if [ $(psql --command="copy (select count(*) from VTATaz where key=${cid}) to stdout csv" ${DBNAME} -h $PGHOST -U $PGUSER) = 0 ]; then + continue + fi + + output="out/${outdir}/buildings_${cid}.osm" + + + # Filter export data to each CID + for layer in "sonoma_county_building_outlines"; do + psql -h $PGHOST -U $PGUSER -v "ON_ERROR_STOP=true" --echo-queries --command="create or replace view \"${layer}_filtered\" as select * from \"${layer}\" where ${intersectsQuery};" "${DBNAME}" + done + + # Export to OSM + python3 "${OGR2OSM}" "PG:dbname=${DBNAME} host=${PGHOST} user=${PGUSER} password=${PGPASSWORD}" -f -t trial.py --no-memory-copy -o "${output}" + + # Add sample region outline + #sed -i '3i' "${output}" + done +done