Properly load up and conflate VTATaz CID with buildings

This commit is contained in:
Will Bradley 2021-02-25 00:22:45 -08:00
parent ef4bfeed3e
commit 7624a68152
3 changed files with 109 additions and 110 deletions

View File

@ -1,3 +1,4 @@
delete from spatial_ref_sys where srid = 103240;
INSERT into spatial_ref_sys (srid, auth_name, auth_srid, proj4text, srtext) INSERT into spatial_ref_sys (srid, auth_name, auth_srid, proj4text, srtext)
values (103240, 'ESRI', 103240, values (103240, 'ESRI', 103240,
'+proj=lcc +ellps=GRS80 +units=us-ft +x_0=2000000.0 +y_0=500000.0 +lon_0=120d30''W +lat_0=36d30''N +lat_1=37d4''N +lat_2=38d26''N +towgs84=-0.9956000824677655,1.901299877314078,0.5215002840524426,0.02591500053005733,0.009425998542707753,0.01159900118427752,-0.00062000005129903 +no_defs ', '+proj=lcc +ellps=GRS80 +units=us-ft +x_0=2000000.0 +y_0=500000.0 +lon_0=120d30''W +lat_0=36d30''N +lat_1=37d4''N +lat_2=38d26''N +towgs84=-0.9956000824677655,1.901299877314078,0.5215002840524426,0.02591500053005733,0.009425998542707753,0.01159900118427752,-0.00062000005129903 +no_defs ',

View File

@ -10,15 +10,16 @@ ALTER TABLE sonoma_county_building_outlines
ADD COLUMN IF NOT EXISTS "addr:city" text, ADD COLUMN IF NOT EXISTS "addr:city" text,
ADD COLUMN IF NOT EXISTS "addr:state" text, ADD COLUMN IF NOT EXISTS "addr:state" text,
ADD COLUMN IF NOT EXISTS usecode integer, ADD COLUMN IF NOT EXISTS usecode integer,
ADD COLUMN IF NOT EXISTS loc_geom geometry(multipolygon,4326), ADD COLUMN IF NOT EXISTS cid integer,
ADD COLUMN IF NOT EXISTS loc_geom geometry(multipolygon,4326), -- local is the same in this case, except made valid
ADD COLUMN IF NOT EXISTS conflated boolean DEFAULT FALSE, ADD COLUMN IF NOT EXISTS conflated boolean DEFAULT FALSE,
ADD COLUMN IF NOT EXISTS main boolean; -- is it the main building on the parcel? ADD COLUMN IF NOT EXISTS main boolean; -- is it the main building on the parcel?
update sonoma_county_building_outlines set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL; update sonoma_county_building_outlines set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL, usecode = NULL, cid = NULL;
-- create local geometry fields and validate geometries -- create local geometry fields and validate geometries
--UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(geom); UPDATE sonoma_county_building_outlines SET loc_geom = ST_MakeValid(geom);
--CREATE INDEX ON sonoma_county_building_outlines USING GIST (loc_geom); CREATE INDEX ON sonoma_county_building_outlines USING GIST (loc_geom);
-- added fields for the parcels table -- added fields for the parcels table
ALTER TABLE parcels__public_ ALTER TABLE parcels__public_
@ -27,15 +28,16 @@ ALTER TABLE parcels__public_
ADD COLUMN IF NOT EXISTS "addr:unit" text, ADD COLUMN IF NOT EXISTS "addr:unit" text,
ADD COLUMN IF NOT EXISTS "addr:city" text, ADD COLUMN IF NOT EXISTS "addr:city" text,
ADD COLUMN IF NOT EXISTS "addr:state" text, ADD COLUMN IF NOT EXISTS "addr:state" text,
ADD COLUMN IF NOT EXISTS loc_geom geometry(multipolygon,4326), ADD COLUMN IF NOT EXISTS usecode integer,
ADD COLUMN IF NOT EXISTS loc_geom geometry(multipolygon,4326), -- local is the same in this case, except made valid
ADD COLUMN IF NOT EXISTS building_count integer, ADD COLUMN IF NOT EXISTS building_count integer,
ADD COLUMN IF NOT EXISTS repeating BOOLEAN DEFAULT FALSE; ADD COLUMN IF NOT EXISTS repeating BOOLEAN DEFAULT FALSE;
update parcels__public_ set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL; update parcels__public_ set "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL;
-- create local geometry fields and validate geometries -- create local geometry fields and validate geometries
--UPDATE parcels__public_ SET loc_geom = ST_MakeValid(geom); UPDATE parcels__public_ SET loc_geom = ST_MakeValid(geom);
--CREATE INDEX ON parcels__public_ USING GIST (loc_geom); CREATE INDEX ON parcels__public_ USING GIST (loc_geom);
-- parse and expand parcel street addresses -- parse and expand parcel street addresses
-- TODO: find/handle oddballs like 123A Main St and 123 Main St #4 -- TODO: find/handle oddballs like 123A Main St and 123 Main St #4
@ -326,6 +328,7 @@ update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt
-- 292 ELY BLVD S BLVD -- 292 ELY BLVD S BLVD
-- now 1460 TOWN & COUNTRY DR -- now 1460 TOWN & COUNTRY DR
--TODO: this does not seem to have any effect, needs rework
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\1')), -- 123 update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\2')) -- Town & Country "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z &]{4,99}) ([A-Z]{2,99})$', '\2')) -- Town & Country
|| ' ' -- space || ' ' -- space
@ -391,6 +394,7 @@ update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt
where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)'; where "addr:housenumber" IS NULL and situsfmt1 SIMILAR TO '([0-9]+) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99}) ([A-Z]{2,99})[ ]+[#]+([ 0-9A-Z\-]+)';
-- no direction, two words in street name, and "STE XXX" or "Ste XXX" in the unit -- no direction, two words in street name, and "STE XXX" or "Ste XXX" in the unit
-- TODO: this first one seems to have no effect
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)$', '\1')), -- 123 update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)$', '\1')), -- 123
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)$', '\2 ')) -- Main / 4th "addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)$', '\2 ')) -- Main / 4th
|| initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)$', '\3'))), -- Street || initcap(expand_road(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ([0-9A-Z]+) ([A-Z]+)[ ]+[#]+STE ([0-9A-Z\-]+)$', '\3'))), -- Street
@ -426,6 +430,7 @@ update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt
where "addr:housenumber" is null and situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+) ([A-Z]+)[ ]+#([0-9A-Z]+)'; where "addr:housenumber" is null and situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+) ([A-Z]+)[ ]+#([0-9A-Z]+)';
-- 3333 STEWART PT SKAGGS SPRING RD is a unique case that needs to be Stewarts Point-Skaggs Springs Road -- 3333 STEWART PT SKAGGS SPRING RD is a unique case that needs to be Stewarts Point-Skaggs Springs Road
-- TODO: this seems to have no effect
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) STEWART PT SKAGGS SPRING RD$', '\1')), update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) STEWART PT SKAGGS SPRING RD$', '\1')),
"addr:street" = 'Stewarts Point-Skaggs Springs Road' "addr:street" = 'Stewarts Point-Skaggs Springs Road'
where "addr:housenumber" is null and situsfmt1 SIMILAR TO '([0-9]+) STEWART PT SKAGGS SPRING RD'; where "addr:housenumber" is null and situsfmt1 SIMILAR TO '([0-9]+) STEWART PT SKAGGS SPRING RD';
@ -511,7 +516,7 @@ UPDATE sonoma_county_building_outlines SET
"addr:unit" = a."addr:unit", "addr:unit" = a."addr:unit",
"addr:city" = a."addr:city", "addr:city" = a."addr:city",
"addr:state" = a."addr:state", "addr:state" = a."addr:state",
"usecode" = a.usecode "usecode" = CAST( a.usecode as INTEGER ) -- the original data is VARYING
FROM a WHERE sonoma_county_building_outlines.gid = a.gid; FROM a WHERE sonoma_county_building_outlines.gid = a.gid;
--SELECT COUNT(*) FROM sonoma_county_building_outlines WHERE "addr:housenumber" IS NOT NULL OR "addr:street" IS NOT NULL; --SELECT COUNT(*) FROM sonoma_county_building_outlines WHERE "addr:housenumber" IS NOT NULL OR "addr:street" IS NOT NULL;
@ -553,7 +558,7 @@ WITH a AS (
UPDATE sonoma_county_building_outlines SET UPDATE sonoma_county_building_outlines SET
"addr:housenumber" = a."addr:housenumber", "addr:housenumber" = a."addr:housenumber",
"addr:street" = a."addr:street", "addr:street" = a."addr:street",
"usecode" = a.usecode "usecode" = CAST( a.usecode as INTEGER ) -- the original data is VARYING
FROM a WHERE sonoma_county_building_outlines.gid = a.gid; FROM a WHERE sonoma_county_building_outlines.gid = a.gid;
-- get a count of outbuildings so we know how many addresses are intentionally unassigned -- get a count of outbuildings so we know how many addresses are intentionally unassigned
@ -580,7 +585,7 @@ WITH addresses AS (
b.gid, b.gid,
array_to_string( ARRAY_AGG(DISTINCT p."addr:housenumber"), ';') AS housenumber, array_to_string( ARRAY_AGG(DISTINCT p."addr:housenumber"), ';') AS housenumber,
array_to_string( ARRAY_AGG(DISTINCT p."addr:street"), ';') AS street, array_to_string( ARRAY_AGG(DISTINCT p."addr:street"), ';') AS street,
p.usecode array_to_string( ARRAY_AGG(DISTINCT p.usecode), ';') AS usecode
FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON
ST_Intersects(b.loc_geom,p.loc_geom) AND ST_Intersects(b.loc_geom,p.loc_geom) AND
ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom) ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom)
@ -593,7 +598,7 @@ WITH addresses AS (
UPDATE sonoma_county_building_outlines AS b SET UPDATE sonoma_county_building_outlines AS b SET
"addr:housenumber" = housenumber, "addr:housenumber" = housenumber,
"addr:street" = street, "addr:street" = street,
"usecode" = a.usecode "usecode" = CAST( a.usecode as INTEGER ) -- the original data is VARYING
FROM addresses AS a FROM addresses AS a
WHERE a.gid = b.gid; WHERE a.gid = b.gid;
@ -638,127 +643,120 @@ WHERE
-- --
-- IF USING Overpass -> QGIS -> Postgres Dump: -- IF USING Overpass -> QGIS -> Postgres Dump:
UPDATE sonoma_county_building_outlines AS b SET conflated = FALSE; -- UPDATE sonoma_county_building_outlines AS b SET conflated = FALSE;
UPDATE sonoma_county_building_outlines AS b SET conflated = TRUE -- UPDATE sonoma_county_building_outlines AS b SET conflated = TRUE
FROM osmquery_buildings_pgdump AS osm -- FROM osmquery_buildings_pgdump AS osm
WHERE ST_Intersects(b.geom,osm.wkb_geometry) -- WHERE ST_Intersects(b.geom,osm.wkb_geometry)
AND osm.building IS NOT NULL and osm.building != 'no'; -- AND osm.building IS NOT NULL and osm.building != 'no';
-- IF USING a direct OSM2PGSQL import: -- IF USING a direct OSM2PGSQL import i.e. norcal-latest.osm.pbf:
UPDATE sonoma_county_building_outlines AS b SET conflated = TRUE UPDATE sonoma_county_building_outlines AS b SET conflated = TRUE
FROM son_polygon AS osm FROM son_polygon AS osm
WHERE ST_Intersects(b.geom,osm.way) WHERE ST_Intersects(b.geom,osm.way) --TODO: loc_geom
AND osm.building IS NOT NULL and osm.building != 'no'; AND osm.building IS NOT NULL and osm.building != 'no';
-- dump simplified polygon geometries and OSM relavant fields into another table for exporting -- dump simplified polygon geometries and OSM relavant fields into another table for exporting
-- this code is based on https://trac.osgeo.org/postgis/wiki/UsersWikiSimplifyPreserveTopology -- this code is based on https://trac.osgeo.org/postgis/wiki/UsersWikiSimplifyPreserveTopology
-- it does take a very long time to run on this dataset... -- it does take a very long time to run on this dataset...
-- TODO: this simplified conflated stuff kinda sucks, the geometry is too simple and there's duplicated GIDs, what if we just don't use it
-- first do conflated buildings -- first do conflated buildings
with poly as ( -- drop table simplified_conflated_buildings;
SELECT -- with poly as (
gid, -- SELECT
"addr:housenumber", -- gid,
"addr:street", -- "addr:housenumber",
"addr:unit", -- "addr:street",
(st_dump(loc_geom)).* -- "addr:unit",
FROM sonoma_county_building_outlines -- (st_dump(loc_geom)).*
WHERE conflated -- FROM sonoma_county_building_outlines
) -- WHERE conflated
SELECT -- )
poly.gid, -- SELECT
poly."addr:housenumber", -- poly.gid,
poly."addr:street", -- poly."addr:housenumber",
poly."addr:unit", -- poly."addr:street",
ST_Transform(baz.geom,4326) AS geom -- poly."addr:unit",
INTO simplified_conflated_buildings -- baz.geom AS geom -- ST_Transform(baz.geom,4326) AS geom
FROM ( -- INTO simplified_conflated_buildings
SELECT (ST_Dump(ST_Polygonize(distinct geom))).geom as geom -- FROM (
FROM ( -- SELECT (ST_Dump(ST_Polygonize(distinct geom))).geom as geom
-- simplify geometries to a 0.2m tolerance to avoid repeated points -- FROM (
SELECT (ST_Dump(st_simplifyPreserveTopology(ST_Linemerge(st_union(geom)), 0.2))).geom as geom -- -- simplify geometries to a 0.2m tolerance to avoid repeated points
FROM ( -- SELECT (ST_Dump(st_simplifyPreserveTopology(ST_Linemerge(st_union(geom)), 0.2))).geom as geom
SELECT ST_ExteriorRing((ST_DumpRings(geom)).geom) as geom -- FROM (
FROM poly -- SELECT ST_ExteriorRing((ST_DumpRings(geom)).geom) as geom
) AS foo -- FROM poly
) AS bar -- ) AS foo
) AS baz, poly -- ) AS bar
WHERE -- ) AS baz, poly
ST_Intersects(poly.geom, baz.geom) -- WHERE
AND ST_Area(st_intersection(poly.geom, baz.geom))/ST_Area(baz.geom) > 0.9; -- ST_Intersects(poly.geom, baz.geom)
ALTER TABLE simplified_conflated_buildings ADD CONSTRAINT temp1_pkey PRIMARY KEY (gid); -- AND ST_Area(st_intersection(poly.geom, baz.geom))/ST_Area(baz.geom) > 0.9;
-- ALTER TABLE simplified_conflated_buildings ADD CONSTRAINT temp1_pkey PRIMARY KEY (gid);
-- 233966 duplicated, deleted smaller
-- 248900 duplicated, deleted smaller
-- 246427 duplicated, deleted smaller
-- 240471 duplicated, deleted smaller
-- 277549 duplicated, deleted smaller
-- 269953
-- next do non-conflated buildings separately -- next do non-conflated buildings separately
with poly as ( -- with poly as (
SELECT -- SELECT
gid, -- gid,
"addr:housenumber", -- "addr:housenumber",
"addr:street", -- "addr:street",
"addr:unit", -- "addr:unit",
(st_dump(loc_geom)).* -- (st_dump(loc_geom)).*
FROM sonoma_county_building_outlines -- FROM sonoma_county_building_outlines
WHERE NOT conflated --note: NOT -- WHERE NOT conflated --note: NOT
) -- )
SELECT -- SELECT
poly.gid, -- poly.gid,
poly."addr:housenumber", -- poly."addr:housenumber",
poly."addr:street", -- poly."addr:street",
poly."addr:unit", -- poly."addr:unit",
ST_Transform(baz.geom,4326) AS geom -- ST_Transform(baz.geom,4326) AS geom
INTO simplified_buildings -- INTO simplified_buildings
FROM ( -- FROM (
SELECT (ST_Dump(ST_Polygonize(distinct geom))).geom as geom -- SELECT (ST_Dump(ST_Polygonize(distinct geom))).geom as geom
FROM ( -- FROM (
-- simplify geometries to a 0.2m tolerance to avoid repeated points -- -- simplify geometries to a 0.2m tolerance to avoid repeated points
SELECT (ST_Dump(st_simplifyPreserveTopology(ST_Linemerge(st_union(geom)), 0.2))).geom as geom -- SELECT (ST_Dump(st_simplifyPreserveTopology(ST_Linemerge(st_union(geom)), 0.2))).geom as geom
FROM ( -- FROM (
SELECT ST_ExteriorRing((ST_DumpRings(geom)).geom) as geom -- SELECT ST_ExteriorRing((ST_DumpRings(geom)).geom) as geom
FROM poly -- FROM poly
) AS foo -- ) AS foo
) AS bar -- ) AS bar
) AS baz, poly -- ) AS baz, poly
WHERE -- WHERE
ST_Intersects(poly.geom, baz.geom) -- ST_Intersects(poly.geom, baz.geom)
AND ST_Area(st_intersection(poly.geom, baz.geom))/ST_Area(baz.geom) > 0.9; -- AND ST_Area(st_intersection(poly.geom, baz.geom))/ST_Area(baz.geom) > 0.9;
-- project VTATaz coordinates into osm coordinates for matching
alter table VTATaz add column if not exists geom_4326 geometry(multipolygon, 4326);
update VTATaz set geom_4326 = ST_MakeValid(ST_Transform(ST_Multi(geom), 4326));
create index if not exists "geom_4326_idx" on VTATaz using GIST(geom_4326);
-- Drop TAZs that aren't near our dataset
------- TODO
alter table sonoma_building_outlines add column cid integer;
-- Drop TAZs that aren't near SJ
with hull as ( with hull as (
select ST_ConvexHull(ST_Collect(geom)) as geom from ( select ST_ConvexHull(ST_Collect(geom)) as geom from (
union select geom select geom
from "sonoma_building_outlines" from "sonoma_county_building_outlines"
) as geom) ) as geom)
delete from VTATaz delete from VTATaz
using hull using hull
where not ST_Intersects(VTATaz.geom, hull.geom); where not ST_Intersects(geom_4326, hull.geom);
-- Assign cluster to each data point -- Assign cluster to each data point
update sonoma_building_outlines as t update sonoma_county_building_outlines as t
set cid = taggedThing.key set cid = taggedThing.key
from ( from (
select (row_number() over (partition by sonoma_building_outlines.gid order by ST_Distance(sonoma_building_outlines.geom, VTATaz.geom))) as rn, select (row_number() over (partition by sonoma_county_building_outlines.gid order by ST_Distance(sonoma_county_building_outlines.loc_geom, geom_4326))) as rn,
VTATaz.key, sonoma_building_outlines.gid VTATaz.key, sonoma_county_building_outlines.gid
from sonoma_building_outlines from sonoma_county_building_outlines
join VTATaz join VTATaz
on ST_Intersects(sonoma_building_outlines.geom, VTATaz.geom) on ST_Intersects(sonoma_county_building_outlines.loc_geom, geom_4326)
) as taggedThing ) as taggedThing
where t.gid = taggedThing.gid and rn = 1; where t.gid = taggedThing.gid and rn = 1;
-- More specifically drop TAZs that don't have any SJ data in them -- More specifically drop TAZs that don't have any SJ data in them
delete from VTATaz -- delete from VTATaz
where key not in ( -- where key not in (
select distinct cid from sonoma_building_outlines -- select distinct cid from sonoma_county_building_outlines
); -- );

View File

@ -61,7 +61,7 @@ for intersects in false true; do
# Filter export data to each CID # Filter export data to each CID
for layer in "sonoma_county_building_outlines"; do for layer in "sonoma_county_building_outlines"; do
psql -h $PGHOST -U $PGUSER -v "ON_ERROR_STOP=true" --echo-queries --command="create or replace view \"${layer}_filtered\" as select * from \"${layer}\" where ${intersectsQuery};" "${DBNAME}" psql -h $PGHOST -U $PGUSER -v "ON_ERROR_STOP=true" --echo-queries --command="create or replace view \"${layer}_filtered\" as select * from \"${layer}\" where cid=${cid} and ${intersectsQuery};" "${DBNAME}"
done done
# Export to OSM # Export to OSM