Improve unit/city/state; TODO: ensure conflation actually matches at each step

This commit is contained in:
Will Bradley 2021-11-30 15:12:22 -08:00
parent 10b2e5848d
commit 0662cf6138

View File

@ -537,8 +537,9 @@ WITH bcounts AS (
UPDATE parcels_public_shapefile SET building_count = count UPDATE parcels_public_shapefile SET building_count = count
FROM bcounts WHERE bcounts.gid = parcels_public_shapefile.gid; FROM bcounts WHERE bcounts.gid = parcels_public_shapefile.gid;
-- clear out data
UPDATE buildings SET "addr:housenumber" = NULL, "addr:street" = NULL, "addr:unit" = NULL, "addr:city" = NULL, "addr:state" = NULL;
-- add addresses to buildings with simple 1:1 matches to parcels -- add addresses to buildings with simple 1:1 matches to parcels
UPDATE buildings SET "addr:housenumber" = NULL, "addr:street" = NULL;
WITH a AS ( WITH a AS (
SELECT SELECT
b.gid, b.gid,
@ -589,7 +590,7 @@ FROM sizes WHERE sizes.bid = buildings.gid;
-- now assign addresses to main buildings on parcels with outbuildings -- now assign addresses to main buildings on parcels with outbuildings
WITH a AS ( WITH a AS (
SELECT SELECT
b.gid, p."addr:housenumber", p."addr:street", p.usecode b.gid, p."addr:housenumber", p."addr:street", p."addr:unit", p."addr:city", p."addr:state", p.usecode
FROM buildings AS b JOIN parcels_public_shapefile AS p ON FROM buildings AS b JOIN parcels_public_shapefile AS p ON
ST_Intersects(b.loc_geom,p.loc_geom) AND ST_Intersects(b.loc_geom,p.loc_geom) AND
ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom) ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom)
@ -601,6 +602,9 @@ WITH a AS (
UPDATE buildings SET UPDATE buildings SET
"addr:housenumber" = a."addr:housenumber", "addr:housenumber" = a."addr:housenumber",
"addr:street" = a."addr:street", "addr:street" = a."addr:street",
"addr:unit" = a."addr:unit",
"addr:city" = a."addr:city",
"addr:state" = a."addr:state",
"usecode" = CAST( a.usecode as INTEGER ) -- the original data is VARYING "usecode" = CAST( a.usecode as INTEGER ) -- the original data is VARYING
FROM a WHERE buildings.gid = a.gid; FROM a WHERE buildings.gid = a.gid;
@ -634,6 +638,9 @@ WITH addresses AS (
p.gid, p.gid,
array_to_string( ARRAY_AGG(DISTINCT p."addr:housenumber"), ';') AS housenumber, array_to_string( ARRAY_AGG(DISTINCT p."addr:housenumber"), ';') AS housenumber,
array_to_string( ARRAY_AGG(DISTINCT p."addr:street"), ';') AS street, array_to_string( ARRAY_AGG(DISTINCT p."addr:street"), ';') AS street,
array_to_string( ARRAY_AGG(DISTINCT p."addr:unit"), ';') AS unit,
array_to_string( ARRAY_AGG(DISTINCT p."addr:city"), ';') AS city,
array_to_string( ARRAY_AGG(DISTINCT p."addr:state"), ';') AS state,
array_to_string( ARRAY_AGG(DISTINCT p.usecode), ';') AS usecode array_to_string( ARRAY_AGG(DISTINCT p.usecode), ';') AS usecode
FROM buildings AS b JOIN parcels_public_shapefile AS p ON FROM buildings AS b JOIN parcels_public_shapefile AS p ON
ST_Intersects(b.loc_geom,p.loc_geom) AND ST_Intersects(b.loc_geom,p.loc_geom) AND
@ -647,6 +654,9 @@ WITH addresses AS (
UPDATE buildings AS b SET UPDATE buildings AS b SET
"addr:housenumber" = a.housenumber, "addr:housenumber" = a.housenumber,
"addr:street" = a.street, "addr:street" = a.street,
"addr:unit" = a.unit,
"addr:city" = a.city,
"addr:state" = a.state,
"usecode" = CAST( a.usecode as INTEGER ) -- the original data is VARYING "usecode" = CAST( a.usecode as INTEGER ) -- the original data is VARYING
FROM addresses AS a FROM addresses AS a
WHERE a.gid = b.gid; WHERE a.gid = b.gid;
@ -658,8 +668,12 @@ WHERE a.gid = b.gid;
WITH addresses AS ( WITH addresses AS (
SELECT SELECT
p.gid, p.gid,
array_to_string( ARRAY_AGG(DISTINCT p."addr:housenumber"), ';') AS addrno, array_to_string( ARRAY_AGG(DISTINCT p."addr:housenumber"), ';') AS housenumber,
array_to_string( ARRAY_AGG(DISTINCT p."addr:street"), ';') AS street, array_to_string( ARRAY_AGG(DISTINCT p."addr:street"), ';') AS street,
array_to_string( ARRAY_AGG(DISTINCT p."addr:unit"), ';') AS unit,
array_to_string( ARRAY_AGG(DISTINCT p."addr:city"), ';') AS city,
array_to_string( ARRAY_AGG(DISTINCT p."addr:state"), ';') AS state,
array_to_string( ARRAY_AGG(DISTINCT p.usecode), ';') AS usecode,
COUNT(*) AS count COUNT(*) AS count
FROM buildings AS b FROM buildings AS b
JOIN parcels_public_shapefile AS p ON JOIN parcels_public_shapefile AS p ON
@ -673,8 +687,12 @@ WITH addresses AS (
GROUP BY p.gid GROUP BY p.gid
) )
UPDATE buildings AS b SET UPDATE buildings AS b SET
"addr:housenumber" = addrno, "addr:housenumber" = a.housenumber,
"addr:street" = street "addr:street" = a.street,
"addr:unit" = a.unit,
"addr:city" = a.city,
"addr:state" = a.state,
"usecode" = CAST( a.usecode as INTEGER ) -- the original data is VARYING
FROM addresses AS a FROM addresses AS a
WHERE WHERE
a.count = 1 AND -- only simple cases, no duplicate address tags! a.count = 1 AND -- only simple cases, no duplicate address tags!