mirror of
https://github.com/zyphlar/sonoma-import.git
synced 2024-03-08 15:07:48 +00:00
working on fixing spot checked validation errors
This commit is contained in:
parent
b19515c758
commit
f4bef0bc77
12
README.md
12
README.md
|
@ -132,6 +132,18 @@ Please ensure you are logged in under a dedicated import account with a user nam
|
||||||
- TODO:
|
- TODO:
|
||||||
- Ensure `source=Sonoma County` is on each changeset
|
- Ensure `source=Sonoma County` is on each changeset
|
||||||
- Open OSM files in JOSM and run the validator on them
|
- Open OSM files in JOSM and run the validator on them
|
||||||
|
- Why no city name?
|
||||||
|
- Duplicate addresses:
|
||||||
|
- 226 5th Street, Petaluma (southerly should be 6th)
|
||||||
|
- 309, 709, 711, 900, 942 5th Street
|
||||||
|
- 425 B Street
|
||||||
|
- 117 East Court
|
||||||
|
- 323 Edith St
|
||||||
|
- 501 Mountain View Ave
|
||||||
|
- 335 Palmer St
|
||||||
|
- 710 Petaluma Blvd S
|
||||||
|
- 221, 337 Wilson St
|
||||||
|
- Remove "0" housenumbers
|
||||||
|
|
||||||
- http://download.geofabrik.de/north-america/us/california/norcal-latest.osm.pbf
|
- http://download.geofabrik.de/north-america/us/california/norcal-latest.osm.pbf
|
||||||
|
|
||||||
|
|
|
@ -191,18 +191,30 @@ update parcels__public_ SET "addr:housenumber" = NULL,
|
||||||
"addr:city" = NULL,
|
"addr:city" = NULL,
|
||||||
"addr:state" = NULL;
|
"addr:state" = NULL;
|
||||||
|
|
||||||
-- parse city and state
|
-- parse city* and state
|
||||||
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\* ([A-Za-z]+)$', '\1')),
|
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\* ([A-Za-z]+)$', '\1')),
|
||||||
"addr:state" = REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\* ([A-Za-z]+)$', '\2')
|
"addr:state" = REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+)\* ([A-Za-z]+)$', '\2')
|
||||||
where situsfmt2 SIMILAR TO '([A-Za-z]+)\* ([A-Za-z]+)' AND "addr:city" IS NULL;
|
where situsfmt2 SIMILAR TO '([A-Za-z]+)\* ([A-Za-z]+)' AND "addr:city" IS NULL;
|
||||||
-- with spaces
|
-- city* with spaces
|
||||||
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+)$', '\1')),
|
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+)$', '\1')),
|
||||||
"addr:state" = REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+)$', '\2')
|
"addr:state" = REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+)$', '\2')
|
||||||
where situsfmt2 SIMILAR TO '([A-Za-z ]+)\* ([A-Za-z]+)' AND "addr:city" IS NULL;
|
where situsfmt2 SIMILAR TO '([A-Za-z ]+)\* ([A-Za-z]+)' AND "addr:city" IS NULL;
|
||||||
-- with spaces and zip
|
-- city and state (no *)
|
||||||
|
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+) ([A-Za-z]+)$', '\1')),
|
||||||
|
"addr:state" = REGEXP_REPLACE(situsfmt2,'^([A-Za-z]+) ([A-Za-z]+)$', '\2')
|
||||||
|
where situsfmt2 SIMILAR TO '([A-Za-z]+) ([A-Za-z]+)' AND "addr:city" IS NULL;
|
||||||
|
-- city with spaces (no *)
|
||||||
|
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+) ([A-Za-z]+)$', '\1')),
|
||||||
|
"addr:state" = REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+) ([A-Za-z]+)$', '\2')
|
||||||
|
where situsfmt2 SIMILAR TO '([A-Za-z ]+) ([A-Za-z]+)' AND "addr:city" IS NULL;
|
||||||
|
-- city* with spaces and zip
|
||||||
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+) [0-9]+$', '\1')),
|
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+) [0-9]+$', '\1')),
|
||||||
"addr:state" = REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+) [0-9]+$', '\2')
|
"addr:state" = REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+)\* ([A-Za-z]+) [0-9]+$', '\2')
|
||||||
where situsfmt2 SIMILAR TO '([A-Za-z ]+)\* ([A-Za-z]+) [0-9]+' AND "addr:city" IS NULL;
|
where situsfmt2 SIMILAR TO '([A-Za-z ]+)\* ([A-Za-z]+) [0-9]+' AND "addr:city" IS NULL;
|
||||||
|
-- city with spaces and zip (no *)
|
||||||
|
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+) ([A-Za-z]+) [0-9]+$', '\1')),
|
||||||
|
"addr:state" = REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+) ([A-Za-z]+) [0-9]+$', '\2')
|
||||||
|
where situsfmt2 SIMILAR TO '([A-Za-z ]+) ([A-Za-z]+) [0-9]+' AND "addr:city" IS NULL;
|
||||||
-- with no asterisk, and "Ca" suffix
|
-- with no asterisk, and "Ca" suffix
|
||||||
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+) CA$', '\1')),
|
update parcels__public_ SET "addr:city" = initcap(REGEXP_REPLACE(situsfmt2,'^([A-Za-z ]+) CA$', '\1')),
|
||||||
"addr:state" = 'CA'
|
"addr:state" = 'CA'
|
||||||
|
@ -218,7 +230,7 @@ update parcels__public_ SET "addr:city" = NULL where "addr:city" = 'Unincorp Cou
|
||||||
update parcels__public_ SET "addr:city" = NULL where "addr:city" = 'Unknown';
|
update parcels__public_ SET "addr:city" = NULL where "addr:city" = 'Unknown';
|
||||||
update parcels__public_ SET "addr:city" = NULL where "addr:city" = 'Area Bodega Bay';
|
update parcels__public_ SET "addr:city" = NULL where "addr:city" = 'Area Bodega Bay';
|
||||||
update parcels__public_ SET "addr:city" = NULL where "addr:city" = 'Area Duncans Mills';
|
update parcels__public_ SET "addr:city" = NULL where "addr:city" = 'Area Duncans Mills';
|
||||||
update parcels__public_ SET "addr:state" = 'California' where "addr:state" = 'CA';
|
-- update parcels__public_ SET "addr:state" = 'California' where "addr:state" = 'CA';
|
||||||
|
|
||||||
|
|
||||||
-- basic 123 Main with no common suffixes or numbers
|
-- basic 123 Main with no common suffixes or numbers
|
||||||
|
@ -236,8 +248,12 @@ update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt
|
||||||
|
|
||||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)$', '\1')),
|
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)$', '\1')),
|
||||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)$', 'Highway \2')) where situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+)';
|
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)$', 'Highway \2')) where situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+)';
|
||||||
|
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)\s+([A-Z]{1})$', '\1')),
|
||||||
|
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HWY ([0-9]+)\s+([A-Z]{1})$', 'Highway \2 \3')) where situsfmt1 SIMILAR TO '([0-9]+) HWY ([0-9]+)\s+([A-Z]{1})';
|
||||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HIGHWAY ([0-9]+)$', '\1')),
|
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HIGHWAY ([0-9]+)$', '\1')),
|
||||||
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HIGHWAY ([0-9]+)$', 'Highway \2')) where situsfmt1 SIMILAR TO '([0-9]+) HIGHWAY ([0-9]+)';
|
"addr:street" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) HIGHWAY ([0-9]+)$', 'Highway \2')) where situsfmt1 SIMILAR TO '([0-9]+) HIGHWAY ([0-9]+)';
|
||||||
|
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) VER-NI RD$', '\1')),
|
||||||
|
"addr:street" = 'Ver-ni Road' where situsfmt1 SIMILAR TO '([0-9]+) VER-NI RD';
|
||||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ARROWHEAD MTN TRL$', '\1')),
|
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) ARROWHEAD MTN TRL$', '\1')),
|
||||||
"addr:street" = 'Arrowhead Mountain Trail' where situsfmt1 SIMILAR TO '([0-9]+) ARROWHEAD MTN TRL';
|
"addr:street" = 'Arrowhead Mountain Trail' where situsfmt1 SIMILAR TO '([0-9]+) ARROWHEAD MTN TRL';
|
||||||
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) S FITCH MTN RD$', '\1')),
|
update parcels__public_ SET "addr:housenumber" = initcap(REGEXP_REPLACE(situsfmt1, '^([0-9]+) S FITCH MTN RD$', '\1')),
|
||||||
|
@ -586,16 +602,20 @@ UPDATE sonoma_county_building_outlines SET
|
||||||
"usecode" = CAST( a.usecode as INTEGER ) -- the original data is VARYING
|
"usecode" = CAST( a.usecode as INTEGER ) -- the original data is VARYING
|
||||||
FROM a WHERE sonoma_county_building_outlines.gid = a.gid;
|
FROM a WHERE sonoma_county_building_outlines.gid = a.gid;
|
||||||
|
|
||||||
|
|
||||||
|
-- TODO: here is where the first real 226 is added
|
||||||
|
|
||||||
|
|
||||||
-- get a count of outbuildings so we know how many addresses are intentionally unassigned
|
-- get a count of outbuildings so we know how many addresses are intentionally unassigned
|
||||||
SELECT
|
-- SELECT
|
||||||
COUNT(*)
|
-- COUNT(*)
|
||||||
FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON
|
-- FROM sonoma_county_building_outlines AS b JOIN parcels__public_ AS p ON
|
||||||
ST_Intersects(b.loc_geom,p.loc_geom) AND
|
-- ST_Intersects(b.loc_geom,p.loc_geom) AND
|
||||||
ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom)
|
-- ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.9*ST_Area(b.loc_geom)
|
||||||
WHERE
|
-- WHERE
|
||||||
p.building_count IN (2,3)
|
-- p.building_count IN (2,3)
|
||||||
AND NOT p.repeating
|
-- AND NOT p.repeating
|
||||||
AND NOT b.main; -- is NOT main building
|
-- AND NOT b.main; -- is NOT main building
|
||||||
|
|
||||||
-- result: 44090
|
-- result: 44090
|
||||||
|
|
||||||
|
@ -605,6 +625,7 @@ WHERE
|
||||||
-- result: 155217
|
-- result: 155217
|
||||||
|
|
||||||
-- try to assign multiple addresses from multiple parcels to single buildings
|
-- try to assign multiple addresses from multiple parcels to single buildings
|
||||||
|
-- however i think this just detected a single case of duplicated GIDs in the database
|
||||||
WITH addresses AS (
|
WITH addresses AS (
|
||||||
SELECT
|
SELECT
|
||||||
b.gid,
|
b.gid,
|
||||||
|
@ -618,22 +639,21 @@ WITH addresses AS (
|
||||||
p.building_count = 1 AND
|
p.building_count = 1 AND
|
||||||
p.repeating AND
|
p.repeating AND
|
||||||
b."addr:housenumber" IS NULL
|
b."addr:housenumber" IS NULL
|
||||||
GROUP BY b.gid
|
GROUP BY p.gid
|
||||||
)
|
)
|
||||||
UPDATE sonoma_county_building_outlines AS b SET
|
UPDATE sonoma_county_building_outlines AS b SET
|
||||||
"addr:housenumber" = housenumber,
|
"addr:housenumber" = housenumber,
|
||||||
"addr:street" = street,
|
"addr:street" = street,
|
||||||
"usecode" = CAST( a.usecode as INTEGER ) -- the original data is VARYING
|
"usecode" = CAST( p.usecode as INTEGER ) -- the original data is VARYING
|
||||||
FROM addresses AS a
|
FROM addresses AS p
|
||||||
WHERE a.gid = b.gid;
|
WHERE p.gid = b.gid;
|
||||||
|
|
||||||
--select * from sonoma_county_building_outlines where "addr:housenumber" LIKE '%;%' OR "addr:street" LIKE '%;%';
|
--select * from sonoma_county_building_outlines where "addr:housenumber" LIKE '%;%' OR "addr:street" LIKE '%;%';
|
||||||
-- result: 0, may not be working TODO
|
-- result: 0, may not be working TODO
|
||||||
|
|
||||||
-- try to identify addresses for buildings across multiple parcels
|
-- try to identify addresses for buildings across multiple parcels: must be >50% on that parcel
|
||||||
-- todo: this may not have done anything
|
|
||||||
WITH addresses AS (
|
WITH addresses AS (
|
||||||
SELECT
|
SELECT
|
||||||
b.gid,
|
b.gid,
|
||||||
array_to_string( ARRAY_AGG(DISTINCT p."addr:housenumber"), ';') AS addrno,
|
array_to_string( ARRAY_AGG(DISTINCT p."addr:housenumber"), ';') AS addrno,
|
||||||
array_to_string( ARRAY_AGG(DISTINCT p."addr:street"), ';') AS street,
|
array_to_string( ARRAY_AGG(DISTINCT p."addr:street"), ';') AS street,
|
||||||
|
@ -641,21 +661,24 @@ WITH addresses AS (
|
||||||
FROM sonoma_county_building_outlines AS b
|
FROM sonoma_county_building_outlines AS b
|
||||||
JOIN parcels__public_ AS p ON
|
JOIN parcels__public_ AS p ON
|
||||||
ST_Intersects(b.loc_geom,p.loc_geom) AND
|
ST_Intersects(b.loc_geom,p.loc_geom) AND
|
||||||
ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) < 0.9*ST_Area(b.loc_geom)
|
ST_Area(ST_Intersection(b.loc_geom,p.loc_geom)) > 0.5*ST_Area(b.loc_geom)
|
||||||
WHERE
|
WHERE
|
||||||
b."addr:housenumber" IS NULL AND
|
b."addr:housenumber" IS NULL AND
|
||||||
NOT p.repeating AND
|
NOT p.repeating AND
|
||||||
p."addr:housenumber" IS NOT NULL AND
|
p."addr:housenumber" IS NOT NULL AND
|
||||||
b.shape__are > 1000 -- assuming sqft
|
b.shape__are > 1000 -- assuming sqft
|
||||||
GROUP BY b.gid
|
GROUP BY p.gid
|
||||||
)
|
)
|
||||||
UPDATE sonoma_county_building_outlines AS b SET
|
UPDATE sonoma_county_building_outlines AS b SET
|
||||||
"addr:housenumber" = addrno,
|
"addr:housenumber" = addrno,
|
||||||
"addr:street" = street
|
"addr:street" = street
|
||||||
FROM addresses AS a
|
FROM addresses AS p
|
||||||
WHERE
|
WHERE
|
||||||
count = 1 AND -- only simple cases!
|
count = 1 AND -- only simple cases, no duplicate address tags!
|
||||||
a.gid = b.gid;
|
p.gid = b.gid;
|
||||||
|
|
||||||
|
-- TODO: here is where the second 226 is erroneously added
|
||||||
|
-- the 0.9 slice of area is pretty liberal, we probably want closer to half?
|
||||||
|
|
||||||
--select * from sonoma_county_building_outlines where "addr:housenumber" LIKE '%;%' OR "addr:street" LIKE '%;%';
|
--select * from sonoma_county_building_outlines where "addr:housenumber" LIKE '%;%' OR "addr:street" LIKE '%;%';
|
||||||
-- result: 0, may not be working TODO
|
-- result: 0, may not be working TODO
|
||||||
|
|
Loading…
Reference in New Issue
Block a user