This commit is contained in:
2025-08-25 20:33:21 -07:00
parent ec0e4a6efb
commit ff0511b3c5
2 changed files with 195 additions and 22 deletions

View File

@@ -109,8 +109,8 @@ class RoadComparator:
"""Create a buffered union using chunked processing for memory efficiency."""
print("Creating optimized buffered union...")
# Process in chunks to manage memory
chunks = [gdf.iloc[i:i+self.chunk_size] for i in range(0, len(gdf), self.chunk_size)]
# Process in chunks to manage memory - extract geometries as lists
chunks = [gdf.iloc[i:i+self.chunk_size].geometry.tolist() for i in range(0, len(gdf), self.chunk_size)]
chunk_unions = []
# Use partial function for multiprocessing
@@ -144,17 +144,17 @@ class RoadComparator:
raise Exception("No valid geometries to create union")
@staticmethod
def _buffer_chunk(chunk_gdf: gpd.GeoDataFrame, tolerance: float) -> Any:
def _buffer_chunk(geometries: List, tolerance: float) -> Any:
"""Buffer geometries in a chunk and return their union."""
try:
# Buffer all geometries in the chunk
buffered = chunk_gdf.geometry.buffer(tolerance)
buffered = [geom.buffer(tolerance) for geom in geometries]
# Create union of buffered geometries
if len(buffered) == 1:
return buffered.iloc[0]
return buffered[0]
else:
return unary_union(buffered.tolist())
return unary_union(buffered)
except Exception as e:
print(f"Error in chunk processing: {str(e)}")
return None
@@ -174,9 +174,17 @@ class RoadComparator:
"""
print("Finding removed segments...")
# Split into chunks for parallel processing
chunks = [source_gdf.iloc[i:i+self.chunk_size]
for i in range(0, len(source_gdf), self.chunk_size)]
# Split into chunks for parallel processing - convert to serializable format
chunks = []
for i in range(0, len(source_gdf), self.chunk_size):
chunk_gdf = source_gdf.iloc[i:i+self.chunk_size]
chunk_data = []
for idx, row in chunk_gdf.iterrows():
chunk_data.append({
'geometry': row.geometry,
'properties': dict(row.drop('geometry'))
})
chunks.append(chunk_data)
all_removed = []
@@ -203,13 +211,14 @@ class RoadComparator:
return all_removed
@staticmethod
def _process_removed_chunk(chunk_gdf: gpd.GeoDataFrame, target_union: Any,
def _process_removed_chunk(chunk_data: List[Dict], target_union: Any,
min_length_deg: float) -> List[Dict[str, Any]]:
"""Process a chunk of geometries to find removed segments."""
removed_segments = []
for idx, row in chunk_gdf.iterrows():
geom = row.geometry
for row_data in chunk_data:
geom = row_data['geometry']
properties = row_data['properties']
# Handle MultiLineString by processing each component
if isinstance(geom, MultiLineString):
@@ -238,12 +247,12 @@ class RoadComparator:
for uncovered_line in uncovered_lines:
if uncovered_line.length >= min_length_deg:
# Create properties dict with original metadata plus 'removed: true'
properties = dict(row.drop('geometry'))
properties['removed'] = True
result_properties = properties.copy()
result_properties['removed'] = True
removed_segments.append({
'geometry': uncovered_line,
**properties
**result_properties
})
except Exception as e:
@@ -259,9 +268,17 @@ class RoadComparator:
"""
print("Finding added roads...")
# Split into chunks for parallel processing
chunks = [source_gdf.iloc[i:i+self.chunk_size]
for i in range(0, len(source_gdf), self.chunk_size)]
# Split into chunks for parallel processing - convert to serializable format
chunks = []
for i in range(0, len(source_gdf), self.chunk_size):
chunk_gdf = source_gdf.iloc[i:i+self.chunk_size]
chunk_data = []
for idx, row in chunk_gdf.iterrows():
chunk_data.append({
'geometry': row.geometry,
'properties': dict(row.drop('geometry'))
})
chunks.append(chunk_data)
all_added = []
@@ -288,13 +305,14 @@ class RoadComparator:
return all_added
@staticmethod
def _process_added_chunk(chunk_gdf: gpd.GeoDataFrame, target_union: Any,
def _process_added_chunk(chunk_data: List[Dict], target_union: Any,
min_length_deg: float) -> List[Dict[str, Any]]:
"""Process a chunk of geometries to find added roads."""
added_roads = []
for idx, row in chunk_gdf.iterrows():
geom = row.geometry
for row_data in chunk_data:
geom = row_data['geometry']
original_properties = row_data['properties']
try:
# Check what portion of the road is not covered
@@ -319,7 +337,6 @@ class RoadComparator:
if uncovered_ratio > 0.1:
#uncovered_length >= min_length_deg and
# Include entire original road with all original metadata
original_properties = dict(row.drop('geometry'))
#
# For Sumter County Roads