Geometries are projected to a planar coordinate system (EPSG:2264, NAD83 / North Carolina, units in US survey feet) to enable distance-based clustering; all spatial distances reported below are therefore in feet. Spatial coordinates are derived using interior points, and time is transformed into a numeric index relative to the start year.
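k-distance plots (the sorted distance from each observation to its k-th nearest neighbour) are used to identify appropriate spatial neighborhood thresholds. Upper percentiles of the k-distance, computed for each candidate MinPts, provide candidate values for ε (epsilon), supporting data-driven parameter selection.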
# Spatial k-distance diagnostics ----
# For every candidate MinPts value: take each observation's distance to its
# k-th nearest neighbour, plot the sorted distances, and tabulate upper
# percentiles of that distance as candidate eps values.
MIN_PTS_LIST <- c(10, 15, 20, 25, 30, 40, 50)

# Percentiles reported per MinPts; the names become the result column names.
eps_probs <- c(
  p85 = 0.85, p90 = 0.90, p95 = 0.95, p96 = 0.96,
  p97 = 0.97, p98 = 0.98, p99 = 0.99
)

# Plain double matrix of the x/y columns for the neighbour search.
coords_xy <- ABT_proj %>%
  st_drop_geometry() %>%
  select(x, y) %>%
  as.matrix()
storage.mode(coords_xy) <- "double"

# A single neighbour search at the largest k serves every candidate: column k
# of nn$nn.dist is the distance to the k-th nearest neighbour, so the search
# does not need to be repeated inside the loop.
# NOTE(review): the query point itself is presumably not counted among the k
# neighbours, while DBSCAN's MinPts is often defined to include it; if the
# clustering routine used later follows that convention, k = MinPts - 1 would
# be the matching choice -- confirm before changing, as it shifts every eps.
nn <- get.knn(coords_xy, k = max(MIN_PTS_LIST))

plots_spatial <- list()
results_spatial <- list()

for (k in MIN_PTS_LIST) {
  key <- as.character(k)
  k_dist <- sort(nn$nn.dist[, k])
  df_k <- data.frame(index = seq_along(k_dist), dist = k_dist)

  plots_spatial[[key]] <- ggplot(df_k, aes(index, dist)) +
    geom_line(color = "steelblue") +
    labs(
      title = paste("MinPts =", k),
      x = "Sorted observations",
      y = "k-distance (feet)"
    ) +
    theme_minimal()

  # One vectorised quantile() call; names = FALSE keeps the "85%"-style
  # quantile labels from leaking into the data frame's row names.
  pct <- setNames(
    quantile(k_dist, probs = eps_probs, names = FALSE),
    names(eps_probs)
  )
  results_spatial[[key]] <- as.data.frame(c(list(MinPts = k), as.list(pct)))
}

# One row per MinPts candidate, one column per percentile.
results_spatial_df <- bind_rows(results_spatial)
results_spatial_df

grid.arrange(grobs = plots_spatial, ncol = 3)
Final clustering parameters are selected based on the diagnostic outputs above. The spatial threshold (ε) is taken as the 96th percentile of the k-distance for MinPts = 15, and the temporal window (in years) is defined based on domain knowledge.
# Final clustering parameters ----
MIN_PTS <- 15

# Spatial eps: the p96 k-distance from the diagnostics row whose MinPts
# equals MIN_PTS.
eps_spatial <- results_spatial_df %>%
  filter(MinPts == MIN_PTS) %>%
  pull(p96)

# Fail fast when MIN_PTS has no (or more than one) row in the diagnostics
# table; otherwise eps_spatial would not be a single number and the problem
# would only surface later, in the clustering step.
if (length(eps_spatial) != 1L) {
  stop(
    "Expected exactly one row with MinPts == ", MIN_PTS,
    " in results_spatial_df, found ", length(eps_spatial), ".",
    call. = FALSE
  )
}

# Temporal neighbourhood width, in years.
TEMPORAL_WINDOW_YEARS <- 2

# Report the chosen parameters (the "##" lines are the recorded output).
cat("Selected properties:", "\n")
## Selected properties:
cat("MinPts:", MIN_PTS, "\n")
## MinPts: 15
cat("eps_spatial:", eps_spatial, "feet\n")
## eps_spatial: 5445.264 feet
cat("Temporal window:", TEMPORAL_WINDOW_YEARS, "years\n")
## Temporal window: 2 years
ST-DBSCAN is applied to identify clusters in the combined spatio-temporal space.
Clusters are visualized spatially to assess geographic patterns.
# Interactive cluster map ----
# Reproject to WGS84 (EPSG:4326) for display in leaflet.
ABT_wgs <- st_transform(ABT_proj, 4326)

# Ensure cluster is a factor so it is treated as categorical.
ABT_wgs$cluster <- as.factor(ABT_wgs$cluster)

# Label treated as noise (unclustered observations).
noise_label <- "0"

# One hue per cluster level, named by level.
cluster_levels <- levels(ABT_wgs$cluster)
cluster_colors <- setNames(
  scales::hue_pal()(length(cluster_levels)),
  cluster_levels
)

# Noise is drawn in neutral grey instead of a hue.
if (noise_label %in% cluster_levels) {
  cluster_colors[noise_label] <- "grey80"
}

# Pass the levels explicitly. A character `domain` is sorted alphabetically
# by colorFactor() ("0", "1", "10", "11", "2", ...), which, once there are
# ten or more clusters, no longer lines up with the order of cluster_colors.
pal <- leaflet::colorFactor(
  palette = unname(cluster_colors),
  domain = NULL,
  levels = cluster_levels,
  na.color = "grey80"
)

# Legend entries as a factor so they keep the factor-level order rather than
# being sorted as character strings.
legend_values <- factor(cluster_levels, levels = cluster_levels)

# Leaflet map
leaflet(ABT_wgs) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addPolygons(
    fillColor = ~pal(cluster),
    fillOpacity = 0.6,
    color = ~pal(cluster),
    weight = 0.2,
    smoothFactor = 0.2,
    # Hover label (wrapped in HTML() so the <br> is rendered as a line break)
    label = ~paste0(
      "Cluster: ", cluster,
      "<br>Year: ", year
    ) %>% lapply(htmltools::HTML),
    # Click popup
    popup = ~paste0(
      "<b>Cluster:</b> ", cluster,
      "<br><b>Year:</b> ", year
    )
  ) %>%
  addLegend(
    position = "bottomleft",
    pal = pal,
    values = legend_values,
    title = "Clusters",
    opacity = 1
  )