This study employs DBSCAN for detecting urban traffic accident hotspots, effectively identifying irregularly shaped clusters missed by conventional methods. The analysis combines geographical coordinates with temporal and categorical data to isolate high-density areas. Each hotspot is profiled by accident type and time pattern. The results offer actionable, spatially precise insights, guiding targeted safety interventions like infrastructure improvements, signal adjustments, and focused enforcement to enhance urban road safety.
Density-based clustering algorithms like DBSCAN (Density-Based Spatial Clustering of Applications with Noise) are uniquely suited to identifying irregularly shaped hotspots in urban traffic accident data. Unlike conventional aggregation methods (e.g., k-means or grid-based analysis) that impose predefined cluster shapes or boundaries, DBSCAN groups accident points based purely on their local density. This capability allows for the discovery of non-linear patterns, such as accidents distributed along curved roads or complex intersections, which simpler spatial techniques might fail to capture accurately.
# ---- Simulate accident data ----
# Fixed seed so the simulated data set is reproducible. The order of the
# random draws below must not change, or the generated data will differ.
set.seed(42)
n_accidents <- 5000

# Background accidents: coordinates are uniform over a rectangular bounding
# box; attributes are sampled independently with the stated probabilities.
accidents_raw <- tibble(
  Longitude = runif(n_accidents, -74.0, -73.9),
  Latitude = runif(n_accidents, 40.7, 40.8),
  Severity = sample(
    c("Minor", "Moderate", "Severe"), n_accidents,
    replace = TRUE, prob = c(0.6, 0.3, 0.1)
  ),
  Type = sample(
    c("Rear-End", "Side-Swipe", "Pedestrian", "Other"), n_accidents,
    replace = TRUE, prob = c(0.4, 0.3, 0.2, 0.1)
  ),
  Time_Hour = sample(0:23, n_accidents, replace = TRUE)
)

# Simulate hotspots
# Hotspot 1 (rows 1-100): packed into a small square, all pedestrian accidents.
accidents_raw$Longitude[1:100] <- runif(100, -73.95, -73.94)
accidents_raw$Latitude[1:100] <- runif(100, 40.75, 40.76)
accidents_raw$Type[1:100] <- "Pedestrian"
# Hotspot 2 (rows 501-600): packed into a second square, hours 16-19 only.
accidents_raw$Longitude[501:600] <- runif(100, -73.98, -73.97)
accidents_raw$Latitude[501:600] <- runif(100, 40.72, 40.73)
accidents_raw$Time_Hour[501:600] <- sample(16:19, 100, replace = TRUE)

# Keep only the analysis columns. `dplyr::` is spelled out so the call does
# not depend on which attached package currently owns `select`.
accidents_clean <- accidents_raw %>%
  dplyr::select(Longitude, Latitude, Severity, Type, Time_Hour)

# ---- Kernel density surface ----
# Two-dimensional kernel density estimate evaluated on a 100 x 100 grid.
dens <- kde2d(accidents_clean$Longitude, accidents_clean$Latitude, n = 100)

# Rescale so the densest grid cell is exactly 1 (values are then usable
# directly as an opacity in [0, 1]).
dens_values <- dens$z / max(dens$z)

# Long-format grid: expand.grid() varies its first factor (Longitude) fastest,
# which matches the column-major order of as.vector() on `z`, whose rows
# correspond to x.
dens_points <- expand.grid(Longitude = dens$x, Latitude = dens$y)
dens_points$Density <- as.vector(dens_values)
# ---- Density map ----
# One red marker per density grid cell; the fill opacity encodes the
# normalised density stored in the `Density` column.
leaflet(dens_points) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~Longitude, lat = ~Latitude,
    radius = 5,
    color = "red",
    fillOpacity = ~Density,
    stroke = FALSE
  )

# ---- Cluster map ----
# NOTE(review): `accidents_clustered` is not created in the visible part of
# this script. It is presumably `accidents_clean` plus a `Cluster` column from
# the DBSCAN step, with 0 meaning noise -- confirm it is defined upstream.

# Going through as.character() recovers the cluster label whether `Cluster`
# is stored as a number or as a factor (as.numeric() on a factor would return
# level codes instead of labels).
cluster_ids <- as.integer(as.character(accidents_clustered$Cluster))
# Including 0L keeps max() well-defined when there are no rows or no clusters.
n_clusters <- max(c(0L, cluster_ids), na.rm = TRUE)

# One colour per cluster; the palette is recycled if there are more clusters
# than colours so that no cluster ends up with an NA colour.
set1_colors <- RColorBrewer::brewer.pal(8, "Set1")
cluster_palette <- set1_colors[
  (seq_len(n_clusters) - 1L) %% length(set1_colors) + 1L
]

# Position 1 of the lookup is noise (id 0), position k + 1 is cluster k.
# A direct lookup keeps colours aligned with rows; indexing the palette with
# ids that contain 0 would silently drop those elements and misalign the rest.
point_colors <- c("gray", cluster_palette)[cluster_ids + 1L]

leaflet(accidents_clustered) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~Longitude, lat = ~Latitude,
    radius = 5,
    color = point_colors,
    fillOpacity = 0.8,
    popup = ~paste0("<b>Cluster:</b>", Cluster, "<br><b>Type:</b>", Type, "<br><b>Severity:</b>", Severity)
  ) %>%
  addLegend(
    "bottomright",
    colors = c("gray", cluster_palette),
    # sprintf() returns character(0) for zero clusters, keeping labels and
    # colours the same length.
    labels = c("Noise", sprintf("Cluster %d", seq_len(n_clusters))),
    title = "Accident Clusters"
  )

# ---- Split hotspots ----
# Drop noise points and produce one data frame per cluster.
hotspot_list <- accidents_clustered %>%
  filter(Cluster != 0) %>%
  group_by(Cluster) %>%
  group_split()
# Profile every hotspot: tabulate accident type and time category, draw the
# two bar charts side by side, then map the hotspot's points.
# NOTE(review): `Time_Category` is not derived anywhere in the visible script;
# confirm it is added to the clustered data upstream.
for (hotspot in hotspot_list) {
  cluster_id <- unique(hotspot$Cluster)

  # Counts and within-hotspot shares per accident type.
  type_summary <- dplyr::count(hotspot, Type, name = "Count") %>%
    mutate(Proportion = Count / sum(Count))

  # Counts and within-hotspot shares per time category.
  time_summary <- dplyr::count(hotspot, Time_Category, name = "Count") %>%
    mutate(Proportion = Count / sum(Count))

  # Bar heights come straight from the pre-computed Count column.
  p1 <- ggplot(type_summary, aes(x = Type, y = Count, fill = Type)) +
    geom_col() +
    labs(title = paste("Accident Types - Cluster", cluster_id)) +
    theme_minimal() +
    theme(legend.position = "none")

  p2 <- ggplot(
    time_summary,
    aes(x = Time_Category, y = Count, fill = Time_Category)
  ) +
    geom_col() +
    labs(title = paste("Time Category - Cluster", cluster_id)) +
    theme_minimal() +
    theme(legend.position = "none")

  # Both charts in a single row.
  grid.arrange(p1, p2, ncol = 2)

  # Map of this hotspot's points; print() is explicit because values are not
  # auto-printed inside a loop.
  hotspot_map <- leaflet(hotspot) %>%
    addTiles() %>%
    addCircleMarkers(
      lng = ~Longitude, lat = ~Latitude,
      radius = 5,
      color = "red",
      fillOpacity = 0.5,
      stroke = FALSE
    )
  print(hotspot_map)
}