library(sf)
library(dplyr)
library(ggplot2)
library(tigris)
library(readr)
library(RColorBrewer)
library(classInt)
library(leaflet)
library(htmlwidgets)
options(tigris_use_cache = TRUE)
setwd("C:/Users/mvx13/OneDrive - Texas State University/Papers/2026/2027_TRBAM/Data/OtherData")
library(readxl)
dat= read_excel("Pedestrian2017_2025a.xlsx")
dim(dat)
## [1] 76463 152
## [1] "Crash_ID" "Crash_Date"
## [3] "Crash_Time" "Crash_Speed_Limit"
## [5] "Wthr_Cond_ID" "Light_Cond_ID"
## [7] "Entr_Road_ID" "Road_Type_ID"
## [9] "Road_Algn_ID" "Surf_Cond_ID"
## [11] "Traffic_Cntl_ID" "Investigat_Notify_Time"
## [13] "Investigat_Arrv_Time" "Harm_Evnt_ID"
## [15] "Intrsct_Relat_ID" "FHE_Collsn_ID"
## [17] "Obj_Struck_ID" "Othr_Factr_ID"
## [19] "Road_Part_Adj_ID" "Road_Cls_ID"
## [21] "Road_Relat_ID" "Cnty_ID"
## [23] "City_ID" "Latitude"
## [25] "Longitude" "Onsys_Fl"
## [27] "Rural_Fl" "Crash_Sev_ID"
## [29] "Pop_Group_ID" "Day_of_Week"
## [31] "Rural_Urban_Type_ID" "Func_Sys_ID"
## [33] "Adt_Curnt_Amt" "Adt_Curnt_Year"
## [35] "Year" "Investigator_Narrative"
## [37] "Tot_Injry_Cnt" "Death_Cnt"
## [39] "Sus_Serious_Injry_Cnt" "Secondary_Crash_Fl"
## [41] "Yr_Un" "Unit_ID"
## [43] "UnitNbr_Un" "Unit_Desc_ID"
## [45] "Veh_Lic_Plate_Nbr" "VIN"
## [47] "Veh_Mod_Year" "Veh_Color_ID"
## [49] "Veh_Make_ID" "Veh_Mod_ID"
## [51] "Veh_Body_Styl_ID" "Emer_Respndr_Fl"
## [53] "Veh_Damage_Description1_Id" "Veh_Damage_Severity1_Id"
## [55] "Veh_Cmv_Fl" "Contrib_Factr_1_ID"
## [57] "Contrib_Factr_2_ID" "Pedestrian_Action_ID"
## [59] "Pedalcyclist_Action_ID" "PBCAT_Pedestrian_ID"
## [61] "PBCAT_Pedalcyclist_ID" "E_Scooter_ID"
## [63] "Autonomous_Unit_ID" "Rpt_Autonomous_Level_Engaged_ID"
## [65] "Rpt_Autonomous_Unit_ID" "Cmv_Hazmat_Fl"
## [67] "Crash_ID_Pr" "Yr_Pr"
## [69] "UnitNbr_Pr" "Prsn_Nbr"
## [71] "Person_ID" "Prsn_Type_ID"
## [73] "Prsn_Occpnt_Pos_ID" "Prsn_Injry_Sev_ID"
## [75] "Prsn_Age" "Prsn_Ethnicity_ID"
## [77] "Prsn_Gndr_ID" "Prsn_Ejct_ID"
## [79] "Prsn_Rest_ID" "Prsn_Airbag_ID"
## [81] "Prsn_Helmet_ID" "Prsn_Alc_Spec_Type_ID"
## [83] "Prsn_Alc_Rslt_ID" "Prsn_Bac_Test_Rslt"
## [85] "Prsn_Drg_Spec_Type_ID" "Prsn_Drg_Rslt_ID"
## [87] "Drvr_Lic_Type_ID" "Drvr_Lic_State_ID"
## [89] "Drvr_Lic_Number" "Drvr_Lic_Cls_ID"
## [91] "Drvr_DOB" "Drvr_State_ID"
## [93] "Drvr_Zip" "Num_un"
## [95] "Num_pr" "OthrUnitNbr"
## [97] "OthrPrsn_Nbr" "OtherUnit_ID"
## [99] "OtherPerson_ID" "Crash_ID_2"
## [101] "Year_2" "Unit_Nbr"
## [103] "Unit_Desc_ID_2" "Veh_Lic_Plate_Nbr_2"
## [105] "VIN_2" "Veh_Mod_Year_2"
## [107] "Veh_Color_ID_2" "Veh_Make_ID_2"
## [109] "Veh_Mod_ID_2" "Veh_Body_Styl_ID_2"
## [111] "Emer_Respndr_Fl_2" "Veh_Damage_Description1_Id_2"
## [113] "Veh_Damage_Severity1_Id_2" "Veh_Cmv_Fl_2"
## [115] "Contrib_Factr_1_ID_2" "Contrib_Factr_2_ID_2"
## [117] "Pedestrian_Action_ID_2" "Pedalcyclist_Action_ID_2"
## [119] "PBCAT_Pedestrian_ID_2" "PBCAT_Pedalcyclist_ID_2"
## [121] "E_Scooter_ID_2" "Autonomous_Unit_ID_2"
## [123] "Rpt_Autonomous_Level_Engaged_ID_2" "Rpt_Autonomous_Unit_ID_2"
## [125] "Cmv_Hazmat_Fl_2" "Crash_ID_2_2"
## [127] "Year_2_2" "Unit_ID_2"
## [129] "Unit_Nbr_2" "Prsn_Nbr_2"
## [131] "Prsn_Type_ID_2" "Prsn_Occpnt_Pos_ID_2"
## [133] "Prsn_Injry_Sev_ID_2" "Prsn_Age_2"
## [135] "Prsn_Ethnicity_ID_2" "Prsn_Gndr_ID_2"
## [137] "Prsn_Ejct_ID_2" "Prsn_Rest_ID_2"
## [139] "Prsn_Airbag_ID_2" "Prsn_Helmet_ID_2"
## [141] "Prsn_Alc_Spec_Type_ID_2" "Prsn_Alc_Rslt_ID_2"
## [143] "Prsn_Bac_Test_Rslt_2" "Prsn_Drg_Spec_Type_ID_2"
## [145] "Prsn_Drg_Rslt_ID_2" "Drvr_Lic_Type_ID_2"
## [147] "Drvr_Lic_State_ID_2" "Drvr_Lic_Number_2"
## [149] "Drvr_Lic_Cls_ID_2" "Drvr_DOB_2"
## [151] "Drvr_State_ID_2" "Drvr_Zip_2"
dat1= dat[, c(1, 24, 25, 28, 35)]
data_unique <- dat1[!duplicated(dat1$Crash_ID), ]
dim(data_unique)
## [1] 71519 5
crash= data_unique
crash_sf <- crash %>%
filter(
!is.na(Longitude),
!is.na(Latitude),
Longitude < 0,
Latitude > 0
) %>%
st_as_sf(
coords = c("Longitude", "Latitude"),
crs = 4326,
remove = FALSE
)
# ------------------------------------------------------------
# Texas state boundary
# ------------------------------------------------------------
tx_boundary <- states(cb = TRUE, year = 2020, class = "sf") %>%
filter(STUSPS == "TX") %>%
st_transform(3083) %>%
st_make_valid()
# Project crash points
crash_tx <- crash_sf %>%
st_transform(3083)
# Keep only crashes inside Texas
crash_tx <- st_filter(crash_tx, tx_boundary)
# ------------------------------------------------------------
# Create approximately 80,000 square grids over Texas
# ------------------------------------------------------------
target_grid_count <- 80000
# Texas area in square meters
tx_area_m2 <- as.numeric(st_area(tx_boundary))
# Cell size in meters
cell_size_m <- sqrt(tx_area_m2 / target_grid_count)
cell_size_m
## [1] 2931.901
# ------------------------------------------------------------
# Build square fishnet grid
# ------------------------------------------------------------
tx_grid <- st_make_grid(
tx_boundary,
cellsize = cell_size_m,
square = TRUE
) %>%
st_sf(grid_id = seq_along(.), geometry = .)
# Clip grid to Texas boundary
tx_grid <- st_intersection(tx_grid, tx_boundary) %>%
select(grid_id, geometry)
# Check actual number of cells
nrow(tx_grid)
## [1] 81310
# ------------------------------------------------------------
# Spatially join crashes to grid cells
# ------------------------------------------------------------
crash_grid_join <- st_join(
crash_tx,
tx_grid,
join = st_within,
left = FALSE
)
# Count crashes per grid
grid_crash_counts <- crash_grid_join %>%
st_drop_geometry() %>%
count(grid_id, name = "crash_count")
# Join crash counts back to grid
tx_grid_crash_map <- tx_grid %>%
left_join(grid_crash_counts, by = "grid_id") %>%
mutate(
crash_count = ifelse(is.na(crash_count), 0, crash_count)
)
summary(tx_grid_crash_map$crash_count)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.6486 0.0000 586.0000
quantile(
tx_grid_crash_map$crash_count,
probs = c(0, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99, 1),
na.rm = TRUE
)
## 0% 25% 50% 75% 90% 95% 99% 100%
## 0 0 0 0 0 1 15 586
tx_grid_crash_map %>%
st_drop_geometry() %>%
count(crash_count, sort = FALSE) %>%
arrange(crash_count) %>%
head(30)
## crash_count n
## 1 0 75371
## 2 1 2542
## 3 2 777
## 4 3 412
## 5 4 292
## 6 5 224
## 7 6 157
## 8 7 166
## 9 8 116
## 10 9 98
## 11 10 88
## 12 11 51
## 13 12 75
## 14 13 52
## 15 14 56
## 16 15 53
## 17 16 38
## 18 17 33
## 19 18 29
## 20 19 18
## 21 20 34
## 22 21 38
## 23 22 21
## 24 23 23
## 25 24 27
## 26 25 25
## 27 26 17
## 28 27 13
## 29 28 18
## 30 29 21
# ------------------------------------------------------------
# Data-driven bins: zero separate + Jenks for positive counts
# ------------------------------------------------------------
positive_counts <- tx_grid_crash_map$crash_count[
!is.na(tx_grid_crash_map$crash_count) &
tx_grid_crash_map$crash_count > 0
]
n_classes <- 6
jenks_breaks <- classIntervals(
positive_counts,
n = n_classes,
style = "jenks"
)$brks
# Clean breaks
breaks_final <- sort(unique(c(0, round(jenks_breaks), Inf)))
breaks_final
## [1] 0 1 9 29 63 122 270 586 Inf
# ------------------------------------------------------------
# Create readable labels
# ------------------------------------------------------------
make_bin_labels <- function(breaks) {
labels <- character(length(breaks) - 1)
for (i in seq_along(labels)) {
lower <- breaks[i]
upper <- breaks[i + 1]
if (lower == 0 && upper == 1) {
labels[i] <- "0"
} else if (is.infinite(upper)) {
labels[i] <- paste0(lower + 1, "+")
} else {
labels[i] <- paste0(lower + 1, "–", upper)
}
}
labels
}
bin_labels <- make_bin_labels(breaks_final)
bin_labels
## [1] "0" "2–9" "10–29" "30–63" "64–122" "123–270" "271–586"
## [8] "587+"
tx_grid_crash_map <- tx_grid_crash_map %>%
mutate(
crash_count_bin = cut(
crash_count,
breaks = breaks_final,
labels = bin_labels,
include.lowest = TRUE,
right = TRUE
)
)
# Check number of grid cells in each bin
tx_grid_crash_map %>%
st_drop_geometry() %>%
count(crash_count_bin)
## crash_count_bin n
## 1 0 77913
## 2 2–9 2242
## 3 10–29 730
## 4 30–63 268
## 5 64–122 117
## 6 123–270 35
## 7 271–586 5
display.brewer.pal(9, "YlOrRd")

display.brewer.pal(9, "OrRd")

display.brewer.pal(9, "Reds")

palette_name <- "YlOrRd"
n_bins <- length(levels(tx_grid_crash_map$crash_count_bin))
map_colors <- colorRampPalette(
brewer.pal(9, palette_name)
)(n_bins)
# Make zero-crash grid cells light gray
map_colors[1] <- "gray92"
# ------------------------------------------------------------
# Static crash-count grid map
# ------------------------------------------------------------
ggplot() +
geom_sf(
data = tx_grid_crash_map,
aes(fill = crash_count_bin),
color = NA
) +
scale_fill_manual(
values = map_colors,
na.value = "transparent",
name = "Crash Count",
drop = FALSE
) +
labs(
title = "Texas Pedestrian Crashes by Square Grid",
subtitle = paste0(
"Crash counts aggregated to approximately ",
format(target_grid_count, big.mark = ","),
" square grid cells"
),
caption = "Data: Texas crash records; grid generated from Texas state boundary"
) +
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(face = "bold", size = 16),
plot.subtitle = element_text(size = 12),
plot.caption = element_text(size = 9, color = "gray40"),
legend.position = "right",
legend.title = element_text(face = "bold"),
legend.text = element_text(size = 9),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank()
)

tx_grid_crash_only <- tx_grid_crash_map %>%
filter(crash_count > 0)
ggplot() +
geom_sf(
data = tx_boundary,
fill = "gray97",
color = "gray50",
linewidth = 0.25
) +
geom_sf(
data = tx_grid_crash_only,
aes(fill = crash_count_bin),
color = NA
) +
scale_fill_manual(
values = map_colors,
na.value = "transparent",
name = "Crash Count",
drop = FALSE
) +
labs(
title = "Texas Pedestrian Crash Concentrations by Square Grid",
subtitle = "Only grid cells with at least one crash are shown",
caption = "Data: Texas crash records; grid generated from Texas state boundary"
) +
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(face = "bold", size = 16),
plot.subtitle = element_text(size = 12),
legend.position = "right",
legend.title = element_text(face = "bold"),
panel.grid = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank()
)

ggplot() +
geom_sf(
data = tx_boundary,
fill = "gray97",
color = "gray50",
linewidth = 0.25
) +
geom_sf(
data = tx_grid_crash_only,
aes(fill = crash_count_bin),
color = NA
) +
geom_sf(
data = crash_tx,
size = 0.15,
alpha = 0.20,
color = "black"
) +
scale_fill_manual(
values = map_colors,
na.value = "transparent",
name = "Crash Count",
drop = FALSE
) +
labs(
title = "Texas Pedestrian Crashes and Square-Grid Hotspots",
subtitle = "Crash points overlaid on grid-level crash counts"
) +
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(face = "bold", size = 16),
plot.subtitle = element_text(size = 12),
legend.position = "right",
panel.grid = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank()
)

# ------------------------------------------------------------
# Prepare data for leaflet
# ------------------------------------------------------------
tx_grid_leaflet <- tx_grid_crash_map %>%
st_transform(4326)
tx_grid_leaflet_crash_only <- tx_grid_leaflet %>%
filter(crash_count > 0)
crash_leaflet <- crash_tx %>%
st_transform(4326)
tx_boundary_leaflet <- tx_boundary %>%
st_transform(4326)
tx_grid_leaflet_crash_only <- tx_grid_leaflet_crash_only %>%
mutate(
popup_text = paste0(
"<b>Grid ID:</b> ", grid_id, "<br>",
"<b>Crash Count:</b> ", crash_count, "<br>",
"<b>Crash Count Bin:</b> ", crash_count_bin
)
)
pal_leaflet <- colorFactor(
palette = map_colors,
domain = levels(tx_grid_crash_map$crash_count_bin),
na.color = "transparent"
)
# ------------------------------------------------------------
# Interactive leaflet map
# ------------------------------------------------------------
leaflet_grid_map <- leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data = tx_boundary_leaflet,
fill = FALSE,
color = "gray40",
weight = 1,
opacity = 0.8,
group = "Texas Boundary"
) %>%
addPolygons(
data = tx_grid_leaflet_crash_only,
fillColor = ~pal_leaflet(crash_count_bin),
fillOpacity = 0.75,
color = "white",
weight = 0.15,
opacity = 0.7,
popup = ~popup_text,
label = ~paste0("Grid ", grid_id, ": ", crash_count, " crashes"),
highlightOptions = highlightOptions(
weight = 2,
color = "#333333",
fillOpacity = 0.90,
bringToFront = TRUE
),
group = "Crash Grid"
) %>%
addCircleMarkers(
data = crash_leaflet,
lng = ~Longitude,
lat = ~Latitude,
radius = 1.5,
stroke = FALSE,
fillOpacity = 0.25,
color = "black",
popup = ~paste0(
"<b>Crash ID:</b> ", Crash_ID, "<br>",
"<b>Latitude:</b> ", Latitude, "<br>",
"<b>Longitude:</b> ", Longitude
),
group = "Crash Points"
) %>%
addLegend(
position = "bottomright",
pal = pal_leaflet,
values = levels(tx_grid_crash_map$crash_count_bin),
title = "Crash Count",
opacity = 0.8
) %>%
addLayersControl(
overlayGroups = c("Texas Boundary", "Crash Grid", "Crash Points"),
options = layersControlOptions(collapsed = FALSE)
)
leaflet_grid_map
# Save spatial grid
#st_write(
# tx_grid_crash_map,
# "tx_pedestrian_crashes_80000_grid.gpkg",
# delete_dsn = TRUE
#)
# Save non-spatial table
#tx_grid_crash_map %>%
# st_drop_geometry() %>%
# write_csv("tx_pedestrian_crashes_80000_grid_counts.csv")