Pedestrian Crash
Read Data
setwd("C:/Users/mvx13/OneDrive - Texas State University/Papers/2026/2027_TRBAM/Data/OtherData")
library(readxl)
library(ggplot2)
library(sf)
library(dplyr)
library(tigris)
library(readr)
library(leaflet)
dat= read_excel("Pedestrian2017_2025a.xlsx")
dim(dat)## [1] 76463 152
## [1] "Crash_ID" "Crash_Date"
## [3] "Crash_Time" "Crash_Speed_Limit"
## [5] "Wthr_Cond_ID" "Light_Cond_ID"
## [7] "Entr_Road_ID" "Road_Type_ID"
## [9] "Road_Algn_ID" "Surf_Cond_ID"
## [11] "Traffic_Cntl_ID" "Investigat_Notify_Time"
## [13] "Investigat_Arrv_Time" "Harm_Evnt_ID"
## [15] "Intrsct_Relat_ID" "FHE_Collsn_ID"
## [17] "Obj_Struck_ID" "Othr_Factr_ID"
## [19] "Road_Part_Adj_ID" "Road_Cls_ID"
## [21] "Road_Relat_ID" "Cnty_ID"
## [23] "City_ID" "Latitude"
## [25] "Longitude" "Onsys_Fl"
## [27] "Rural_Fl" "Crash_Sev_ID"
## [29] "Pop_Group_ID" "Day_of_Week"
## [31] "Rural_Urban_Type_ID" "Func_Sys_ID"
## [33] "Adt_Curnt_Amt" "Adt_Curnt_Year"
## [35] "Year" "Investigator_Narrative"
## [37] "Tot_Injry_Cnt" "Death_Cnt"
## [39] "Sus_Serious_Injry_Cnt" "Secondary_Crash_Fl"
## [41] "Yr_Un" "Unit_ID"
## [43] "UnitNbr_Un" "Unit_Desc_ID"
## [45] "Veh_Lic_Plate_Nbr" "VIN"
## [47] "Veh_Mod_Year" "Veh_Color_ID"
## [49] "Veh_Make_ID" "Veh_Mod_ID"
## [51] "Veh_Body_Styl_ID" "Emer_Respndr_Fl"
## [53] "Veh_Damage_Description1_Id" "Veh_Damage_Severity1_Id"
## [55] "Veh_Cmv_Fl" "Contrib_Factr_1_ID"
## [57] "Contrib_Factr_2_ID" "Pedestrian_Action_ID"
## [59] "Pedalcyclist_Action_ID" "PBCAT_Pedestrian_ID"
## [61] "PBCAT_Pedalcyclist_ID" "E_Scooter_ID"
## [63] "Autonomous_Unit_ID" "Rpt_Autonomous_Level_Engaged_ID"
## [65] "Rpt_Autonomous_Unit_ID" "Cmv_Hazmat_Fl"
## [67] "Crash_ID_Pr" "Yr_Pr"
## [69] "UnitNbr_Pr" "Prsn_Nbr"
## [71] "Person_ID" "Prsn_Type_ID"
## [73] "Prsn_Occpnt_Pos_ID" "Prsn_Injry_Sev_ID"
## [75] "Prsn_Age" "Prsn_Ethnicity_ID"
## [77] "Prsn_Gndr_ID" "Prsn_Ejct_ID"
## [79] "Prsn_Rest_ID" "Prsn_Airbag_ID"
## [81] "Prsn_Helmet_ID" "Prsn_Alc_Spec_Type_ID"
## [83] "Prsn_Alc_Rslt_ID" "Prsn_Bac_Test_Rslt"
## [85] "Prsn_Drg_Spec_Type_ID" "Prsn_Drg_Rslt_ID"
## [87] "Drvr_Lic_Type_ID" "Drvr_Lic_State_ID"
## [89] "Drvr_Lic_Number" "Drvr_Lic_Cls_ID"
## [91] "Drvr_DOB" "Drvr_State_ID"
## [93] "Drvr_Zip" "Num_un"
## [95] "Num_pr" "OthrUnitNbr"
## [97] "OthrPrsn_Nbr" "OtherUnit_ID"
## [99] "OtherPerson_ID" "Crash_ID_2"
## [101] "Year_2" "Unit_Nbr"
## [103] "Unit_Desc_ID_2" "Veh_Lic_Plate_Nbr_2"
## [105] "VIN_2" "Veh_Mod_Year_2"
## [107] "Veh_Color_ID_2" "Veh_Make_ID_2"
## [109] "Veh_Mod_ID_2" "Veh_Body_Styl_ID_2"
## [111] "Emer_Respndr_Fl_2" "Veh_Damage_Description1_Id_2"
## [113] "Veh_Damage_Severity1_Id_2" "Veh_Cmv_Fl_2"
## [115] "Contrib_Factr_1_ID_2" "Contrib_Factr_2_ID_2"
## [117] "Pedestrian_Action_ID_2" "Pedalcyclist_Action_ID_2"
## [119] "PBCAT_Pedestrian_ID_2" "PBCAT_Pedalcyclist_ID_2"
## [121] "E_Scooter_ID_2" "Autonomous_Unit_ID_2"
## [123] "Rpt_Autonomous_Level_Engaged_ID_2" "Rpt_Autonomous_Unit_ID_2"
## [125] "Cmv_Hazmat_Fl_2" "Crash_ID_2_2"
## [127] "Year_2_2" "Unit_ID_2"
## [129] "Unit_Nbr_2" "Prsn_Nbr_2"
## [131] "Prsn_Type_ID_2" "Prsn_Occpnt_Pos_ID_2"
## [133] "Prsn_Injry_Sev_ID_2" "Prsn_Age_2"
## [135] "Prsn_Ethnicity_ID_2" "Prsn_Gndr_ID_2"
## [137] "Prsn_Ejct_ID_2" "Prsn_Rest_ID_2"
## [139] "Prsn_Airbag_ID_2" "Prsn_Helmet_ID_2"
## [141] "Prsn_Alc_Spec_Type_ID_2" "Prsn_Alc_Rslt_ID_2"
## [143] "Prsn_Bac_Test_Rslt_2" "Prsn_Drg_Spec_Type_ID_2"
## [145] "Prsn_Drg_Rslt_ID_2" "Drvr_Lic_Type_ID_2"
## [147] "Drvr_Lic_State_ID_2" "Drvr_Lic_Number_2"
## [149] "Drvr_Lic_Cls_ID_2" "Drvr_DOB_2"
## [151] "Drvr_State_ID_2" "Drvr_Zip_2"
dat1= dat[, c(1, 24, 25, 28, 35)]
data_unique <- dat1[!duplicated(dat1$Crash_ID), ]
dim(data_unique)## [1] 71519 5
crash_sf <- data_unique %>%
filter(
!is.na(Longitude),
!is.na(Latitude),
Longitude < 0,
Latitude > 0
) %>%
st_as_sf(
coords = c("Longitude", "Latitude"),
crs = 4326,
remove = FALSE
)
# ------------------------------------------------------------
# 2. Download 2020 Census Block Groups for Texas
# ------------------------------------------------------------
tx_bg_2020 <- block_groups(
state = "TX",
year = 2020,
cb = TRUE,
class = "sf"
)## | | | 0% | | | 1% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |== | 4% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 6% | |===== | 7% | |===== | 8% | |====== | 8% | |====== | 9% | |======= | 9% | |======= | 10% | |======= | 11% | |======== | 11% | |======== | 12% | |========= | 12% | |========= | 13% | |========= | 14% | |========== | 14% | |========== | 15% | |=========== | 15% | |=========== | 16% | |============ | 16% | |============ | 17% | |============ | 18% | |============= | 18% | |============= | 19% | |============== | 19% | |============== | 20% | |============== | 21% | |=============== | 21% | |=============== | 22% | |================ | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |=================== | 28% | |==================== | 28% | |==================== | 29% | |===================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |====================== | 32% | |======================= | 32% | |======================= | 33% | |======================= | 34% | |======================== | 34% | |======================== | 35% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |========================== | 38% | |=========================== | 38% | |=========================== | 39% | |============================ | 39% | |============================ | 40% | |============================ | 41% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |=============================== | 45% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 49% | |=================================== | 50% | |=================================== | 51% | |==================================== | 51% | |==================================== | 52% | |===================================== | 52% | |===================================== | 53% | |====================================== | 54% | |====================================== | 55% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 58% | |========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |========================================== | 61% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 62% | |============================================ | 63% | |============================================= | 64% | |============================================= | 65% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 66% | |=============================================== | 67% | |=============================================== | 68% | |================================================ | 68% | |================================================ | 69% | |================================================= | 69% | |================================================= | 70% | |================================================= | 71% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 85% | |============================================================ | 86% | |============================================================= | 86% | |============================================================= | 87% | |============================================================= | 88% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 89% | |=============================================================== | 90% | |=============================================================== | 91% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 99% | |======================================================================| 100%
# Keep useful variables
tx_bg_2020 <- tx_bg_2020 %>%
select(
GEOID,
STATEFP,
COUNTYFP,
TRACTCE,
BLKGRPCE,
NAMELSAD,
geometry
)
# Make sure Census block group CRS is valid
tx_bg_2020 <- st_make_valid(tx_bg_2020)
# If crash_sf CRS is missing, assign it
if (is.na(st_crs(crash_sf))) {
st_crs(crash_sf) <- 4326
}
# Transform crash points to match block group CRS
crash_sf <- st_transform(crash_sf, st_crs(tx_bg_2020))
# Confirm CRS now matches
st_crs(crash_sf) == st_crs(tx_bg_2020)## [1] TRUE
# Join crashes to block groups
crash_with_bg <- st_join(
crash_sf,
tx_bg_2020,
join = st_intersects,
left = TRUE
)
##crash_with_bg %>%
## st_drop_geometry() %>%
## write_csv("tx_crashes_with_2020_blockgroup.csv")
# Save as spatial file
##st_write(
##crash_with_bg,
## "tx_crashes_with_2020_blockgroup.gpkg",
## delete_dsn = TRUE
#)
# ------------------------------------------------------------
# Count crashes by 2020 Census Block Group
# ------------------------------------------------------------Map
bg_crash_counts <- crash_with_bg %>%
st_drop_geometry() %>%
filter(!is.na(GEOID)) %>%
count(GEOID, name = "crash_count")
# Join counts back to block group polygons
tx_bg_crash_map <- tx_bg_2020 %>%
left_join(bg_crash_counts, by = "GEOID") %>%
mutate(
crash_count = ifelse(is.na(crash_count), 0, crash_count)
)
# ------------------------------------------------------------
# Static block group crash-count map
# No viridis used
# ------------------------------------------------------------
ggplot() +
geom_sf(
data = tx_bg_crash_map,
aes(fill = crash_count),
color = NA
) +
scale_fill_distiller(
palette = "YlOrRd",
direction = 1,
name = "Crash Count"
) +
labs(
title = "Texas Pedestrian Crashes by 2020 Census Block Group",
subtitle = "Crash counts aggregated to U.S. Census block groups",
caption = "Data: Texas crash records and 2020 U.S. Census block groups"
) +
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(face = "bold", size = 16),
plot.subtitle = element_text(size = 12),
legend.position = "right",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)Check a County
# ------------------------------------------------------------
# Example: county-level map
# Travis County FIPS = 453
# ------------------------------------------------------------
county_map <- tx_bg_crash_map %>%
filter(COUNTYFP == "453")
crash_county <- crash_with_bg %>%
filter(COUNTYFP == "453")
ggplot() +
geom_sf(
data = county_map,
aes(fill = crash_count),
color = "white",
linewidth = 0.05
) +
geom_sf(
data = crash_county,
size = 0.4,
alpha = 0.35
) +
scale_fill_distiller(
palette = "YlOrRd",
direction = 1,
name = "Crash Count"
) +
labs(
title = "Pedestrian Crash Hotspots by 2020 Census Block Group",
subtitle = "Example shown for Travis County, Texas"
) +
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(face = "bold"),
panel.grid = element_blank()
)Leaflet Map
# ------------------------------------------------------------
# Leaflet interactive map
# ------------------------------------------------------------
# Leaflet works best in WGS84
tx_bg_leaflet <- tx_bg_crash_map %>%
st_transform(4326)
crash_leaflet <- crash_with_bg %>%
st_transform(4326)
# Color bins
pal <- colorBin(
palette = "YlOrRd",
domain = tx_bg_leaflet$crash_count,
bins = c(0, 5, 10, 20, 30, 50, 700, 100, 150, 200, Inf),
na.color = "transparent"
)
# Popup text
tx_bg_leaflet <- tx_bg_leaflet %>%
mutate(
popup_text = paste0(
"<b>Block Group GEOID:</b> ", GEOID, "<br>",
"<b>County FIPS:</b> ", COUNTYFP, "<br>",
"<b>Crash Count:</b> ", crash_count
)
)
leaflet_map <- leaflet(tx_bg_leaflet) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
fillColor = ~pal(crash_count),
fillOpacity = 0.75,
color = "white",
weight = 0.2,
opacity = 0.8,
popup = ~popup_text,
label = ~paste0("GEOID: ", GEOID, " | Crashes: ", crash_count),
highlightOptions = highlightOptions(
weight = 2,
color = "#333333",
fillOpacity = 0.9,
bringToFront = TRUE
)
) %>%
addCircleMarkers(
data = crash_leaflet,
lng = ~Longitude,
lat = ~Latitude,
radius = 2,
stroke = FALSE,
fillOpacity = 0.35,
popup = ~paste0(
"<b>Crash ID:</b> ", Crash_ID, "<br>",
"<b>Latitude:</b> ", Latitude, "<br>",
"<b>Longitude:</b> ", Longitude
)
) %>%
addLegend(
pal = pal,
values = ~crash_count,
title = "Crash Count",
opacity = 0.8,
position = "bottomright"
)
leaflet_map