Ped Crash 02

library(sf)
library(dplyr)
library(ggplot2)
library(tigris)
library(readr)
library(RColorBrewer)
library(classInt)
library(leaflet)
library(htmlwidgets)

options(tigris_use_cache = TRUE)


setwd("C:/Users/mvx13/OneDrive - Texas State University/Papers/2026/2027_TRBAM/Data/OtherData")
library(readxl)

dat= read_excel("Pedestrian2017_2025a.xlsx")
dim(dat)
## [1] 76463   152
names(dat)
##   [1] "Crash_ID"                          "Crash_Date"                       
##   [3] "Crash_Time"                        "Crash_Speed_Limit"                
##   [5] "Wthr_Cond_ID"                      "Light_Cond_ID"                    
##   [7] "Entr_Road_ID"                      "Road_Type_ID"                     
##   [9] "Road_Algn_ID"                      "Surf_Cond_ID"                     
##  [11] "Traffic_Cntl_ID"                   "Investigat_Notify_Time"           
##  [13] "Investigat_Arrv_Time"              "Harm_Evnt_ID"                     
##  [15] "Intrsct_Relat_ID"                  "FHE_Collsn_ID"                    
##  [17] "Obj_Struck_ID"                     "Othr_Factr_ID"                    
##  [19] "Road_Part_Adj_ID"                  "Road_Cls_ID"                      
##  [21] "Road_Relat_ID"                     "Cnty_ID"                          
##  [23] "City_ID"                           "Latitude"                         
##  [25] "Longitude"                         "Onsys_Fl"                         
##  [27] "Rural_Fl"                          "Crash_Sev_ID"                     
##  [29] "Pop_Group_ID"                      "Day_of_Week"                      
##  [31] "Rural_Urban_Type_ID"               "Func_Sys_ID"                      
##  [33] "Adt_Curnt_Amt"                     "Adt_Curnt_Year"                   
##  [35] "Year"                              "Investigator_Narrative"           
##  [37] "Tot_Injry_Cnt"                     "Death_Cnt"                        
##  [39] "Sus_Serious_Injry_Cnt"             "Secondary_Crash_Fl"               
##  [41] "Yr_Un"                             "Unit_ID"                          
##  [43] "UnitNbr_Un"                        "Unit_Desc_ID"                     
##  [45] "Veh_Lic_Plate_Nbr"                 "VIN"                              
##  [47] "Veh_Mod_Year"                      "Veh_Color_ID"                     
##  [49] "Veh_Make_ID"                       "Veh_Mod_ID"                       
##  [51] "Veh_Body_Styl_ID"                  "Emer_Respndr_Fl"                  
##  [53] "Veh_Damage_Description1_Id"        "Veh_Damage_Severity1_Id"          
##  [55] "Veh_Cmv_Fl"                        "Contrib_Factr_1_ID"               
##  [57] "Contrib_Factr_2_ID"                "Pedestrian_Action_ID"             
##  [59] "Pedalcyclist_Action_ID"            "PBCAT_Pedestrian_ID"              
##  [61] "PBCAT_Pedalcyclist_ID"             "E_Scooter_ID"                     
##  [63] "Autonomous_Unit_ID"                "Rpt_Autonomous_Level_Engaged_ID"  
##  [65] "Rpt_Autonomous_Unit_ID"            "Cmv_Hazmat_Fl"                    
##  [67] "Crash_ID_Pr"                       "Yr_Pr"                            
##  [69] "UnitNbr_Pr"                        "Prsn_Nbr"                         
##  [71] "Person_ID"                         "Prsn_Type_ID"                     
##  [73] "Prsn_Occpnt_Pos_ID"                "Prsn_Injry_Sev_ID"                
##  [75] "Prsn_Age"                          "Prsn_Ethnicity_ID"                
##  [77] "Prsn_Gndr_ID"                      "Prsn_Ejct_ID"                     
##  [79] "Prsn_Rest_ID"                      "Prsn_Airbag_ID"                   
##  [81] "Prsn_Helmet_ID"                    "Prsn_Alc_Spec_Type_ID"            
##  [83] "Prsn_Alc_Rslt_ID"                  "Prsn_Bac_Test_Rslt"               
##  [85] "Prsn_Drg_Spec_Type_ID"             "Prsn_Drg_Rslt_ID"                 
##  [87] "Drvr_Lic_Type_ID"                  "Drvr_Lic_State_ID"                
##  [89] "Drvr_Lic_Number"                   "Drvr_Lic_Cls_ID"                  
##  [91] "Drvr_DOB"                          "Drvr_State_ID"                    
##  [93] "Drvr_Zip"                          "Num_un"                           
##  [95] "Num_pr"                            "OthrUnitNbr"                      
##  [97] "OthrPrsn_Nbr"                      "OtherUnit_ID"                     
##  [99] "OtherPerson_ID"                    "Crash_ID_2"                       
## [101] "Year_2"                            "Unit_Nbr"                         
## [103] "Unit_Desc_ID_2"                    "Veh_Lic_Plate_Nbr_2"              
## [105] "VIN_2"                             "Veh_Mod_Year_2"                   
## [107] "Veh_Color_ID_2"                    "Veh_Make_ID_2"                    
## [109] "Veh_Mod_ID_2"                      "Veh_Body_Styl_ID_2"               
## [111] "Emer_Respndr_Fl_2"                 "Veh_Damage_Description1_Id_2"     
## [113] "Veh_Damage_Severity1_Id_2"         "Veh_Cmv_Fl_2"                     
## [115] "Contrib_Factr_1_ID_2"              "Contrib_Factr_2_ID_2"             
## [117] "Pedestrian_Action_ID_2"            "Pedalcyclist_Action_ID_2"         
## [119] "PBCAT_Pedestrian_ID_2"             "PBCAT_Pedalcyclist_ID_2"          
## [121] "E_Scooter_ID_2"                    "Autonomous_Unit_ID_2"             
## [123] "Rpt_Autonomous_Level_Engaged_ID_2" "Rpt_Autonomous_Unit_ID_2"         
## [125] "Cmv_Hazmat_Fl_2"                   "Crash_ID_2_2"                     
## [127] "Year_2_2"                          "Unit_ID_2"                        
## [129] "Unit_Nbr_2"                        "Prsn_Nbr_2"                       
## [131] "Prsn_Type_ID_2"                    "Prsn_Occpnt_Pos_ID_2"             
## [133] "Prsn_Injry_Sev_ID_2"               "Prsn_Age_2"                       
## [135] "Prsn_Ethnicity_ID_2"               "Prsn_Gndr_ID_2"                   
## [137] "Prsn_Ejct_ID_2"                    "Prsn_Rest_ID_2"                   
## [139] "Prsn_Airbag_ID_2"                  "Prsn_Helmet_ID_2"                 
## [141] "Prsn_Alc_Spec_Type_ID_2"           "Prsn_Alc_Rslt_ID_2"               
## [143] "Prsn_Bac_Test_Rslt_2"              "Prsn_Drg_Spec_Type_ID_2"          
## [145] "Prsn_Drg_Rslt_ID_2"                "Drvr_Lic_Type_ID_2"               
## [147] "Drvr_Lic_State_ID_2"               "Drvr_Lic_Number_2"                
## [149] "Drvr_Lic_Cls_ID_2"                 "Drvr_DOB_2"                       
## [151] "Drvr_State_ID_2"                   "Drvr_Zip_2"
dat1= dat[, c(1, 24, 25, 28, 35)]
data_unique <- dat1[!duplicated(dat1$Crash_ID), ]
dim(data_unique)
## [1] 71519     5
crash= data_unique

crash_sf <- crash %>%
  filter(
    !is.na(Longitude),
    !is.na(Latitude),
    Longitude < 0,
    Latitude > 0
  ) %>%
  st_as_sf(
    coords = c("Longitude", "Latitude"),
    crs = 4326,
    remove = FALSE
  )

# ------------------------------------------------------------
# Texas state boundary
# ------------------------------------------------------------

tx_boundary <- states(cb = TRUE, year = 2020, class = "sf") %>%
  filter(STUSPS == "TX") %>%
  st_transform(3083) %>%
  st_make_valid()

# Project crash points
crash_tx <- crash_sf %>%
  st_transform(3083)

# Keep only crashes inside Texas
crash_tx <- st_filter(crash_tx, tx_boundary)

# ------------------------------------------------------------
# Create approximately 80,000 square grids over Texas
# ------------------------------------------------------------

target_grid_count <- 80000

# Texas area in square meters
tx_area_m2 <- as.numeric(st_area(tx_boundary))

# Cell size in meters
cell_size_m <- sqrt(tx_area_m2 / target_grid_count)

cell_size_m
## [1] 2931.901
# ------------------------------------------------------------
# Build square fishnet grid
# ------------------------------------------------------------

tx_grid <- st_make_grid(
  tx_boundary,
  cellsize = cell_size_m,
  square = TRUE
) %>%
  st_sf(grid_id = seq_along(.), geometry = .)

# Clip grid to Texas boundary
tx_grid <- st_intersection(tx_grid, tx_boundary) %>%
  select(grid_id, geometry)

# Check actual number of cells
nrow(tx_grid)
## [1] 81310
# ------------------------------------------------------------
# Spatially join crashes to grid cells
# ------------------------------------------------------------

crash_grid_join <- st_join(
  crash_tx,
  tx_grid,
  join = st_within,
  left = FALSE
)

# Count crashes per grid
grid_crash_counts <- crash_grid_join %>%
  st_drop_geometry() %>%
  count(grid_id, name = "crash_count")

# Join crash counts back to grid
tx_grid_crash_map <- tx_grid %>%
  left_join(grid_crash_counts, by = "grid_id") %>%
  mutate(
    crash_count = ifelse(is.na(crash_count), 0, crash_count)
  )

summary(tx_grid_crash_map$crash_count)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##   0.0000   0.0000   0.0000   0.6486   0.0000 586.0000
quantile(
  tx_grid_crash_map$crash_count,
  probs = c(0, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99, 1),
  na.rm = TRUE
)
##   0%  25%  50%  75%  90%  95%  99% 100% 
##    0    0    0    0    0    1   15  586
tx_grid_crash_map %>%
  st_drop_geometry() %>%
  count(crash_count, sort = FALSE) %>%
  arrange(crash_count) %>%
  head(30)
##    crash_count     n
## 1            0 75371
## 2            1  2542
## 3            2   777
## 4            3   412
## 5            4   292
## 6            5   224
## 7            6   157
## 8            7   166
## 9            8   116
## 10           9    98
## 11          10    88
## 12          11    51
## 13          12    75
## 14          13    52
## 15          14    56
## 16          15    53
## 17          16    38
## 18          17    33
## 19          18    29
## 20          19    18
## 21          20    34
## 22          21    38
## 23          22    21
## 24          23    23
## 25          24    27
## 26          25    25
## 27          26    17
## 28          27    13
## 29          28    18
## 30          29    21
# ------------------------------------------------------------
# Data-driven bins: zero separate + Jenks for positive counts
# ------------------------------------------------------------

positive_counts <- tx_grid_crash_map$crash_count[
  !is.na(tx_grid_crash_map$crash_count) &
    tx_grid_crash_map$crash_count > 0
]

n_classes <- 6

jenks_breaks <- classIntervals(
  positive_counts,
  n = n_classes,
  style = "jenks"
)$brks

# Clean breaks
breaks_final <- sort(unique(c(0, round(jenks_breaks), Inf)))

breaks_final
## [1]   0   1   9  29  63 122 270 586 Inf
# ------------------------------------------------------------
# Create readable labels
# ------------------------------------------------------------

make_bin_labels <- function(breaks) {
  
  labels <- character(length(breaks) - 1)
  
  for (i in seq_along(labels)) {
    
    lower <- breaks[i]
    upper <- breaks[i + 1]
    
    if (lower == 0 && upper == 1) {
      labels[i] <- "0"
    } else if (is.infinite(upper)) {
      labels[i] <- paste0(lower + 1, "+")
    } else {
      labels[i] <- paste0(lower + 1, "–", upper)
    }
  }
  
  labels
}

bin_labels <- make_bin_labels(breaks_final)

bin_labels
## [1] "0"       "2–9"     "10–29"   "30–63"   "64–122"  "123–270" "271–586"
## [8] "587+"
tx_grid_crash_map <- tx_grid_crash_map %>%
  mutate(
    crash_count_bin = cut(
      crash_count,
      breaks = breaks_final,
      labels = bin_labels,
      include.lowest = TRUE,
      right = TRUE
    )
  )

# Check number of grid cells in each bin
tx_grid_crash_map %>%
  st_drop_geometry() %>%
  count(crash_count_bin)
##   crash_count_bin     n
## 1               0 77913
## 2             2–9  2242
## 3           10–29   730
## 4           30–63   268
## 5          64–122   117
## 6         123–270    35
## 7         271–586     5
display.brewer.pal(9, "YlOrRd")

display.brewer.pal(9, "OrRd")

display.brewer.pal(9, "Reds")

palette_name <- "YlOrRd"

n_bins <- length(levels(tx_grid_crash_map$crash_count_bin))

map_colors <- colorRampPalette(
  brewer.pal(9, palette_name)
)(n_bins)

# Make zero-crash grid cells light gray
map_colors[1] <- "gray92"

# ------------------------------------------------------------
# Static crash-count grid map
# ------------------------------------------------------------

ggplot() +
  geom_sf(
    data = tx_grid_crash_map,
    aes(fill = crash_count_bin),
    color = NA
  ) +
  scale_fill_manual(
    values = map_colors,
    na.value = "transparent",
    name = "Crash Count",
    drop = FALSE
  ) +
  labs(
    title = "Texas Pedestrian Crashes by Square Grid",
    subtitle = paste0(
      "Crash counts aggregated to approximately ",
      format(target_grid_count, big.mark = ","),
      " square grid cells"
    ),
    caption = "Data: Texas crash records; grid generated from Texas state boundary"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12),
    plot.caption = element_text(size = 9, color = "gray40"),
    legend.position = "right",
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 9),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank()
  )

tx_grid_crash_only <- tx_grid_crash_map %>%
  filter(crash_count > 0)

ggplot() +
  geom_sf(
    data = tx_boundary,
    fill = "gray97",
    color = "gray50",
    linewidth = 0.25
  ) +
  geom_sf(
    data = tx_grid_crash_only,
    aes(fill = crash_count_bin),
    color = NA
  ) +
  scale_fill_manual(
    values = map_colors,
    na.value = "transparent",
    name = "Crash Count",
    drop = FALSE
  ) +
  labs(
    title = "Texas Pedestrian Crash Concentrations by Square Grid",
    subtitle = "Only grid cells with at least one crash are shown",
    caption = "Data: Texas crash records; grid generated from Texas state boundary"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12),
    legend.position = "right",
    legend.title = element_text(face = "bold"),
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank()
  )

ggplot() +
  geom_sf(
    data = tx_boundary,
    fill = "gray97",
    color = "gray50",
    linewidth = 0.25
  ) +
  geom_sf(
    data = tx_grid_crash_only,
    aes(fill = crash_count_bin),
    color = NA
  ) +
  geom_sf(
    data = crash_tx,
    size = 0.15,
    alpha = 0.20,
    color = "black"
  ) +
  scale_fill_manual(
    values = map_colors,
    na.value = "transparent",
    name = "Crash Count",
    drop = FALSE
  ) +
  labs(
    title = "Texas Pedestrian Crashes and Square-Grid Hotspots",
    subtitle = "Crash points overlaid on grid-level crash counts"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12),
    legend.position = "right",
    panel.grid = element_blank(),
    axis.text = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank()
  )

# ------------------------------------------------------------
# Prepare data for leaflet
# ------------------------------------------------------------

tx_grid_leaflet <- tx_grid_crash_map %>%
  st_transform(4326)

tx_grid_leaflet_crash_only <- tx_grid_leaflet %>%
  filter(crash_count > 0)

crash_leaflet <- crash_tx %>%
  st_transform(4326)

tx_boundary_leaflet <- tx_boundary %>%
  st_transform(4326)

tx_grid_leaflet_crash_only <- tx_grid_leaflet_crash_only %>%
  mutate(
    popup_text = paste0(
      "<b>Grid ID:</b> ", grid_id, "<br>",
      "<b>Crash Count:</b> ", crash_count, "<br>",
      "<b>Crash Count Bin:</b> ", crash_count_bin
    )
  )

pal_leaflet <- colorFactor(
  palette = map_colors,
  domain = levels(tx_grid_crash_map$crash_count_bin),
  na.color = "transparent"
)


# ------------------------------------------------------------
# Interactive leaflet map
# ------------------------------------------------------------

leaflet_grid_map <- leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  
  addPolygons(
    data = tx_boundary_leaflet,
    fill = FALSE,
    color = "gray40",
    weight = 1,
    opacity = 0.8,
    group = "Texas Boundary"
  ) %>%
  
  addPolygons(
    data = tx_grid_leaflet_crash_only,
    fillColor = ~pal_leaflet(crash_count_bin),
    fillOpacity = 0.75,
    color = "white",
    weight = 0.15,
    opacity = 0.7,
    popup = ~popup_text,
    label = ~paste0("Grid ", grid_id, ": ", crash_count, " crashes"),
    highlightOptions = highlightOptions(
      weight = 2,
      color = "#333333",
      fillOpacity = 0.90,
      bringToFront = TRUE
    ),
    group = "Crash Grid"
  ) %>%
  
  addCircleMarkers(
    data = crash_leaflet,
    lng = ~Longitude,
    lat = ~Latitude,
    radius = 1.5,
    stroke = FALSE,
    fillOpacity = 0.25,
    color = "black",
    popup = ~paste0(
      "<b>Crash ID:</b> ", Crash_ID, "<br>",
      "<b>Latitude:</b> ", Latitude, "<br>",
      "<b>Longitude:</b> ", Longitude
    ),
    group = "Crash Points"
  ) %>%
  
  addLegend(
    position = "bottomright",
    pal = pal_leaflet,
    values = levels(tx_grid_crash_map$crash_count_bin),
    title = "Crash Count",
    opacity = 0.8
  ) %>%
  
  addLayersControl(
    overlayGroups = c("Texas Boundary", "Crash Grid", "Crash Points"),
    options = layersControlOptions(collapsed = FALSE)
  )

leaflet_grid_map
# Save spatial grid
#st_write(
#  tx_grid_crash_map,
 # "tx_pedestrian_crashes_80000_grid.gpkg",
 # delete_dsn = TRUE
#)

# Save non-spatial table
#tx_grid_crash_map %>%
 # st_drop_geometry() %>%
 # write_csv("tx_pedestrian_crashes_80000_grid_counts.csv")