Emer Responder
Load Libraries
Data Loading & Preparation
# Set working directory - change as needed
setwd("C:/Users/mvx13/OneDrive - Texas State University/Papers/TRB 2026/EmergencyResponder")
# Load data
dat <- read_excel("CRIS_EmerRespndrFl-Y-2025-07-03-221624.xlsx", sheet = "UnitL4")
# Drop unwanted columns
drop_cols <- c(1, 3, 4, 5, 6, 20, 26, 30, 21, 33, 34, 35, 36, 37, 40, 43:45)
dat <- dat[, -drop_cols]
# Convert character to factor
dat <- dat %>% mutate(across(where(is.character), as.factor))
# Optional - Check data
glimpse(dat)
## Rows: 39,379
## Columns: 27
## $ Crash_ID2 <fct> TX_2017_15516417_3, TX_2017_15517391_1, TX_2017_1…
## $ Wthr_Cond_ID <fct> Rain, Clear, Rain, Clear, Rain, Rain, Clear, Clea…
## $ Light_Cond_ID <fct> "Daylight", "Daylight", "Daylight", "Daylight", "…
## $ Road_Type_ID <fct> "Unknown", "4 Or More Lanes, Divided", "Unknown",…
## $ Road_Algn_ID <fct> "Curve, Level", "Straight, Level", "Straight, Lev…
## $ Surf_Cond_ID <fct> "Wet", "Dry", "Wet", "Dry", "Wet", "Wet", "Dry", …
## $ Traffic_Cntl_ID <fct> Other, Signal Light, Marked Lanes, Signal Light, …
## $ Harm_Evnt_ID <fct> Motor Vehicle In Transport, Motor Vehicle In Tran…
## $ Intrsct_Relat_ID <fct> Non Intersection, Intersection Related, Non Inter…
## $ FHE_Collsn_ID <fct> Same Direction - Both Going Straight-Rear End, Sa…
## $ Othr_Factr_ID <fct> "Other", "Slowing/Stopping - For Officer, Flagman…
## $ Road_Part_Adj_ID <fct> Main/Proper Lane, Main/Proper Lane, Main/Proper L…
## $ Road_Cls_ID <fct> Us & State Highways, Us & State Highways, City St…
## $ Road_Relat_ID <fct> On Roadway, On Roadway, On Roadway, On Roadway, O…
## $ Func_Sys_ID <fct> Rural Prin Arterial, Unknown, Unknown, Unknown, R…
## $ Crash_Speed_LimitCat <fct> 45-60 mph, Other, 30-40 mph, 45-60 mph, 65-70 mph…
## $ Cr_Hr <fct> 0-6, 0-6, 7-12, 0-6, 7-12, 0-6, 7-12, 7-12, 7-12,…
## $ Unit_Desc_ID <fct> Motor vehicle, Motor vehicle, Motor vehicle, Moto…
## $ Contrib_Factr_1_ID <fct> None, Failed To Control Speed, None, None, None, …
## $ Prsn_Type_ID <fct> Unknown, Driver, Driver, Driver, Unknown, Driver,…
## $ Prsn_Occpnt_Pos_ID <fct> Unknown, Front left, Front left, Front left, Unkn…
## $ Prsn_Ethnicity_ID <fct> Unknown, White, Other, Black, Unknown, Hispanic, …
## $ Prsn_Gndr_ID <fct> Unknown, Male, Female, Male, Unknown, Male, Femal…
## $ Prsn_Age1 <fct> Unknown, 25-54 years, 25-54 years, 25-54 years, U…
## $ Prsn_Injry_Sev_ID1 <fct> O, O, BC, O, O, O, O, O, O, O, O, O, O, BC, O, O,…
## $ Dr1VehType <fct> "Unknown", "Sport Utility Vehicle", "Pickup", "Ot…
## $ Dr1Age1 <fct> Unknown, 55-64 years, 25-54 years, 15-24 years, U…
Feature Engineering
Gradient Boosting (Feature Importance)
set.seed(102)
# Adjust these names as needed
target_var <- "P_I_"
id_var <- abbrev_names[1]
# Prepare data for XGBoost
X <- data.matrix(dat[ , !(names(dat) %in% c(id_var, target_var))])
y <- dat[[target_var]]
bst <- xgboost(
data = X,
label = y,
objective = "reg:squarederror",
nrounds = 100,
max_depth = 5,
eta = 0.3,
verbose = 0
)
# Variable importance
vip(bst, num_features=42) + theme_bw(base_size = 16)
Prepare Data for Clustering
Modular Clustering Function
run_clustering <- function(data, n_clusters, cluster_name = NULL) {
# Run clustering
set.seed(1234)
res <- clusmca(data, n_clusters, 2, method = "clusCA", nstart = 10)
# Add cluster assignment to data
cluster_label <- if (is.null(cluster_name)) paste0("Cluster_", n_clusters) else cluster_name
data[[cluster_label]] <- res$cluster
# Plot
plot(res, cludesc = TRUE, main = paste("MCA Clustering: ", n_clusters, "clusters"))
# Save clustered data
fn <- paste0("Data_with_", n_clusters, "Clusters.csv")
write.csv(data, file = fn, row.names = FALSE)
# Cluster description tables
cg_data <- data
cg_data[[cluster_label]] <- as.factor(cg_data[[cluster_label]])
res1 <- compareGroups(as.formula(paste(cluster_label, "~ .")), data = cg_data, max.ylev = 20, max.xlev = 20)
res2 <- createTable(res1, show.ratio = TRUE, type = 1)
outcsv <- paste0("tableClusterDesc_", n_clusters, ".csv")
export2csv(res2, file = outcsv)
list(clustered_data = data, cluster_object = res, comparegroups_table = res2)
}
Run Clusters
## | | | 0% | |======= | 10% | |============== | 20% | |===================== | 30% | |============================ | 40% | |=================================== | 50% | |========================================== | 60% | |================================================= | 70% | |======================================================== | 80% | |=============================================================== | 90% | |======================================================================| 100%
## | | | 0% | |======= | 10% | |============== | 20% | |===================== | 30% | |============================ | 40% | |=================================== | 50% | |========================================== | 60% | |================================================= | 70% | |======================================================== | 80% | |=============================================================== | 90% | |======================================================================| 100%
## | | | 0% | |======= | 10% | |============== | 20% | |===================== | 30% | |============================ | 40% | |=================================== | 50% | |========================================== | 60% | |================================================= | 70% | |======================================================== | 80% | |=============================================================== | 90% | |======================================================================| 100%