# Descriptive Statistics
library(dplyr)
library(tidyr)
library(DT)
library(fst)
library(summarytools)
# Work inside the CRIS_Tool project folder so the relative data path below
# resolves.
# NOTE(review): absolute, user-specific path -- this only resolves on a machine
# that has this exact folder; consider a project-relative path.
setwd("C:/Users/mvx13/OneDrive - Texas State University/Hackathon_Monzurul/Tools/shinyapps/CRIS_Tool")
library(fst) # also attached at the top of the script; attaching again is a no-op

# Read the fst file into the data frame `dat`.
dat <- read.fst("CrUnPPr_2017_2024_LimWithNarr2.fst")

# Print the column names together with their positions. The positional column
# selections applied to `dat` elsewhere in this script index into this order.
colnames(dat)
## [1] "Crash_ID" "Crash_Date"
## [3] "Crash_Time" "Crash_Speed_Limit"
## [5] "Wthr_Cond_ID" "Light_Cond_ID"
## [7] "Entr_Road_ID" "Road_Type_ID"
## [9] "Road_Algn_ID" "Surf_Cond_ID"
## [11] "Traffic_Cntl_ID" "Investigat_Notify_Time"
## [13] "Investigat_Arrv_Time" "Harm_Evnt_ID"
## [15] "Intrsct_Relat_ID" "FHE_Collsn_ID"
## [17] "Obj_Struck_ID" "Othr_Factr_ID"
## [19] "Road_Part_Adj_ID" "Road_Cls_ID"
## [21] "Road_Relat_ID" "Cnty_ID"
## [23] "City_ID" "Latitude"
## [25] "Longitude" "Onsys_Fl"
## [27] "Rural_Fl" "Crash_Sev_ID"
## [29] "Pop_Group_ID" "Day_of_Week"
## [31] "Rural_Urban_Type_ID" "Func_Sys_ID"
## [33] "Adt_Curnt_Amt" "Adt_Curnt_Year"
## [35] "Year" "Investigator_Narrative"
## [37] "Yr_Un" "Unit_ID"
## [39] "UnitNbr_Un" "Unit_Desc_ID"
## [41] "Veh_Lic_Plate_Nbr" "VIN"
## [43] "Veh_Mod_Year" "Veh_Color_ID"
## [45] "Veh_Make_ID" "Veh_Mod_ID"
## [47] "Veh_Body_Styl_ID" "Emer_Respndr_Fl"
## [49] "Veh_Damage_Description1_Id" "Veh_Damage_Severity1_Id"
## [51] "Veh_Cmv_Fl" "Contrib_Factr_1_ID"
## [53] "Contrib_Factr_2_ID" "Pedestrian_Action_ID"
## [55] "Pedalcyclist_Action_ID" "PBCAT_Pedestrian_ID"
## [57] "PBCAT_Pedalcyclist_ID" "E_Scooter_ID"
## [59] "Autonomous_Unit_ID" "Crash_ID_Pr"
## [61] "Yr_Pr" "UnitNbr_Pr"
## [63] "Prsn_Nbr" "Person_ID"
## [65] "Prsn_Type_ID" "Prsn_Occpnt_Pos_ID"
## [67] "Prsn_Injry_Sev_ID" "Prsn_Age"
## [69] "Prsn_Ethnicity_ID" "Prsn_Gndr_ID"
## [71] "Prsn_Ejct_ID" "Prsn_Rest_ID"
## [73] "Prsn_Airbag_ID" "Prsn_Helmet_ID"
## [75] "Drvr_Lic_Type_ID" "Drvr_Lic_State_ID"
## [77] "Drvr_Lic_Number" "Drvr_Lic_Cls_ID"
## [79] "Drvr_DOB" "Drvr_State_ID"
## [81] "Drvr_Zip"
# Keep 57 of the 81 columns of `dat`, chosen (and reordered) by position.
# The positions refer to the column order printed by names(dat).
dat1a <- dat[, c(
  1, 36, 38, 62, 22:25, 12, 13, 2, 3, 35, 37, 61, 63, 60,
  4:11, 14:21, 28:32, 43:48, 52, 53, 65, 67:75, 58
)]

# Crash-level filter: keep every row that belongs to a Crash_ID for which at
# least one row has E_Scooter_ID equal to "Yes". Rows of the same crash whose
# own E_Scooter_ID is not "Yes" are therefore kept as well.
dat3 <- dat1a %>%
  group_by(Crash_ID) %>%
  filter("Yes" %in% E_Scooter_ID) %>%
  ungroup()

# Number of rows and columns in the e-scooter crash subset.
dim(dat3)
## [1] 716 57
##
## Not Applicable Yes
## 361 355
# Largest number of distinct values a column may have and still be summarised.
# The same cap is passed to dfSummary() below so that every level of every
# retained column gets its own frequency row.
max_levels <- 50

# Step 1: Keep only the variables with at most `max_levels` unique values.
# unique() retains NA, so a column with missing values counts NA as one of its
# distinct values here.
unique_counts <- vapply(dat3, function(x) length(unique(x)), integer(1))
filtered_vars <- names(unique_counts)[unique_counts <= max_levels]
dat_filtered <- dat3[, filtered_vars]

#' Turn a column into a factor whose levels are ordered by frequency
#'
#' Numeric input is first converted to a factor so that it is treated as
#' categorical. Factor and character input is then re-levelled so the most
#' frequent value comes first. Any other type is returned unchanged.
#'
#' @param x A single column (vector or factor).
#' @return A factor with levels in decreasing order of frequency, or `x`
#'   itself when it is not numeric, factor or character. Missing values are
#'   not counted by table() and stay NA in the result.
reorder_by_frequency <- function(x) {
  if (is.numeric(x)) {
    x <- factor(x) # treat numeric as categorical
  }
  if (is.factor(x) || is.character(x)) {
    levels_by_freq <- names(sort(table(x), decreasing = TRUE))
    x <- factor(x, levels = levels_by_freq)
  }
  x
}

# Step 2: Reorder the values of every retained variable by frequency
# (numeric variables included, see reorder_by_frequency()).
dat_reordered <- as.data.frame(lapply(dat_filtered, reorder_by_frequency))

# Step 3: Show the summary as a markdown grid table without the graph column.
dfSummary(
  dat_reordered,
  style = "grid",
  plain.ascii = FALSE,
  graph.col = FALSE,
  max.distinct.values = max_levels
)
## ### Data Frame Summary
## #### dat_reordered
## **Dimensions:** 716 x 44
## **Duplicates:** 0
##
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | No | Variable | Stats / Values | Freqs (% of Valid) | Valid | Missing |
## +====+======================+=================================+====================+==========+=========+
## | 1 | UnitNbr_Pr\ | 1\. 1\ | 345 (50.3%)\ | 686\ | 30\ |
## | | [factor] | 2\. 2\ | 334 (48.7%)\ | (95.8%) | (4.2%) |
## | | | 3\. 3\ | 6 ( 0.9%)\ | | |
## | | | 4\. 5 | 1 ( 0.1%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 2 | Cnty_ID\ | 1\. Harris\ | 136 (19.0%)\ | 716\ | 0\ |
## | | [factor] | 2\. Travis\ | 132 (18.4%)\ | (100.0%) | (0.0%) |
## | | | 3\. Bexar\ | 99 (13.8%)\ | | |
## | | | 4\. Dallas\ | 56 ( 7.8%)\ | | |
## | | | 5\. Denton\ | 32 ( 4.5%)\ | | |
## | | | 6\. Lubbock\ | 28 ( 3.9%)\ | | |
## | | | 7\. Collin\ | 22 ( 3.1%)\ | | |
## | | | 8\. Brazos\ | 18 ( 2.5%)\ | | |
## | | | 9\. Fort Bend\ | 18 ( 2.5%)\ | | |
## | | | 10\. Jefferson\ | 18 ( 2.5%)\ | | |
## | | | 11\. Tarrant\ | 18 ( 2.5%)\ | | |
## | | | 12\. El Paso\ | 12 ( 1.7%)\ | | |
## | | | 13\. Galveston\ | 10 ( 1.4%)\ | | |
## | | | 14\. Ector\ | 9 ( 1.3%)\ | | |
## | | | 15\. Brazoria\ | 8 ( 1.1%)\ | | |
## | | | 16\. Hidalgo\ | 8 ( 1.1%)\ | | |
## | | | 17\. Mclennan\ | 8 ( 1.1%)\ | | |
## | | | 18\. Montgomery\ | 8 ( 1.1%)\ | | |
## | | | 19\. Hays\ | 6 ( 0.8%)\ | | |
## | | | 20\. Nueces\ | 6 ( 0.8%)\ | | |
## | | | 21\. Bell\ | 4 ( 0.6%)\ | | |
## | | | 22\. Cameron\ | 4 ( 0.6%)\ | | |
## | | | 23\. Chambers\ | 4 ( 0.6%)\ | | |
## | | | 24\. Grayson\ | 4 ( 0.6%)\ | | |
## | | | 25\. Taylor\ | 4 ( 0.6%)\ | | |
## | | | 26\. Webb\ | 4 ( 0.6%)\ | | |
## | | | 27\. Williamson\ | 4 ( 0.6%)\ | | |
## | | | 28\. Bastrop\ | 2 ( 0.3%)\ | | |
## | | | 29\. Erath\ | 2 ( 0.3%)\ | | |
## | | | 30\. Gregg\ | 2 ( 0.3%)\ | | |
## | | | 31\. Hockley\ | 2 ( 0.3%)\ | | |
## | | | 32\. Hopkins\ | 2 ( 0.3%)\ | | |
## | | | 33\. Hunt\ | 2 ( 0.3%)\ | | |
## | | | 34\. Johnson\ | 2 ( 0.3%)\ | | |
## | | | 35\. Kaufman\ | 2 ( 0.3%)\ | | |
## | | | 36\. Kerr\ | 2 ( 0.3%)\ | | |
## | | | 37\. Kleberg\ | 2 ( 0.3%)\ | | |
## | | | 38\. Mason\ | 2 ( 0.3%)\ | | |
## | | | 39\. Maverick\ | 2 ( 0.3%)\ | | |
## | | | 40\. Morris\ | 2 ( 0.3%)\ | | |
## | | | 41\. Parker\ | 2 ( 0.3%)\ | | |
## | | | 42\. Rockwall\ | 2 ( 0.3%)\ | | |
## | | | 43\. Tom Green\ | 2 ( 0.3%)\ | | |
## | | | 44\. Walker\ | 2 ( 0.3%)\ | | |
## | | | 45\. Wood | 2 ( 0.3%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 3 | Year\ | 1\. 2024\ | 261 (36.5%)\ | 716\ | 0\ |
## | | [factor] | 2\. 2023\ | 230 (32.1%)\ | (100.0%) | (0.0%) |
## | | | 3\. 2022\ | 164 (22.9%)\ | | |
## | | | 4\. 2021 | 61 ( 8.5%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 4 | Yr_Un\ | 1\. 2024\ | 261 (36.5%)\ | 716\ | 0\ |
## | | [factor] | 2\. 2023\ | 230 (32.1%)\ | (100.0%) | (0.0%) |
## | | | 3\. 2022\ | 164 (22.9%)\ | | |
## | | | 4\. 2021 | 61 ( 8.5%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 5 | Yr_Pr\ | 1\. 2024\ | 253 (36.9%)\ | 686\ | 30\ |
## | | [factor] | 2\. 2023\ | 218 (31.8%)\ | (95.8%) | (4.2%) |
## | | | 3\. 2022\ | 156 (22.7%)\ | | |
## | | | 4\. 2021 | 59 ( 8.6%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 6 | Prsn_Nbr\ | 1\. 1 | 686 (100.0%) | 686\ | 30\ |
## | | [factor] | | | (95.8%) | (4.2%) |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 7 | Crash_Speed_Limit\ | 1\. 30\ | 253 (35.3%)\ | 716\ | 0\ |
## | | [factor] | 2\. 35\ | 118 (16.5%)\ | (100.0%) | (0.0%) |
## | | | 3\. 45\ | 72 (10.1%)\ | | |
## | | | 4\. -1\ | 70 ( 9.8%)\ | | |
## | | | 5\. 40\ | 65 ( 9.1%)\ | | |
## | | | 6\. 25\ | 38 ( 5.3%)\ | | |
## | | | 7\. 20\ | 30 ( 4.2%)\ | | |
## | | | 8\. 10\ | 18 ( 2.5%)\ | | |
## | | | 9\. 15\ | 18 ( 2.5%)\ | | |
## | | | 10\. 50\ | 15 ( 2.1%)\ | | |
## | | | 11\. 55\ | 9 ( 1.3%)\ | | |
## | | | 12\. 5\ | 8 ( 1.1%)\ | | |
## | | | 13\. 60 | 2 ( 0.3%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 8 | Wthr_Cond_ID\ | 1\. Clear\ | 597 (83.4%)\ | 716\ | 0\ |
## | | [factor] | 2\. Cloudy\ | 89 (12.4%)\ | (100.0%) | (0.0%) |
## | | | 3\. Rain\ | 20 ( 2.8%)\ | | |
## | | | 4\. Unknown\ | 6 ( 0.8%)\ | | |
## | | | 5\. Fog | 4 ( 0.6%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 9 | Light_Cond_ID\ | 1\. Daylight\ | 473 (66.1%)\ | 716\ | 0\ |
## | | [factor] | 2\. Dark, Lighted\ | 176 (24.6%)\ | (100.0%) | (0.0%) |
## | | | 3\. Dark, Not Lighted\ | 43 ( 6.0%)\ | | |
## | | | 4\. Dark, Unknown Lighting\ | 6 ( 0.8%)\ | | |
## | | | 5\. Dusk\ | 6 ( 0.8%)\ | | |
## | | | 6\. Unknown\ | 6 ( 0.8%)\ | | |
## | | | 7\. Dawn\ | 4 ( 0.6%)\ | | |
## | | | 8\. Other (Explain In Narrati | 2 ( 0.3%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 10 | Entr_Road_ID\ | 1\. Not Applicable\ | 249 (34.8%)\ | 716\ | 0\ |
## | | [factor] | 2\. Four Entering Roads\ | 204 (28.5%)\ | (100.0%) | (0.0%) |
## | | | 3\. Three Entering Roads - T\ | 180 (25.1%)\ | | |
## | | | 4\. Other (Explain In Narrati\ | 66 ( 9.2%)\ | | |
## | | | 5\. Three Entering Roads - Y\ | 13 ( 1.8%)\ | | |
## | | | 6\. Five Entering Roads\ | 2 ( 0.3%)\ | | |
## | | | 7\. Traffic Circle | 2 ( 0.3%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 11 | Road_Type_ID\ | 1\. 2 Lane, 2 Way\ | 305 (56.3%)\ | 542\ | 174\ |
## | | [factor] | 2\. 4 Or More Lanes, Divided\ | 171 (31.5%)\ | (75.7%) | (24.3%) |
## | | | 3\. 4 Or More Lanes, Undivide | 66 (12.2%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 12 | Road_Algn_ID\ | 1\. Straight, Level\ | 637 (89.0%)\ | 716\ | 0\ |
## | | [factor] | 2\. Straight, Grade\ | 35 ( 4.9%)\ | (100.0%) | (0.0%) |
## | | | 3\. Curve, Level\ | 16 ( 2.2%)\ | | |
## | | | 4\. Straight, Hillcrest\ | 14 ( 2.0%)\ | | |
## | | | 5\. Curve, Grade\ | 6 ( 0.8%)\ | | |
## | | | 6\. Other (Explain In Narrati\ | 6 ( 0.8%)\ | | |
## | | | 7\. Unknown | 2 ( 0.3%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 13 | Surf_Cond_ID\ | 1\. Dry\ | 667 (93.2%)\ | 716\ | 0\ |
## | | [factor] | 2\. Wet\ | 43 ( 6.0%)\ | (100.0%) | (0.0%) |
## | | | 3\. Unknown | 6 ( 0.8%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 14 | Traffic_Cntl_ID\ | 1\. Marked Lanes\ | 174 (24.3%)\ | 716\ | 0\ |
## | | [factor] | 2\. Stop Sign\ | 145 (20.3%)\ | (100.0%) | (0.0%) |
## | | | 3\. None\ | 140 (19.6%)\ | | |
## | | | 4\. Signal Light\ | 131 (18.3%)\ | | |
## | | | 5\. Crosswalk\ | 52 ( 7.3%)\ | | |
## | | | 6\. Center Stripe/Divider\ | 25 ( 3.5%)\ | | |
## | | | 7\. Other (Explain In Narrati\ | 22 ( 3.1%)\ | | |
## | | | 8\. Signal Light With Red Lig\ | 8 ( 1.1%)\ | | |
## | | | 9\. Yield Sign\ | 7 ( 1.0%)\ | | |
## | | | 10\. Inoperative (Explain In N\ | 6 ( 0.8%)\ | | |
## | | | 11\. Bike Lane\ | 4 ( 0.6%)\ | | |
## | | | 12\. Flashing Yellow Light | 2 ( 0.3%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 15 | Harm_Evnt_ID\ | 1\. Motor Vehicle In Transpor\ | 440 (61.5%)\ | 716\ | 0\ |
## | | [factor] | 2\. Other Object\ | 272 (38.0%)\ | (100.0%) | (0.0%) |
## | | | 3\. Fixed Object\ | 2 ( 0.3%)\ | | |
## | | | 4\. Overturned | 2 ( 0.3%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 16 | Intrsct_Relat_ID\ | 1\. Intersection\ | 257 (35.9%)\ | 716\ | 0\ |
## | | [factor] | 2\. Non Intersection\ | 208 (29.1%)\ | (100.0%) | (0.0%) |
## | | | 3\. Intersection Related\ | 145 (20.3%)\ | | |
## | | | 4\. Driveway Access | 106 (14.8%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 17 | FHE_Collsn_ID\ | 1\. Angle - Both Going Straig\ | 192 (26.8%)\ | 716\ | 0\ |
## | | [factor] | 2\. One Motor Vehicle - Going\ | 175 (24.4%)\ | (100.0%) | (0.0%) |
## | | | 3\. One Motor Vehicle - Turni\ | 57 ( 8.0%)\ | | |
## | | | 4\. Other\ | 44 ( 6.1%)\ | | |
## | | | 5\. Opposite Direction - One \ | 41 ( 5.7%)\ | | |
## | | | 6\. One Motor Vehicle - Turni\ | 30 ( 4.2%)\ | | |
## | | | 7\. Angle - One Straight-One \ | 24 ( 3.4%)\ | | |
## | | | 8\. Angle - One Straight-One \ | 20 ( 2.8%)\ | | |
## | | | 9\. Same Direction - Both Goi\ | 20 ( 2.8%)\ | | |
## | | | 10\. Same Direction - Both Goi\ | 19 ( 2.7%)\ | | |
## | | | 11\. Same Direction - One Stra\ | 16 ( 2.2%)\ | | |
## | | | 12\. Same Direction - One Stra\ | 16 ( 2.2%)\ | | |
## | | | 13\. One Motor Vehicle - Other\ | 12 ( 1.7%)\ | | |
## | | | 14\. Opposite Direction - Both\ | 11 ( 1.5%)\ | | |
## | | | 15\. Opposite Direction - One \ | 10 ( 1.4%)\ | | |
## | | | 16\. Same Direction - One Stra\ | 8 ( 1.1%)\ | | |
## | | | 17\. Angle - One Left Turn-One\ | 4 ( 0.6%)\ | | |
## | | | 18\. Opposite Direction - One \ | 4 ( 0.6%)\ | | |
## | | | 19\. Same Direction - One Righ\ | 3 ( 0.4%)\ | | |
## | | | 20\. Angle - One Right Turn-On\ | 2 ( 0.3%)\ | | |
## | | | 21\. Angle - One Straight-One \ | 2 ( 0.3%)\ | | |
## | | | 22\. Angle - One Straight-One \ | 2 ( 0.3%)\ | | |
## | | | 23\. One Motor Vehicle - Backi\ | 2 ( 0.3%)\ | | |
## | | | 24\. Other - One Straight-One | 2 ( 0.3%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 18 | Obj_Struck_ID\ | 1\. Not Applicable\ | 426 (59.5%)\ | 716\ | 0\ |
## | | [factor] | 2\. Other\ | 272 (38.0%)\ | (100.0%) | (0.0%) |
## | | | 3\. Overturned\ | 14 ( 2.0%)\ | | |
## | | | 4\. Hit Concrete Traffic Barr\ | 2 ( 0.3%)\ | | |
## | | | 5\. Hit Tree, Shrub, Landscap | 2 ( 0.3%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 19 | Othr_Factr_ID\ | 1\. Not Applicable·\ | 388 (54.2%)\ | 716\ | 0\ |
## | | [factor] | 2\. Attention Diverted From D\ | 124 (17.3%)\ | (100.0%) | (0.0%) |
## | | | 3\. One Vehicle Leaving Drive\ | 86 (12.0%)\ | | |
## | | | 4\. One Vehicle Entering Driv\ | 44 ( 6.1%)\ | | |
## | | | 5\. Swerved Or Veered-Reason \ | 8 ( 1.1%)\ | | |
## | | | 6\. Vehicle Passing Or Attemp\ | 8 ( 1.1%)\ | | |
## | | | 7\. Vision Obstructed By Stan\ | 8 ( 1.1%)\ | | |
## | | | 8\. Slowing/Stopping - For Of\ | 7 ( 1.0%)\ | | |
## | | | 9\. Construction-Within Poste\ | 6 ( 0.8%)\ | | |
## | | | 10\. School Bus Related Crash·\ | 6 ( 0.8%)\ | | |
## | | | 11\. One Vehicle Backward From\ | 4 ( 0.6%)\ | | |
## | | | 12\. Vision Obstructed By Head\ | 4 ( 0.6%)\ | | |
## | | | 13\. Slowing/Stopping-Reason N\ | 3 ( 0.4%)\ | | |
## | | | 14\. Construction - Within Pos\ | 2 ( 0.3%)\ | | |
## | | | 15\. One Vehicle Forward From \ | 2 ( 0.3%)\ | | |
## | | | 16\. One Vehicle Parked Improp\ | 2 ( 0.3%)\ | | |
## | | | 17\. Open Door Or Object Proje\ | 2 ( 0.3%)\ | | |
## | | | 18\. Slowing/Stopping-For Traf\ | 2 ( 0.3%)\ | | |
## | | | 19\. Slowing/Stopping-To Make \ | 2 ( 0.3%)\ | | |
## | | | 20\. Swerved Or Veered-Avoidin\ | 2 ( 0.3%)\ | | |
## | | | 21\. Vehicle Changing Lanes·\ | 2 ( 0.3%)\ | | |
## | | | 22\. Vision Obstructed By Movi\ | 2 ( 0.3%)\ | | |
## | | | 23\. Vision Obstructed By Othe | 2 ( 0.3%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 20 | Road_Part_Adj_ID\ | 1\. Main/Proper Lane\ | 567 (79.2%)\ | 716\ | 0\ |
## | | [factor] | 2\. Other (Explain In Narrati\ | 114 (15.9%)\ | (100.0%) | (0.0%) |
## | | | 3\. Service/Frontage Road | 35 ( 4.9%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 21 | Road_Cls_ID\ | 1\. City Street\ | 455 (63.5%)\ | 716\ | 0\ |
## | | [factor] | 2\. Non Trafficway\ | 114 (15.9%)\ | (100.0%) | (0.0%) |
## | | | 3\. Us & State Highways\ | 69 ( 9.6%)\ | | |
## | | | 4\. Farm To Market\ | 33 ( 4.6%)\ | | |
## | | | 5\. County Road\ | 28 ( 3.9%)\ | | |
## | | | 6\. Interstate\ | 15 ( 2.1%)\ | | |
## | | | 7\. Other Roads | 2 ( 0.3%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 22 | Road_Relat_ID\ | 1\. On Roadway\ | 568 (79.3%)\ | 716\ | 0\ |
## | | [factor] | 2\. Not Applicable\ | 112 (15.6%)\ | (100.0%) | (0.0%) |
## | | | 3\. Off Roadway\ | 30 ( 4.2%)\ | | |
## | | | 4\. Shoulder | 6 ( 0.8%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 23 | Crash_Sev_ID\ | 1\. Non-Incapacitating\ | 341 (47.6%)\ | 716\ | 0\ |
## | | [factor] | 2\. Possible Injury\ | 139 (19.4%)\ | (100.0%) | (0.0%) |
## | | | 3\. Not Injured\ | 107 (14.9%)\ | | |
## | | | 4\. Incapacitating Injury\ | 103 (14.4%)\ | | |
## | | | 5\. Killed\ | 16 ( 2.2%)\ | | |
## | | | 6\. Unknown | 10 ( 1.4%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 24 | Pop_Group_ID\ | 1\. 250,000 Pop And Over\ | 433 (60.5%)\ | 716\ | 0\ |
## | | [factor] | 2\. 100,000 - 249,999 Pop\ | 74 (10.3%)\ | (100.0%) | (0.0%) |
## | | | 3\. Rural\ | 67 ( 9.4%)\ | | |
## | | | 4\. 50,000 - 99,999 Pop\ | 60 ( 8.4%)\ | | |
## | | | 5\. 10,000 - 24,999 Pop\ | 36 ( 5.0%)\ | | |
## | | | 6\. 25,000 - 49,999 Pop\ | 24 ( 3.4%)\ | | |
## | | | 7\. 5,000 - 9,999 Pop\ | 10 ( 1.4%)\ | | |
## | | | 8\. Town Under 2,499 Pop\ | 8 ( 1.1%)\ | | |
## | | | 9\. 2,500 - 4,999 Pop | 4 ( 0.6%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 25 | Day_of_Week\ | 1\. WED\ | 128 (17.9%)\ | 716\ | 0\ |
## | | [factor] | 2\. FRI\ | 110 (15.4%)\ | (100.0%) | (0.0%) |
## | | | 3\. THU\ | 106 (14.8%)\ | | |
## | | | 4\. SAT\ | 104 (14.5%)\ | | |
## | | | 5\. TUE\ | 96 (13.4%)\ | | |
## | | | 6\. SUN\ | 92 (12.8%)\ | | |
## | | | 7\. MON | 80 (11.2%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 26 | Rural_Urban_Type_ID\ | 1\. Urbanized (200,000+)\ | 96 (65.3%)\ | 147\ | 569\ |
## | | [factor] | 2\. Large Urban (50,000-199,9\ | 29 (19.7%)\ | (20.5%) | (79.5%) |
## | | | 3\. Small Urban (5000-49,999)\ | 12 ( 8.2%)\ | | |
## | | | 4\. Rural (<5000) | 10 ( 6.8%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 27 | Func_Sys_ID\ | 1\. Rural Prin Arterial\ | 25 (67.6%)\ | 37\ | 679\ |
## | | [factor] | 2\. Rural Interstate | 12 (32.4%) | (5.2%) | (94.8%) |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 28 | Veh_Mod_Year\ | 1\. 2015\ | 27 (8.7%)\ | 311\ | 405\ |
## | | [factor] | 2\. 2019\ | 27 (8.7%)\ | (43.4%) | (56.6%) |
## | | | 3\. 2017\ | 23 (7.4%)\ | | |
## | | | 4\. 2018\ | 22 (7.1%)\ | | |
## | | | 5\. 2014\ | 21 (6.8%)\ | | |
## | | | 6\. 2016\ | 21 (6.8%)\ | | |
## | | | 7\. 2020\ | 21 (6.8%)\ | | |
## | | | 8\. 2021\ | 21 (6.8%)\ | | |
## | | | 9\. 2013\ | 18 (5.8%)\ | | |
## | | | 10\. 2007\ | 13 (4.2%)\ | | |
## | | | 11\. 2012\ | 13 (4.2%)\ | | |
## | | | 12\. 2022\ | 13 (4.2%)\ | | |
## | | | 13\. 2023\ | 10 (3.2%)\ | | |
## | | | 14\. 2004\ | 7 (2.3%)\ | | |
## | | | 15\. 2008\ | 7 (2.3%)\ | | |
## | | | 16\. 2010\ | 7 (2.3%)\ | | |
## | | | 17\. 2011\ | 6 (1.9%)\ | | |
## | | | 18\. 2006\ | 5 (1.6%)\ | | |
## | | | 19\. 2024\ | 5 (1.6%)\ | | |
## | | | 20\. 2001\ | 4 (1.3%)\ | | |
## | | | 21\. 2005\ | 4 (1.3%)\ | | |
## | | | 22\. 2009\ | 4 (1.3%)\ | | |
## | | | 23\. 2003\ | 3 (1.0%)\ | | |
## | | | 24\. 2000\ | 2 (0.6%)\ | | |
## | | | 25\. 1993\ | 1 (0.3%)\ | | |
## | | | 26\. 1995\ | 1 (0.3%)\ | | |
## | | | 27\. 1996\ | 1 (0.3%)\ | | |
## | | | 28\. 1997\ | 1 (0.3%)\ | | |
## | | | 29\. 1998\ | 1 (0.3%)\ | | |
## | | | 30\. 1999\ | 1 (0.3%)\ | | |
## | | | 31\. 2002 | 1 (0.3%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 29 | Veh_Color_ID\ | 1\. White\ | 81 (23.2%)\ | 349\ | 367\ |
## | | [factor] | 2\. Black\ | 62 (17.8%)\ | (48.7%) | (51.3%) |
## | | | 3\. Gray\ | 60 (17.2%)\ | | |
## | | | 4\. Silver\ | 39 (11.2%)\ | | |
## | | | 5\. Blue\ | 30 ( 8.6%)\ | | |
## | | | 6\. Red\ | 29 ( 8.3%)\ | | |
## | | | 7\. Unknown\ | 20 ( 5.7%)\ | | |
## | | | 8\. Maroon\ | 6 ( 1.7%)\ | | |
## | | | 9\. Gold\ | 5 ( 1.4%)\ | | |
## | | | 10\. Yellow\ | 5 ( 1.4%)\ | | |
## | | | 11\. Brown\ | 4 ( 1.1%)\ | | |
## | | | 12\. Green\ | 2 ( 0.6%)\ | | |
## | | | 13\. Beige\ | 1 ( 0.3%)\ | | |
## | | | 14\. Bronze\ | 1 ( 0.3%)\ | | |
## | | | 15\. Orange\ | 1 ( 0.3%)\ | | |
## | | | 16\. Pink\ | 1 ( 0.3%)\ | | |
## | | | 17\. Tan\ | 1 ( 0.3%)\ | | |
## | | | 18\. Turquoise(Blue) | 1 ( 0.3%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 30 | Veh_Make_ID\ | 1\. Toyota\ | 51 (15.2%)\ | 336\ | 380\ |
## | | [factor] | 2\. Ford\ | 48 (14.3%)\ | (46.9%) | (53.1%) |
## | | | 3\. Chevrolet\ | 44 (13.1%)\ | | |
## | | | 4\. Nissan\ | 28 ( 8.3%)\ | | |
## | | | 5\. Honda\ | 19 ( 5.7%)\ | | |
## | | | 6\. Dodge\ | 18 ( 5.4%)\ | | |
## | | | 7\. Jeep\ | 16 ( 4.8%)\ | | |
## | | | 8\. Hyundai\ | 15 ( 4.5%)\ | | |
## | | | 9\. Unknown\ | 11 ( 3.3%)\ | | |
## | | | 10\. Kia\ | 10 ( 3.0%)\ | | |
## | | | 11\. Gmc\ | 7 ( 2.1%)\ | | |
## | | | 12\. Mazda\ | 7 ( 2.1%)\ | | |
## | | | 13\. Volkswagen\ | 7 ( 2.1%)\ | | |
## | | | 14\. Chrysler\ | 5 ( 1.5%)\ | | |
## | | | 15\. Lexus\ | 5 ( 1.5%)\ | | |
## | | | 16\. Bmw\ | 4 ( 1.2%)\ | | |
## | | | 17\. Mercedes-Benz\ | 4 ( 1.2%)\ | | |
## | | | 18\. Mitsubishi\ | 4 ( 1.2%)\ | | |
## | | | 19\. Ram\ | 4 ( 1.2%)\ | | |
## | | | 20\. Tesla\ | 4 ( 1.2%)\ | | |
## | | | 21\. Acura\ | 3 ( 0.9%)\ | | |
## | | | 22\. Buick\ | 3 ( 0.9%)\ | | |
## | | | 23\. Audi\ | 2 ( 0.6%)\ | | |
## | | | 24\. Infiniti\ | 2 ( 0.6%)\ | | |
## | | | 25\. Subaru\ | 2 ( 0.6%)\ | | |
## | | | 26\. Alfa Romeo\ | 1 ( 0.3%)\ | | |
## | | | 27\. All Other Makes\ | 1 ( 0.3%)\ | | |
## | | | 28\. Blue Bird\ | 1 ( 0.3%)\ | | |
## | | | 29\. Cadillac\ | 1 ( 0.3%)\ | | |
## | | | 30\. Harley-Davidson\ | 1 ( 0.3%)\ | | |
## | | | 31\. Hummer\ | 1 ( 0.3%)\ | | |
## | | | 32\. Ic Corporation\ | 1 ( 0.3%)\ | | |
## | | | 33\. International\ | 1 ( 0.3%)\ | | |
## | | | 34\. Isuzu\ | 1 ( 0.3%)\ | | |
## | | | 35\. Land Rover\ | 1 ( 0.3%)\ | | |
## | | | 36\. Peterbilt\ | 1 ( 0.3%)\ | | |
## | | | 37\. Saturn\ | 1 ( 0.3%)\ | | |
## | | | 38\. Volvo | 1 ( 0.3%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 31 | Veh_Body_Styl_ID\ | 1\. Passenger Car, 4-Door\ | 152 (42.6%)\ | 357\ | 359\ |
## | | [factor] | 2\. Sport Utility Vehicle\ | 95 (26.6%)\ | (49.9%) | (50.1%) |
## | | | 3\. Pickup\ | 51 (14.3%)\ | | |
## | | | 4\. Unknown\ | 20 ( 5.6%)\ | | |
## | | | 5\. Passenger Car, 2-Door\ | 11 ( 3.1%)\ | | |
## | | | 6\. Van\ | 8 ( 2.2%)\ | | |
## | | | 7\. Truck\ | 7 ( 2.0%)\ | | |
## | | | 8\. Police Car/Truck\ | 4 ( 1.1%)\ | | |
## | | | 9\. Motorcycle\ | 3 ( 0.8%)\ | | |
## | | | 10\. Yellow School Bus\ | 3 ( 0.8%)\ | | |
## | | | 11\. Fire Truck\ | 1 ( 0.3%)\ | | |
## | | | 12\. Other (Explain In Narrati\ | 1 ( 0.3%)\ | | |
## | | | 13\. Truck Tractor | 1 ( 0.3%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 32 | Emer_Respndr_Fl\ | 1\. N\ | 640 (89.4%)\ | 716\ | 0\ |
## | | [factor] | 2\. (Empty string)\ | 72 (10.1%)\ | (100.0%) | (0.0%) |
## | | | 3\. Y | 4 ( 0.6%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 33 | Contrib_Factr_1_ID\ | 1\. None\ | 300 (45.3%)\ | 662\ | 54\ |
## | | [factor] | 2\. Other (Explain In Narrati\ | 79 (11.9%)\ | (92.5%) | (7.5%) |
## | | | 3\. Driver Inattention\ | 62 ( 9.4%)\ | | |
## | | | 4\. Failed To Yield Right Of \ | 45 ( 6.8%)\ | | |
## | | | 5\. Pedestrian Failed To Yiel\ | 19 ( 2.9%)\ | | |
## | | | 6\. Disregard Stop And Go Sig\ | 15 ( 2.3%)\ | | |
## | | | 7\. Disregard Stop Sign Or Li\ | 15 ( 2.3%)\ | | |
## | | | 8\. Failed To Yield Right Of \ | 14 ( 2.1%)\ | | |
## | | | 9\. Failed To Control Speed\ | 12 ( 1.8%)\ | | |
## | | | 10\. Failed To Yield Right Of \ | 11 ( 1.7%)\ | | |
## | | | 11\. Failed To Yield Right Of \ | 10 ( 1.5%)\ | | |
## | | | 12\. Failed To Drive In Single\ | 8 ( 1.2%)\ | | |
## | | | 13\. Failed To Yield Right Of \ | 8 ( 1.2%)\ | | |
## | | | 14\. Unsafe Speed\ | 6 ( 0.9%)\ | | |
## | | | 15\. Impaired Visibility (Expl\ | 5 ( 0.8%)\ | | |
## | | | 16\. Wrong Side - Approach Or \ | 5 ( 0.8%)\ | | |
## | | | 17\. Wrong Way - One Way Road\ | 5 ( 0.8%)\ | | |
## | | | 18\. Faulty Evasive Action\ | 4 ( 0.6%)\ | | |
## | | | 19\. Turned When Unsafe\ | 4 ( 0.6%)\ | | |
## | | | 20\. Backed Without Safety\ | 3 ( 0.5%)\ | | |
## | | | 21\. Distraction In Vehicle\ | 3 ( 0.5%)\ | | |
## | | | 22\. Failed To Pass To Right S\ | 3 ( 0.5%)\ | | |
## | | | 23\. Failed To Stop At Proper \ | 3 ( 0.5%)\ | | |
## | | | 24\. Failed To Yield Right Of \ | 3 ( 0.5%)\ | | |
## | | | 25\. Animal On Road - Domestic\ | 2 ( 0.3%)\ | | |
## | | | 26\. Changed Lane When Unsafe\ | 2 ( 0.3%)\ | | |
## | | | 27\. Failed To Heed Warning Si\ | 2 ( 0.3%)\ | | |
## | | | 28\. Failed To Pass To Left Sa\ | 2 ( 0.3%)\ | | |
## | | | 29\. Had Been Drinking\ | 2 ( 0.3%)\ | | |
## | | | 30\. Speeding - (Overlimit)\ | 2 ( 0.3%)\ | | |
## | | | 31\. Turned Improperly - Wrong\ | 2 ( 0.3%)\ | | |
## | | | 32\. Cell/Mobile Device Use - \ | 1 ( 0.2%)\ | | |
## | | | 33\. Disregard Turn Marks At I\ | 1 ( 0.2%)\ | | |
## | | | 34\. Failed To Yield Right Of \ | 1 ( 0.2%)\ | | |
## | | | 35\. Parked In Traffic Lane\ | 1 ( 0.2%)\ | | |
## | | | 36\. Turned Improperly - Cut C\ | 1 ( 0.2%)\ | | |
## | | | 37\. Under Influence - Alcohol | 1 ( 0.2%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 34 | Contrib_Factr_2_ID\ | 1\. Driver Inattention\ | 11 (28.9%)\ | 38\ | 678\ |
## | | [factor] | 2\. Other (Explain In Narrati\ | 4 (10.5%)\ | (5.3%) | (94.7%) |
## | | | 3\. Failed To Control Speed\ | 2 ( 5.3%)\ | | |
## | | | 4\. Failed To Yield Right Of \ | 2 ( 5.3%)\ | | |
## | | | 5\. Failed To Yield Right Of \ | 2 ( 5.3%)\ | | |
## | | | 6\. Turned When Unsafe\ | 2 ( 5.3%)\ | | |
## | | | 7\. Wrong Side - Approach Or \ | 2 ( 5.3%)\ | | |
## | | | 8\. Cell/Mobile Device Use - \ | 1 ( 2.6%)\ | | |
## | | | 9\. Changed Lane When Unsafe\ | 1 ( 2.6%)\ | | |
## | | | 10\. Disregard Stop And Go Sig\ | 1 ( 2.6%)\ | | |
## | | | 11\. Drove Without Headlights\ | 1 ( 2.6%)\ | | |
## | | | 12\. Failed To Heed Warning Si\ | 1 ( 2.6%)\ | | |
## | | | 13\. Failed To Stop At Proper \ | 1 ( 2.6%)\ | | |
## | | | 14\. Failed To Yield Right Of \ | 1 ( 2.6%)\ | | |
## | | | 15\. Had Been Drinking\ | 1 ( 2.6%)\ | | |
## | | | 16\. Impaired Visibility (Expl\ | 1 ( 2.6%)\ | | |
## | | | 17\. Opened Door Into Traffic \ | 1 ( 2.6%)\ | | |
## | | | 18\. Pedestrian Failed To Yiel\ | 1 ( 2.6%)\ | | |
## | | | 19\. Turned Improperly - Wrong\ | 1 ( 2.6%)\ | | |
## | | | 20\. Under Influence - Alcohol | 1 ( 2.6%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 35 | Prsn_Type_ID\ | 1\. Driver\ | 344 (50.1%)\ | 686\ | 30\ |
## | | [factor] | 2\. Other (Explain In Narrati\ | 337 (49.1%)\ | (95.8%) | (4.2%) |
## | | | 3\. Driver Of Motorcycle Type\ | 3 ( 0.4%)\ | | |
## | | | 4\. Pedestrian\ | 1 ( 0.1%)\ | | |
## | | | 5\. Unknown | 1 ( 0.1%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 36 | Prsn_Injry_Sev_ID\ | 1\. Not Injured\ | 333 (48.5%)\ | 686\ | 30\ |
## | | [factor] | 2\. Non-Incapacitating Injury\ | 168 (24.5%)\ | (95.8%) | (4.2%) |
## | | | 3\. Possible Injury\ | 73 (10.6%)\ | | |
## | | | 4\. Unknown\ | 57 ( 8.3%)\ | | |
## | | | 5\. Incapacitating Injury\ | 51 ( 7.4%)\ | | |
## | | | 6\. Killed | 4 ( 0.6%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 37 | Prsn_Ethnicity_ID\ | 1\. White\ | 284 (41.5%)\ | 684\ | 32\ |
## | | [factor] | 2\. Hispanic\ | 181 (26.5%)\ | (95.5%) | (4.5%) |
## | | | 3\. Black\ | 126 (18.4%)\ | | |
## | | | 4\. Unknown\ | 50 ( 7.3%)\ | | |
## | | | 5\. Asian\ | 37 ( 5.4%)\ | | |
## | | | 6\. Other\ | 4 ( 0.6%)\ | | |
## | | | 7\. Amer. Indian/Alaskan Nati | 2 ( 0.3%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 38 | Prsn_Gndr_ID\ | 1\. Male\ | 432 (63.0%)\ | 686\ | 30\ |
## | | [factor] | 2\. Female\ | 218 (31.8%)\ | (95.8%) | (4.2%) |
## | | | 3\. Unknown | 36 ( 5.2%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 39 | Prsn_Ejct_ID\ | 1\. Not Applicable\ | 350 (51.0%)\ | 686\ | 30\ |
## | | [factor] | 2\. No\ | 303 (44.2%)\ | (95.8%) | (4.2%) |
## | | | 3\. Unknown | 33 ( 4.8%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 40 | Prsn_Rest_ID\ | 1\. Not Applicable\ | 345 (50.3%)\ | 686\ | 30\ |
## | | [factor] | 2\. Shoulder & Lap Belt\ | 271 (39.5%)\ | (95.8%) | (4.2%) |
## | | | 3\. Unknown\ | 68 ( 9.9%)\ | | |
## | | | 4\. Child Seat, Unknown\ | 1 ( 0.1%)\ | | |
## | | | 5\. None | 1 ( 0.1%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 41 | Prsn_Airbag_ID\ | 1\. Not Applicable\ | 345 (50.3%)\ | 686\ | 30\ |
## | | [factor] | 2\. Not Deployed\ | 292 (42.6%)\ | (95.8%) | (4.2%) |
## | | | 3\. Unknown\ | 46 ( 6.7%)\ | | |
## | | | 4\. Deployed, Front\ | 2 ( 0.3%)\ | | |
## | | | 5\. Deployed, Multiple | 1 ( 0.1%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 42 | Prsn_Helmet_ID\ | 1\. Not Applicable\ | 682 (99.4%)\ | 686\ | 30\ |
## | | [factor] | 2\. Unknown If Worn\ | 3 ( 0.4%)\ | (95.8%) | (4.2%) |
## | | | 3\. Worn, Unk Damage | 1 ( 0.1%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 43 | Drvr_Lic_Type_ID\ | 1\. Driver License\ | 381 (63.0%)\ | 605\ | 111\ |
## | | [factor] | 2\. Unlicensed\ | 95 (15.7%)\ | (84.5%) | (15.5%) |
## | | | 3\. Unknown\ | 64 (10.6%)\ | | |
## | | | 4\. Id Card\ | 55 ( 9.1%)\ | | |
## | | | 5\. Commercial Driver Lic.\ | 7 ( 1.2%)\ | | |
## | | | 6\. Other | 3 ( 0.5%) | | |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
## | 44 | E_Scooter_ID\ | 1\. Not Applicable\ | 361 (50.4%)\ | 716\ | 0\ |
## | | [factor] | 2\. Yes | 355 (49.6%) | (100.0%) | (0.0%) |
## +----+----------------------+---------------------------------+--------------------+----------+---------+
# Comparison: all records vs. e-scooter crash subset
# First pass: drop 26 columns of `dat` by position (positions refer to the
# column order printed by names(dat)). 81 - 26 = 55 columns remain.
dat1 <- dat[, -c(2, 3, 7, 22:25, 36, 38, 39, 41:46, 60, 62, 74:81)]

# Second pass: drop 8 more columns. These positions refer to `dat1`, i.e. to
# the already-reduced table, not to `dat`. 47 columns remain.
dat2 <- dat1[, -c(8, 9, 25, 26, 28, 43:45)]

# Crash-level filter on the reduced table: keep every row that belongs to a
# Crash_ID for which at least one row has E_Scooter_ID equal to "Yes".
dat3a <- dat2 %>%
  group_by(Crash_ID) %>%
  filter("Yes" %in% E_Scooter_ID) %>%
  ungroup()

# Number of rows and columns in the e-scooter crash subset.
dim(dat3a)
## [1] 716 47
# Trim the subset (dat3a) and the full table (dat2) with the SAME two
# positional selections so both keep identical columns in identical order.
# Each index vector is written once to guarantee the two tables stay aligned.
#   1. drop 7 columns by position (position 1 is Crash_ID, which was only
#      needed for the crash-level grouping),
#   2. from what is left, keep 29 columns by position.
cmp_drop_idx <- c(1, 2, 23, 28, 29, 40, 42)
cmp_keep_idx <- c(1:14, 17:20, 22, 23, 26, 32, 34:40)

dat3a <- dat3a[, -cmp_drop_idx][, cmp_keep_idx]
dat2 <- dat2[, -cmp_drop_idx][, cmp_keep_idx]
# Step 1: Collect the names of the character/factor columns of dat2. These are
# the variables whose value distributions get compared.
vars_to_compare <- names(Filter(
  function(col) is.character(col) || is.factor(col),
  dat2
))
# Step 2: Helper that tabulates a single variable.

#' Count and percentage of each value of one variable
#'
#' Rows where the variable is NA or an empty string are dropped before
#' counting, so the percentages are relative to the non-missing rows only.
#'
#' @param data A data frame.
#' @param variable Name of the column to tabulate, as a string.
#' @return A data frame with one row per distinct value and the columns
#'   `Value`, `Count` and `Percentage` (0-100).
get_count_and_percentage <- function(data, variable) {
  data %>%
    filter(!(is.na(.data[[variable]]) | .data[[variable]] == "")) %>%
    count(Value = .data[[variable]], name = "Count") %>%
    mutate(Percentage = 100 * Count / sum(Count))
}
# Step 3: Build one comparison table per variable. Each table lines up the
# value distribution in the full table (dat2, "Orig_*" columns) with the one in
# the e-scooter crash subset (dat3a, "Sub_*" columns).
comparison_list <- lapply(vars_to_compare, function(var_name) {
  full_dist <- rename(
    get_count_and_percentage(dat2, var_name),
    Orig_Count = Count,
    Orig_Perc = Percentage
  )
  subset_dist <- rename(
    get_count_and_percentage(dat3a, var_name),
    Sub_Count = Count,
    Sub_Perc = Percentage
  )

  full_dist %>%
    # Full join keeps values that occur in only one of the two tables.
    full_join(subset_dist, by = "Value") %>%
    # A value missing from one table has count 0 and percentage 0 there.
    replace_na(list(
      Orig_Count = 0,
      Sub_Count = 0,
      Orig_Perc = 0,
      Sub_Perc = 0
    )) %>%
    mutate(
      Variable = var_name,
      Orig_Perc = round(Orig_Perc, 3),
      Sub_Perc = round(Sub_Perc, 3),
      # Difference of the rounded percentages: subset minus full table.
      Diff = round(Sub_Perc - Orig_Perc, 3)
    ) %>%
    # NOTE(review): ascending order puts the rarest subset values first --
    # confirm this is intended rather than desc(Sub_Perc).
    arrange(Sub_Perc)
})
# Step 4: Stack the per-variable tables into one data frame and put the
# columns in display order.
comparison_df <- select(
  bind_rows(comparison_list),
  Variable, Value, Orig_Count, Sub_Count, Orig_Perc, Sub_Perc, Diff
)

# Step 5: Show an interactive table (25 rows per page) and colour the text of
# the Diff column around the cut point 0: red for negative differences, green
# for positive ones.
# NOTE(review): a Diff of exactly 0 appears to fall in the first (red)
# bucket -- confirm against the DT::styleInterval documentation.
DT::formatStyle(
  DT::datatable(comparison_df, options = list(pageLength = 25)),
  columns = "Diff",
  color = DT::styleInterval(cuts = 0, values = c("red", "green"))
)