1 FARS Data

# Switch to the folder holding the active RStudio document, then print the
# working directory. getActiveDocumentContext() is only usable inside an
# RStudio session, and its path is "" for the console or an unsaved document,
# so guard both cases rather than letting setwd() fail.
if (requireNamespace("rstudioapi", quietly = TRUE) &&
      rstudioapi::isAvailable()) {
  script_path <- rstudioapi::getActiveDocumentContext()$path
  if (nzchar(script_path)) {
    setwd(dirname(script_path))
  }
}
getwd()
## [1] "C:/Users/mvx13/OneDrive - Texas State University/Papers/TRB 2024/FARS"
library(tidyverse)
library(dplyr)
library(DT)
library(data.table)
library(forcats)
library(DataExplorer)
library(skimr)

# Read the FARS CSV from the working directory and report rows x columns.
fars <- fread("rfars_2016_2021Clean.csv")
dim(fars)
## [1] 522678    206
# Convert every character column to a factor, then summarise all columns.
# across(where(...)) is the current replacement for the superseded mutate_if().
fars <- fars %>%
  mutate(across(where(is.character), as.factor))
skim(fars)
Data summary
Name fars
Number of rows 522678
Number of columns 206
Key NULL
_______________________
Column type frequency:
factor 183
numeric 23
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
state 0 1.00 FALSE 51 Cal: 55696, Tex: 53949, Flo: 47163, Geo: 22077
acc1 0 1.00 FALSE 212157 us2: 135, us2: 120, us2: 74, us2: 73
veh1 0 1.00 FALSE 370780 us2: 61, us2: 57, us2: 52, us2: 50
per1 0 1.00 FALSE 522644 us2: 3, us2: 3, us2: 3, us2: 3
acc_type 50732 0.90 FALSE 91 M98: 66990, C13: 53990, A1-: 39041, B6-: 28606
age 0 1.00 FALSE 110 Unk: 12801, 21 : 12368, 19 : 11810, 22 : 11766
air_bag 0 1.00 FALSE 13 Not: 236049, Dep: 73716, Dep: 63729, Not: 48441
alc_det 0 1.00 FALSE 9 Unk: 426195, Evi: 61755, Obs: 25761, Pre: 5347
alc_res 0 1.00 FALSE 550 Tes: 285653, 0.0: 120062, Unk: 40785, AC : 7168
alc_status 0 1.00 FALSE 6 Tes: 285570, Tes: 191478, Unk: 40785, UnK: 2494
arr_hour 0 1.00 FALSE 26 Unk: 271622, 6:0: 15709, 5:0: 15318, 4:0: 14984
arr_min 0 1.00 FALSE 64 Unk: 241324, Unk: 30563, 0: 5575, 10: 5443
atst_typ 0 1.00 FALSE 15 Tes: 285653, Blo: 167954, Unk: 42244, Vit: 6841
bikecgp 474765 0.09 FALSE 22 Not: 42383, Mot: 1699, Bic: 451, Par: 398
bikectype 474765 0.09 FALSE 79 Not: 42383, Mot: 748, Mot: 465, Par: 342
bikedir 474765 0.09 FALSE 5 Not: 42383, Wit: 3613, Fac: 784, Not: 667
bikeloc 474765 0.09 FALSE 6 Not: 42383, Not: 3498, At : 1545, Int: 448
bikepos 474765 0.09 FALSE 9 Not: 42383, Tra: 4352, Sid: 573, Bic: 458
body_typ 50732 0.90 FALSE 88 4-d: 134668, Com: 64952, Lig: 62618, Two: 27562
bus_use 50732 0.90 FALSE 8 Not: 462587, Unk: 4801, Sch: 1510, Tra: 1337
cargo_bt 50732 0.90 FALSE 18 Not: 426572, Van: 13913, Oth: 6290, Bus: 4829
cdl_stat 50732 0.90 FALSE 12 No : 403211, Val: 48168, Unk: 14110, Dis: 2011
cityname 0 1.00 FALSE 8682 NOT: 251128, Unk: 5455, LOS: 4460, HOU: 3800
countyname 0 1.00 FALSE 2992 LOS: 11401, MAR: 7459, HAR: 6744, COO: 4965
day_week 0 1.00 FALSE 7 Sat: 92046, Fri: 83232, Sun: 81346, Thu: 69465
death_da 0 1.00 FALSE 34 Not: 292233, 1: 7818, 3: 7698, 4: 7692
death_hr 0 1.00 FALSE 27 Not: 292233, 21:: 12292, 18:: 12087, 20:: 12071
death_mn 0 1.00 FALSE 63 Not: 292233, Unk: 12069, 0: 9511, 30: 8915
death_mo 0 1.00 FALSE 15 Not: 292233, Oct: 21364, Jul: 20994, Aug: 20837
death_tm 218207 0.58 FALSE 3 Not: 292233, Unk: 11539, Red: 699
death_yr 0 1.00 FALSE 10 Not: 292233, 202: 42368, 202: 38642, 201: 37733
deformed 50732 0.90 FALSE 5 Dis: 343123, Unk: 46816, Fun: 40842, Min: 33078
doa 0 1.00 FALSE 4 Not: 390569, Die: 130074, Die: 1684, Unk: 351
dr_drink 50732 0.90 FALSE 2 No: 391714, Yes: 80232
dr_hgt 443844 0.15 FALSE 2 Unk: 78302, Oth: 532
dr_pres 50732 0.90 FALSE 3 Yes: 471414, No : 475, Unk: 57
dr_wgt 50732 0.90 FALSE 404 Unk: 173267, 200: 17058, 180: 16708, 160: 14600
dr_zip 510007 0.02 FALSE 3 Unk: 10206, Not: 1933, No : 532
drinking 0 1.00 FALSE 4 Unk: 229425, No : 221696, Yes: 51533, Unk: 20024
drug_det 0 1.00 FALSE 7 Unk: 449096, Evi: 49746, Oth: 14872, Obs: 5405
drugs 0 1.00 FALSE 3 Unk: 282719, No : 208090, Yes: 31869
dstatus 0 1.00 FALSE 6 Tes: 304119, Tes: 166103, Unk: 47838, Unk: 2379
ej_path 0 1.00 FALSE 10 Eje: 480751, Eje: 31023, Thr: 4933, Thr: 1995
ejection 0 1.00 FALSE 8 Not: 390062, Not: 85409, Tot: 33373, Par: 7944
emer_use 50732 0.90 FALSE 7 Not: 469464, Unk: 1546, Eme: 410, Eme: 226
extricat 0 1.00 FALSE 3 Not: 460587, Ext: 50338, Unk: 11753
fire_exp 50732 0.90 FALSE 2 No : 456273, Yes: 15673
first_mo 50732 0.90 FALSE 15 No : 258977, Unk: 17653, May: 17098, Mar: 17009
first_yr 50732 0.90 FALSE 14 No : 258977, 201: 32694, 201: 29124, 201: 28561
func_sys 0 1.00 FALSE 9 Pri: 169183, Min: 113153, Int: 75438, Maj: 70798
gvwr 215666 0.59 FALSE 5 Not: 278566, 26,: 18961, 10,: 7691, Unk: 1522
harm_ev 0 1.00 FALSE 57 Mot: 274307, Ped: 88393, Rol: 29399, Tre: 22085
haz_cno 50732 0.90 FALSE 11 Not: 471058, 3: 458, 2: 147, Unk: 95
haz_id 51452 0.90 FALSE 2 Not: 471058, Unk: 168
haz_inv 50732 0.90 FALSE 2 No: 471058, Yes: 888
haz_plac 50732 0.90 FALSE 4 Not: 471058, Yes: 804, No: 56, Unk: 28
haz_rel 50732 0.90 FALSE 4 Not: 471058, No: 542, Yes: 288, Unk: 58
hispanic 0 1.00 FALSE 10 Not: 292233, Non: 172819, Unk: 21316, His: 15336
hit_run 50732 0.90 FALSE 3 No: 454733, Yes: 16798, Unk: 415
hosp_hr 0 1.00 FALSE 26 Unk: 235274, Not: 138498, 6:0: 10201, 7:0: 9872
hosp_mn 0 1.00 FALSE 65 Unk: 231865, Not: 138501, 0: 4449, 30: 3930
hospital 0 1.00 FALSE 9 EMS: 189104, Not: 185509, Not: 105117, EMS: 19603
hour 0 1.00 FALSE 25 6:0: 32777, 8:0: 31272, 5:0: 31019, 9:0: 30652
impact1 50732 0.90 FALSE 26 12 : 250642, 6 C: 32618, Non: 31316, Unk: 20654
inj_sev 0 1.00 FALSE 10 Fat: 230445, No : 131792, Sus: 46001, Sus: 44857
j_knife 50732 0.90 FALSE 4 Not: 445098, No: 25106, Yes: 1332, Yes: 410
l_compl 50732 0.90 FALSE 7 Val: 377464, No : 47465, Not: 29505, Unk: 14304
l_endors 50732 0.90 FALSE 6 No : 445030, Unk: 14079, End: 7142, End: 4853
l_restri 50732 0.90 FALSE 6 No : 324935, Res: 107620, Res: 19118, Unk: 15481
l_state 50732 0.90 FALSE 61 Tex: 46672, Cal: 46604, Flo: 40011, Geo: 19560
l_status 50732 0.90 FALSE 8 Val: 386864, Not: 29913, Sus: 27975, Unk: 13768
l_type 50732 0.90 FALSE 7 Ful: 414763, Not: 29913, Unk: 13723, Int: 8700
lag_hrs 217296 0.58 FALSE 1 Unk: 305382
lag_mins 217296 0.58 FALSE 1 Unk: 305382
last_mo 50732 0.90 FALSE 15 No : 258924, Unk: 17662, May: 16833, Aug: 16787
last_yr 50732 0.90 FALSE 14 No : 258924, 201: 33245, 201: 31545, 201: 28341
lgt_cond 0 1.00 FALSE 8 Day: 254985, Dar: 132606, Dar: 107442, Dus: 12724
location 0 1.00 FALSE 21 Occ: 474237, Not: 30747, At : 4428, At : 2801
m_harm 50732 0.90 FALSE 57 Mot: 267584, Rol: 54811, Ped: 51523, Tre: 30627
mak_mod 50734 0.90 FALSE 1251 For: 19795, Che: 17835, Har: 14086, Dod: 11422
make 50732 0.90 FALSE 72 For: 67245, Che: 66012, Toy: 40626, Hon: 34704
man_coll 0 1.00 FALSE 11 The: 125686, Ang: 123531, Not: 121663, Fro: 69244
mcarr_i1 50732 0.90 FALSE 57 Not: 426510, US : 23577, Non: 9316, Unk: 8661
mcarr_i2 78162 0.85 FALSE 3 Not: 426508, Non: 9320, Unk: 8688
mcarr_id 78193 0.85 FALSE 3 Not: 426508, Non: 9316, Unk: 8661
milept 301503 0.42 FALSE 2 Non: 124043, Unk: 97132
minute 0 1.00 FALSE 61 0: 23830, 30: 22243, 50: 17191, 20: 16786
mod_year 513250 0.02 FALSE 1 Unk: 9428
month 0 1.00 FALSE 12 Oct: 48474, Jul: 48158, Aug: 47636, Sep: 47177
motdir 474765 0.09 FALSE 7 Not: 31572, Not: 5530, Sou: 2777, Nor: 2725
motman 474765 0.09 FALSE 6 Not: 31713, Str: 8432, Not: 5530, Lef: 1413
msafeqmt 514983 0.01 FALSE 8 Unk: 4077, Non: 3170, Hel: 159, Unk: 122
nhs 0 1.00 FALSE 3 Thi: 287201, Thi: 233691, Unk: 1786
not_hour 0 1.00 FALSE 26 Unk: 269552, 6:0: 15788, 5:0: 15525, 3:0: 15141
not_min 0 1.00 FALSE 63 Unk: 241572, Unk: 30199, 30: 5454, 0: 5422
numoccs 50732 0.90 FALSE 55 1: 224904, 2: 121322, 3: 51776, 4: 32278
owner 50732 0.90 FALSE 9 Dri: 247301, Dri: 148210, Veh: 30708, Veh: 15850
p_crash1 50732 0.90 FALSE 20 Goi: 295972, Neg: 79776, Tur: 32978, Sto: 18705
p_crash2 50732 0.90 FALSE 62 Fro: 57996, Ped: 45703, Off: 44846, Off: 30891
p_crash3 50732 0.90 FALSE 14 Unk: 297982, No : 95016, Ste: 21004, Ste: 18990
pbcwalk 474765 0.09 FALSE 3 Non: 39397, Yes: 8165, Unk: 351
pbswalk 474765 0.09 FALSE 3 Non: 30578, Yes: 16761, Unk: 574
pbszone 474765 0.09 FALSE 3 Non: 47327, Unk: 294, Yes: 292
pcrash4 50732 0.90 FALSE 8 Tra: 350009, Pre: 68723, Ski: 20830, Ski: 15633
pcrash5 50732 0.90 FALSE 9 Sta: 271825, Dep: 120598, Sta: 55184, Sta: 9126
pedcgp 474765 0.09 FALSE 20 Cro: 15358, Wal: 5784, Not: 5530, Oth: 5092
pedctype 474765 0.09 FALSE 62 Ped: 13363, Not: 5530, Not: 3582, Wal: 3362
peddir 474765 0.09 FALSE 8 Not: 31927, Not: 5530, Not: 3239, Eas: 1765
pedleg 474765 0.09 FALSE 5 Not: 31757, Not: 5530, Far: 5426, Nea: 3885
pedloc 474765 0.09 FALSE 6 Not: 30792, At : 7208, Not: 5530, Int: 3568
pedpos 474765 0.09 FALSE 10 Tra: 29429, Not: 5530, Cro: 5451, Pav: 1997
pedsnr 474765 0.09 FALSE 73 Not: 31928, Not: 5530, Unk: 1202, Mot: 654
per_typ 0 1.00 FALSE 15 Dri: 323427, Pas: 147823, Ped: 41248, Bic: 5493
prev_acc 50732 0.90 FALSE 15 Non: 345396, 1: 57440, Cra: 33676, Unk: 17559
prev_dwi 50732 0.90 FALSE 12 Non: 440079, Unk: 17556, 1: 11580, 2: 1809
prev_oth 50732 0.90 FALSE 36 Non: 368342, 1: 55633, Unk: 17555, 2: 16569
prev_spd 50732 0.90 FALSE 16 Non: 369009, 1: 58611, Unk: 17555, 2: 16745
prev_sus 366350 0.30 FALSE 48 Non: 128145, 1: 10726, Unk: 5154, 2: 4837
race 265986 0.49 FALSE 20 Not: 144548, Whi: 83630, Bla: 16938, Unk: 4883
rail 1233 1.00 FALSE 2 Not: 521411, Unk: 34
rd_owner 0 1.00 FALSE 27 Sta: 306670, Unk: 85236, Cit: 60737, Cou: 50913
reg_stat 50732 0.90 FALSE 65 Tex: 46241, Cal: 46129, Flo: 39472, Geo: 18392
rel_road 0 1.00 FALSE 12 On : 388443, On : 96197, On : 16036, On : 8208
reljct1 0 1.00 FALSE 3 No: 495990, Yes: 26299, Unk: 389
reljct2 0 1.00 FALSE 14 Non: 330412, Int: 104116, Int: 45605, Dri: 17964
rest_mis 0 1.00 FALSE 8 No: 232700, Non: 98564, No : 89272, No : 50322
rest_use 0 1.00 FALSE 22 Sho: 264354, Non: 74315, Not: 48441, Unk: 45315
rolinloc 50732 0.90 FALSE 9 No : 391050, On : 47586, On : 17530, On : 7055
rollover 50732 0.90 FALSE 4 No : 391050, Rol: 63299, Rol: 11205, Rol: 6392
route 0 1.00 FALSE 9 Sta: 153911, U.S: 92625, Loc: 82527, Int: 74929
rur_urb 0 1.00 FALSE 4 Urb: 306227, Rur: 214883, Tra: 928, Unk: 640
sch_bus 0 1.00 FALSE 2 No: 520186, Yes: 2492
seat_pos 0 1.00 FALSE 29 Fro: 324596, Fro: 76925, Not: 48441, Sec: 23173
sex 0 1.00 FALSE 3 Mal: 341869, Fem: 170437, Unk: 10372
sp_jur 0 1.00 FALSE 8 No : 517939, Ind: 3109, Nat: 976, Unk: 433
spec_use 50732 0.90 FALSE 22 No : 377751, No : 85413, Veh: 2984, Unk: 2125
speedrel 50732 0.90 FALSE 7 No: 366483, Yes: 37746, Yes: 33537, Unk: 19332
statename 0 1.00 FALSE 51 Cal: 55696, Tex: 53949, Flo: 47163, Geo: 22077
tow_veh 50732 0.90 FALSE 8 No : 443707, One: 25663, Unk: 1174, Two: 853
towed 50732 0.90 FALSE 6 Tow: 347066, Not: 56307, Tow: 24949, Tow: 23089
trav_sp 50732 0.90 FALSE 153 Unk: 284172, 055: 22423, Sto: 20834, 045: 17015
trlr1vin 63663 0.88 FALSE 4 No : 445380, Unk: 13424, No : 183, No : 28
trlr2vin 51058 0.90 FALSE 3 No : 471076, Unk: 543, No : 1
trlr3vin 50756 0.90 FALSE 2 No : 471883, Unk: 39
tway_id 0 1.00 FALSE 72194 I-1: 5591, I-9: 4180, I-7: 3394, I-4: 3318
tway_id2 367771 0.30 FALSE 38013 MAI: 455, NON: 160, CEN: 142, 7TH: 133
typ_int 0 1.00 FALSE 10 Not: 372200, Fou: 97523, T-I: 47143, Y-I: 3253
underide 137876 0.74 FALSE 10 No : 381191, Und: 1097, Ove: 749, Und: 729
unittype 50732 0.90 FALSE 1 Mot: 471946
v_config 50732 0.90 FALSE 15 Not: 426547, Tru: 18181, Sin: 9727, Unk: 4422
valign 50732 0.90 FALSE 8 Str: 367432, Cur: 30454, Cur: 25236, Cur: 15692
vin 510550 0.02 FALSE 3 Unk: 10671, No : 1144, No : 313
vnum_lan 50732 0.90 FALSE 9 Two: 296660, Thr: 58983, Fou: 51028, Fiv: 36656
vpavetyp 50732 0.90 FALSE 8 Bla: 303194, Unk: 122834, Con: 36294, Sla: 3739
vprofile 50732 0.90 FALSE 8 Lev: 330037, Gra: 44537, Unk: 38376, Dow: 24757
vspd_lim 50732 0.90 FALSE 19 55 : 118793, 45 : 71657, 35 : 48139, 65 : 45620
vsurcond 50732 0.90 FALSE 12 Dry: 395162, Wet: 54702, Unk: 6446, Ice: 4695
vtcont_f 50732 0.90 FALSE 6 No : 355843, Dev: 93920, Unk: 21545, Dev: 308
vtrafcon 50732 0.90 FALSE 18 No : 355843, Tra: 39813, Sto: 26018, Unk: 21348
vtrafway 50732 0.90 FALSE 9 Two: 258760, Two: 80676, Two: 71930, Two: 28616
work_inj 0 1.00 FALSE 5 Not: 292233, No: 205307, Unk: 18737, Yes: 5702
wrk_zone 0 1.00 FALSE 5 Non: 509615, Con: 7745, Wor: 4225, Mai: 877
nmhelmet 481932 0.08 FALSE 3 Unk: 23286, No: 16636, Yes: 824
nmlight 481932 0.08 FALSE 3 Unk: 26831, No: 13561, Yes: 354
nmothpre 481932 0.08 FALSE 3 Unk: 27177, No: 13431, Yes: 138
nmothpro 481932 0.08 FALSE 3 Unk: 27217, No: 13453, Yes: 76
nmpropad 481932 0.08 FALSE 3 Unk: 26614, No: 14112, Yes: 20
nmrefclo 481932 0.08 FALSE 3 Unk: 25136, No: 15084, Yes: 526
prev_sus1 207060 0.60 FALSE 15 Non: 302190, Unk: 12395, 1: 460, No : 372
prev_sus2 207060 0.60 FALSE 30 Non: 297325, Unk: 12395, 1: 3789, 2: 1106
prev_sus3 207060 0.60 FALSE 54 Non: 264545, 1: 18930, Unk: 12401, 2: 8078
helm_mis 256692 0.51 FALSE 6 Non: 229582, Not: 25219, No : 7083, No : 3946
helm_use 256692 0.51 FALSE 8 Not: 219822, Not: 25219, No : 8822, Hel: 5782
gvwr_from 357744 0.32 FALSE 9 Cla: 110348, Cla: 37563, Cla: 8082, Unk: 3131
gvwr_to 357744 0.32 FALSE 9 Cla: 109817, Cla: 38090, Cla: 8305, Unk: 3132
icfinalbody 357744 0.32 FALSE 17 Not: 162754, Tru: 889, Pic: 361, Tru: 169
trlr1gvwr 357744 0.32 FALSE 11 No : 155554, Unk: 6725, Cla: 1208, Cla: 502
trlr2gvwr 357744 0.32 FALSE 10 No : 164639, Unk: 237, Cla: 23, Cla: 10
trlr3gvwr 357744 0.32 FALSE 3 No : 164921, Unk: 11, Cla: 2
vpicbodyclass 357744 0.32 FALSE 65 Sed: 47146, Spo: 39035, Pic: 27303, Tru: 6792
vpicmake 357744 0.32 FALSE 267 Che: 22379, For: 22313, Toy: 14386, Hon: 12571
vpicmodel 357859 0.32 FALSE 2844 Sil: 5623, F-1: 4764, Unk: 3889, Acc: 3584
underoverride 435534 0.17 FALSE 5 Not: 42052, No : 39336, Unk: 4306, Und: 846

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
V1 0 1.00 261339.50 150884.29 1.0000e+00 130670.25 261339.50 392008.75 522678 ▇▇▇▇▇
year 0 1.00 2018.55 1.74 2.0160e+03 2017.00 2019.00 2020.00 2021 ▇▃▃▅▅
st_case 0 1.00 269756.34 164172.20 1.0001e+04 121693.00 260621.00 420429.75 560121 ▆▇▅▆▇
id 0 1.00 1657080743.98 725589313.86 2.0161e+08 2016260866.00 2018220018.00 2020180527.00 2021560104 ▂▁▁▁▇
veh_no 0 1.00 1.37 1.64 0.0000e+00 1.00 1.00 2.00 130 ▇▁▁▁▁
per_no 0 1.00 1.54 1.57 1.0000e+00 1.00 1.00 2.00 61 ▇▁▁▁▁
county 0 1.00 92.37 97.75 0.0000e+00 31.00 71.00 115.00 999 ▇▁▁▁▁
city 0 1.00 1340.22 2085.56 0.0000e+00 0.00 110.00 2023.00 9999 ▇▂▁▁▁
lon 0 1.00 -89.11 64.11 -1.7420e+02 -98.90 -88.06 -81.64 1000 ▇▁▁▁▁
lat 0 1.00 36.56 6.24 1.9040e+01 32.89 36.06 40.28 100 ▆▇▁▁▁
day 0 1.00 15.66 8.82 1.0000e+00 8.00 16.00 23.00 31 ▇▆▇▇▆
deaths 50732 0.90 0.64 0.72 0.0000e+00 0.00 1.00 1.00 18 ▇▁▁▁▁
drunk_dr 96747 0.81 0.25 0.46 0.0000e+00 0.00 0.00 0.00 4 ▇▂▁▁▁
fatals 0 1.00 1.16 0.55 1.0000e+00 1.00 1.00 1.00 20 ▇▁▁▁▁
model 50732 0.90 348.54 296.18 1.0000e+00 37.00 403.00 481.00 999 ▇▁▇▂▂
peds 0 1.00 0.25 0.57 0.0000e+00 0.00 0.00 0.00 23 ▇▁▁▁▁
permvit 0 1.00 3.43 4.46 0.0000e+00 2.00 3.00 4.00 133 ▇▁▁▁▁
pernotmvit 0 1.00 0.27 0.62 0.0000e+00 0.00 0.00 0.00 23 ▇▁▁▁▁
persons 0 1.00 3.45 4.46 0.0000e+00 2.00 3.00 4.00 133 ▇▁▁▁▁
pvh_invl 0 1.00 0.05 0.32 0.0000e+00 0.00 0.00 0.00 20 ▇▁▁▁▁
str_veh 0 1.00 0.10 0.31 0.0000e+00 0.00 0.00 0.00 35 ▇▁▁▁▁
ve_forms 0 1.00 1.91 2.65 1.0000e+00 1.00 2.00 2.00 130 ▇▁▁▁▁
ve_total 0 1.00 1.96 2.66 1.0000e+00 1.00 2.00 2.00 130 ▇▁▁▁▁
# Keep the 66 columns used in the rest of the analysis (order preserved),
# then tabulate the work-zone categories.
fars1 <- fars[, c(
  "id", "ve_total", "veh_no", "per_no", "state", "age", "body_typ",
  "bikecgp", "bikectype", "bikepos", "bus_use", "day_week", "dr_drink",
  "drugs", "ejection", "emer_use", "func_sys", "harm_ev", "helm_use",
  "inj_sev", "j_knife", "l_compl", "l_status", "l_type", "lgt_cond",
  "location", "m_harm", "mak_mod", "make", "mod_year", "month", "numoccs",
  "msafeqmt", "pedcgp", "pedloc", "pedpos", "pedctype", "per_typ",
  "prev_acc", "prev_dwi", "prev_spd", "prev_sus", "race", "rel_road",
  "reljct1", "rest_use", "rolinloc", "rollover", "rur_urb", "sch_bus",
  "sex", "speedrel", "trav_sp", "typ_int", "unittype", "valign",
  "vnum_lan", "vpavetyp", "vspd_lim", "vtrafcon", "vtrafway", "vsurcond",
  "vprofile", "wrk_zone", "deaths", "route"
)]
table(fars1[["wrk_zone"]])
## 
##            Construction             Maintenance                    None 
##                    7745                     877                  509615 
##                 Utility Work Zone, Type Unknown 
##                     216                    4225
# Frequency of each ve_total value across all rows of the full data set.
table(fars[["ve_total"]])
## 
##      1      2      3      4      5      6      7      8      9     10     11 
## 208470 226069  55847  17293   7393   3007   1766    882    498    373    146 
##     12     13     14     15     16     19     21     24     27     28     32 
##    218     18     19     24     61     25     50     26     86     31     47 
##     59     64    130 
##     74    120    135
# Work-zone subset: every row whose wrk_zone is something other than "None".
wz <- subset(fars1, subset = wrk_zone != "None")
dim(wz)
## [1] 13063    66
# Summarise every column of the work-zone subset.
skim(wz)
Data summary
Name wz
Number of rows 13063
Number of columns 66
Key NULL
_______________________
Column type frequency:
factor 61
numeric 5
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
state 0 1.00 FALSE 51 Tex: 2646, Cal: 1315, Flo: 1071, Geo: 757
age 0 1.00 FALSE 100 23 : 286, 24 : 279, 25 : 279, 29 : 279
body_typ 1260 0.90 FALSE 65 4-d: 2758, Com: 1593, Lig: 1422, Tru: 1305
bikecgp 12064 0.08 FALSE 14 Not: 937, Mot: 17, Bic: 7, Oth: 7
bikectype 12064 0.08 FALSE 26 Not: 937, Mot: 9, Unk: 7, Bic: 5
bikepos 12064 0.08 FALSE 6 Not: 937, Tra: 50, Sid: 6, Unk: 3
bus_use 1260 0.90 FALSE 6 Not: 11554, Cha: 118, Unk: 97, Tra: 23
day_week 0 1.00 FALSE 7 Fri: 2109, Tue: 1973, Thu: 1934, Sat: 1880
dr_drink 1260 0.90 FALSE 2 No: 10274, Yes: 1529
drugs 0 1.00 FALSE 3 Unk: 7299, No : 5044, Yes: 720
ejection 0 1.00 FALSE 8 Not: 10444, Not: 1750, Tot: 611, Par: 136
emer_use 1260 0.90 FALSE 6 Not: 11751, Unk: 24, Eme: 10, Eme: 9
func_sys 0 1.00 FALSE 9 Int: 5838, Pri: 3405, Min: 1509, Pri: 1212
harm_ev 0 1.00 FALSE 49 Mot: 7814, Ped: 1672, Rol: 560, Con: 341
helm_use 6135 0.53 FALSE 8 Not: 6006, Not: 543, No : 143, Hel: 119
inj_sev 0 1.00 FALSE 9 Fat: 5011, No : 3942, Sus: 1339, Pos: 1078
j_knife 1260 0.90 FALSE 4 Not: 10248, No: 1452, Yes: 87, Yes: 16
l_compl 1260 0.90 FALSE 7 Val: 9827, No : 945, Not: 591, Unk: 387
l_status 1260 0.90 FALSE 8 Val: 10018, Not: 591, Sus: 535, Unk: 372
l_type 1260 0.90 FALSE 7 Ful: 10636, Not: 591, Unk: 372, Int: 89
lgt_cond 0 1.00 FALSE 8 Day: 7143, Dar: 3236, Dar: 2137, Daw: 223
location 0 1.00 FALSE 20 Occ: 12063, Not: 612, Sho: 131, Non: 59
m_harm 1260 0.90 FALSE 50 Mot: 7715, Ped: 1025, Rol: 1021, Mot: 279
mak_mod 1260 0.90 FALSE 635 Fre: 563, For: 501, Che: 355, Har: 352
make 1260 0.90 FALSE 61 For: 1644, Che: 1477, Toy: 927, Hon: 778
mod_year 12882 0.01 FALSE 1 Unk: 181
month 0 1.00 FALSE 12 Jul: 1505, Aug: 1418, Oct: 1348, Sep: 1310
numoccs 1260 0.90 FALSE 18 1: 5761, 2: 2928, 3: 1162, 4: 840
msafeqmt 12916 0.01 FALSE 6 Unk: 78, Non: 35, Ref: 29, Hel: 2
pedcgp 12064 0.08 FALSE 19 Unu: 196, Cro: 140, Wor: 140, Oth: 110
pedloc 12064 0.08 FALSE 6 Not: 768, At : 62, Non: 62, Not: 62
pedpos 12064 0.08 FALSE 10 Tra: 621, Pav: 102, Not: 62, Oth: 59
pedctype 12064 0.08 FALSE 47 Wor: 139, Ped: 127, Not: 99, Cro: 90
per_typ 0 1.00 FALSE 13 Dri: 8092, Pas: 3676, Ped: 925, Occ: 260
prev_acc 1260 0.90 FALSE 10 Non: 8607, 1: 1406, Cra: 976, Unk: 449
prev_dwi 1260 0.90 FALSE 7 Non: 11052, Unk: 449, 1: 235, 2: 36
prev_spd 1260 0.90 FALSE 12 Non: 9118, 1: 1554, Unk: 449, 2: 416
prev_sus 9258 0.29 FALSE 23 Non: 3220, 1: 221, Unk: 132, 2: 79
race 6928 0.47 FALSE 19 Not: 3786, Whi: 1749, Bla: 377, Unk: 90
rel_road 0 1.00 FALSE 12 On : 10094, On : 1480, On : 593, On : 368
reljct1 0 1.00 FALSE 3 No: 11716, Yes: 1341, Unk: 6
rest_use 0 1.00 FALSE 21 Sho: 7531, Non: 1476, Unk: 1147, Not: 1000
rolinloc 1260 0.90 FALSE 8 No : 10244, On : 679, On : 493, On : 230
rollover 1260 0.90 FALSE 4 No : 10244, Rol: 1257, Rol: 186, Rol: 116
rur_urb 0 1.00 FALSE 3 Urb: 7838, Rur: 5221, Tra: 4, Unk: 0
sch_bus 0 1.00 FALSE 2 No: 13038, Yes: 25
sex 0 1.00 FALSE 3 Mal: 8884, Fem: 3955, Unk: 224
speedrel 1260 0.90 FALSE 7 No: 9476, Yes: 1059, Yes: 625, Yes: 326
trav_sp 1260 0.90 FALSE 115 Unk: 6535, Sto: 1778, 070: 311, 065: 309
typ_int 0 1.00 FALSE 7 Not: 11388, Fou: 1094, T-I: 501, Y-I: 42
unittype 1260 0.90 FALSE 1 Mot: 11803
valign 1260 0.90 FALSE 8 Str: 10101, Cur: 506, Cur: 488, Cur: 194
vnum_lan 1260 0.90 FALSE 9 Two: 6758, Thr: 1926, Fou: 1430, Fiv: 707
vpavetyp 1260 0.90 FALSE 8 Bla: 6136, Unk: 3796, Con: 1750, Non: 53
vspd_lim 1260 0.90 FALSE 19 55 : 2967, 65 : 1788, 70 : 1680, 45 : 1372
vtrafcon 1260 0.90 FALSE 17 No : 7483, War: 1280, Oth: 671, Unk: 575
vtrafway 1260 0.90 FALSE 9 Two: 5136, Two: 3424, Two: 2105, Two: 331
vsurcond 1260 0.90 FALSE 11 Dry: 10610, Wet: 874, Unk: 93, Mud: 53
vprofile 1260 0.90 FALSE 8 Lev: 8514, Gra: 1208, Unk: 758, Dow: 521
wrk_zone 0 1.00 FALSE 4 Con: 7745, Wor: 4225, Mai: 877, Uti: 216
route 0 1.00 FALSE 9 Int: 5801, Sta: 2953, U.S: 2289, Loc: 828

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
id 0 1.0 1.678807e+09 7.08728e+08 201610013 2016340268 2018240371 2020170966 2021560064 ▂▁▁▁▇
ve_total 0 1.0 2.580000e+00 1.73000e+00 1 2 2 3 16 ▇▁▁▁▁
veh_no 0 1.0 1.680000e+00 1.27000e+00 0 1 1 2 16 ▇▁▁▁▁
per_no 0 1.0 1.620000e+00 2.02000e+00 1 1 1 2 43 ▇▁▁▁▁
deaths 1260 0.9 6.300000e-01 1.04000e+00 0 0 0 1 13 ▇▁▁▁▁
# Narrow the work-zone rows to seven factor columns and summarise them.
wz1 <- wz[, c(
  "day_week", "ejection", "dr_drink", "harm_ev",
  "func_sys", "lgt_cond", "speedrel"
)]
skim(wz1)
Data summary
Name wz1
Number of rows 13063
Number of columns 7
Key NULL
_______________________
Column type frequency:
factor 7
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
day_week 0 1.0 FALSE 7 Fri: 2109, Tue: 1973, Thu: 1934, Sat: 1880
ejection 0 1.0 FALSE 8 Not: 10444, Not: 1750, Tot: 611, Par: 136
dr_drink 1260 0.9 FALSE 2 No: 10274, Yes: 1529
harm_ev 0 1.0 FALSE 49 Mot: 7814, Ped: 1672, Rol: 560, Con: 341
func_sys 0 1.0 FALSE 9 Int: 5838, Pri: 3405, Min: 1509, Pri: 1212
lgt_cond 0 1.0 FALSE 8 Day: 7143, Dar: 3236, Dar: 2137, Daw: 223
speedrel 1260 0.9 FALSE 7 No: 9476, Yes: 1059, Yes: 625, Yes: 326
# Columns whose infrequent levels are collapsed.
factor_columns <- c(
  "day_week", "ejection", "dr_drink", "harm_ev",
  "func_sys", "lgt_cond", "speedrel"
)

# Lump levels below the 10% proportion threshold into a single "other" level.
# all_of() makes the external character vector explicit (bare vectors in
# selections are deprecated), and the explicit function replaces passing
# extra arguments through across()'s `...`, which is also deprecated.
wz2 <- wz1 %>%
  mutate(across(
    all_of(factor_columns),
    function(col) fct_lump_prop(col, prop = 0.1, other_level = "other")
  ))
skim(wz2)
Data summary
Name wz2
Number of rows 13063
Number of columns 7
Key NULL
_______________________
Column type frequency:
factor 7
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
day_week 0 1.0 FALSE 7 Fri: 2109, Tue: 1973, Thu: 1934, Sat: 1880
ejection 0 1.0 FALSE 3 Not: 10444, Not: 1750, oth: 869
dr_drink 1260 0.9 FALSE 2 No: 10274, Yes: 1529
harm_ev 0 1.0 FALSE 3 Mot: 7814, oth: 3577, Ped: 1672
func_sys 0 1.0 FALSE 4 Int: 5838, Pri: 3405, oth: 2311, Min: 1509
lgt_cond 0 1.0 FALSE 4 Day: 7143, Dar: 3236, Dar: 2137, oth: 547
speedrel 1260 0.9 FALSE 2 No: 9476, oth: 2327

2 Crash Involving 130 Vehicles

# Rows where ve_total equals 130, shown as an interactive DT table.
a135 <- subset(fars1, subset = ve_total == 130)
datatable(a135)