library(fst)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(SmartEDA)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(DT)
setwd("D:/Data_Archive/Crash Data/TX/2025Analysis/Data")
dat= read.fst("Crash2017_2023Cat.fst")
dim(dat)
## [1] 4353302 177
unt= read.fst("Unit2017_2023Cat.fst")
dim(unt)
## [1] 8724534 117
ppr= read.fst("PrimaryPerson2017_2023Cat.fst")
dim(ppr)
## [1] 7961292 53
cit= read.fst("Citation2017_2023Cat.fst")
dim(cit)
## [1] 2552160 10
names(dat)
## [1] "Crash_ID" "Crash_Fatal_Fl"
## [3] "Cmv_Involv_Fl" "Schl_Bus_Fl"
## [5] "Rr_Relat_Fl" "Medical_Advisory_Fl"
## [7] "Amend_Supp_Fl" "Active_School_Zone_Fl"
## [9] "Crash_Date" "Crash_Time"
## [11] "Case_ID" "Local_Use"
## [13] "Rpt_CRIS_Cnty_ID" "Rpt_City_ID"
## [15] "Rpt_Outside_City_Limit_Fl" "Thousand_Damage_Fl"
## [17] "Rpt_Latitude" "Rpt_Longitude"
## [19] "Rpt_Rdwy_Sys_ID" "Rpt_Hwy_Num"
## [21] "Rpt_Hwy_Sfx" "Rpt_Road_Part_ID"
## [23] "Rpt_Block_Num" "Rpt_Street_Pfx"
## [25] "Rpt_Street_Name" "Rpt_Street_Sfx"
## [27] "Private_Dr_Fl" "Toll_Road_Fl"
## [29] "Crash_Speed_Limit" "Road_Constr_Zone_Fl"
## [31] "Road_Constr_Zone_Wrkr_Fl" "Rpt_Street_Desc"
## [33] "At_Intrsct_Fl" "Rpt_Sec_Rdwy_Sys_ID"
## [35] "Rpt_Sec_Hwy_Num" "Rpt_Sec_Hwy_Sfx"
## [37] "Rpt_Sec_Road_Part_ID" "Rpt_Sec_Block_Num"
## [39] "Rpt_Sec_Street_Pfx" "Rpt_Sec_Street_Name"
## [41] "Rpt_Sec_Street_Sfx" "Rpt_Ref_Mark_Offset_Amt"
## [43] "Rpt_Ref_Mark_Dist_Uom" "Rpt_Ref_Mark_Dir"
## [45] "Rpt_Ref_Mark_Nbr" "Rpt_Sec_Street_Desc"
## [47] "Rpt_CrossingNumber" "Wthr_Cond_ID"
## [49] "Light_Cond_ID" "Entr_Road_ID"
## [51] "Road_Type_ID" "Road_Algn_ID"
## [53] "Surf_Cond_ID" "Traffic_Cntl_ID"
## [55] "Investigat_Notify_Time" "Investigat_Notify_Meth"
## [57] "Investigat_Arrv_Time" "Report_Date"
## [59] "Investigat_Comp_Fl" "Investigator_Name"
## [61] "ID_Number" "ORI_Number"
## [63] "Investigat_Agency_ID" "Investigat_Area_ID"
## [65] "Investigat_District_ID" "Investigat_Region_ID"
## [67] "Bridge_Detail_ID" "Harm_Evnt_ID"
## [69] "Intrsct_Relat_ID" "FHE_Collsn_ID"
## [71] "Obj_Struck_ID" "Othr_Factr_ID"
## [73] "Road_Part_Adj_ID" "Road_Cls_ID"
## [75] "Road_Relat_ID" "Phys_Featr_1_ID"
## [77] "Phys_Featr_2_ID" "Cnty_ID"
## [79] "City_ID" "Latitude"
## [81] "Longitude" "Hwy_Sys"
## [83] "Hwy_Nbr" "Hwy_Sfx"
## [85] "Dfo" "Street_Name"
## [87] "Street_Nbr" "Control"
## [89] "Section" "Milepoint"
## [91] "Ref_Mark_Nbr" "Ref_Mark_Displ"
## [93] "Hwy_Sys_2" "Hwy_Nbr_2"
## [95] "Hwy_Sfx_2" "Street_Name_2"
## [97] "Street_Nbr_2" "Control_2"
## [99] "Section_2" "Milepoint_2"
## [101] "Txdot_Rptable_Fl" "Onsys_Fl"
## [103] "Rural_Fl" "Crash_Sev_ID"
## [105] "Pop_Group_ID" "Located_Fl"
## [107] "Day_of_Week" "Hwy_Dsgn_Lane_ID"
## [109] "Hwy_Dsgn_Hrt_ID" "Hp_Shldr_Left"
## [111] "Hp_Shldr_Right" "Hp_Median_Width"
## [113] "Base_Type_ID" "Nbr_Of_Lane"
## [115] "Row_Width_Usual" "Roadbed_Width"
## [117] "Surf_Width" "Surf_Type_ID"
## [119] "Curb_Type_Left_ID" "Curb_Type_Right_ID"
## [121] "Shldr_Type_Left_ID" "Shldr_Width_Left"
## [123] "Shldr_Use_Left_ID" "Shldr_Type_Right_ID"
## [125] "Shldr_Width_Right" "Shldr_Use_Right_ID"
## [127] "Median_Type_ID" "Median_Width"
## [129] "Rural_Urban_Type_ID" "Func_Sys_ID"
## [131] "Adt_Curnt_Amt" "Adt_Curnt_Year"
## [133] "Adt_Adj_Curnt_Amt" "Pct_Single_Trk_Adt"
## [135] "Pct_Combo_Trk_Adt" "Trk_Aadt_Pct"
## [137] "Curve_Type_ID" "Curve_Lngth"
## [139] "Cd_Degr" "Delta_Left_Right_ID"
## [141] "Dd_Degr" "Feature_Crossed"
## [143] "Structure_Number" "I_R_Min_Vert_Clear"
## [145] "Approach_Width" "Bridge_Median_ID"
## [147] "Bridge_Loading_Type_ID" "Bridge_Loading_In_1000_Lbs"
## [149] "Bridge_Srvc_Type_On_ID" "Bridge_Srvc_Type_Under_ID"
## [151] "Culvert_Type_ID" "Roadway_Width"
## [153] "Deck_Width" "Bridge_Dir_Of_Traffic_ID"
## [155] "Bridge_Rte_Struct_Func_ID" "Bridge_IR_Struct_Func_ID"
## [157] "CrossingNumber" "RRCo"
## [159] "Poscrossing_ID" "WDCode_ID"
## [161] "Standstop" "Yield"
## [163] "Sus_Serious_Injry_Cnt" "Nonincap_Injry_Cnt"
## [165] "Poss_Injry_Cnt" "Non_Injry_Cnt"
## [167] "Unkn_Injry_Cnt" "Tot_Injry_Cnt"
## [169] "Death_Cnt" "MPO_ID"
## [171] "Investigat_Service_ID" "Investigat_DA_ID"
## [173] "Investigator_Narrative" "near_Trauma_Center_ID"
## [175] "near_Trauma_Center_Distance" "Year"
## [177] "V1"
crs= dat
crs1= crs[, c("Crash_ID", "Crash_Date", "Crash_Time",
"Crash_Speed_Limit", "Wthr_Cond_ID", "Light_Cond_ID",
"Road_Type_ID", "Road_Algn_ID", "Surf_Cond_ID",
"Traffic_Cntl_ID", "Harm_Evnt_ID", "Intrsct_Relat_ID",
"FHE_Collsn_ID", "Obj_Struck_ID", "Othr_Factr_ID",
"Cnty_ID", "City_ID", "Latitude", "Longitude",
"Crash_Sev_ID", "Pop_Group_ID", "Day_of_Week",
"Tot_Injry_Cnt", "Death_Cnt", "Investigator_Narrative",
"Year")]
head(crs1[, -c(25)])
## Crash_ID Crash_Date Crash_Time Crash_Speed_Limit Wthr_Cond_ID Light_Cond_ID
## 1 14570071 08/01/2017 11:00 PM 65 Clear Dark, lighted
## 2 14881309 02/17/2017 07:00 PM 15 Clear Dusk
## 3 14963691 07/15/2017 05:56 AM 50 Clear Dark, lighted
## 4 14963927 03/09/2017 11:43 PM 35 Clear Dark, lighted
## 5 14990459 03/26/2017 11:45 AM 40 Clear Daylight
## 6 15138336 06/03/2017 02:19 AM 35 Clear Dark, lighted
## Road_Type_ID Road_Algn_ID Surf_Cond_ID Traffic_Cntl_ID
## 1 4 or more lanes, undivided Curve, level Dry Center stripe/divider
## 2 2 lane, 2 way Straight, level Dry Marked lanes
## 3 <NA> Straight, level Dry Signal light
## 4 4 or more lanes, divided Straight, level Dry Marked lanes
## 5 2 lane, 2 way Straight, level Dry Marked lanes
## 6 4 or more lanes, divided Straight, level Dry Marked lanes
## Harm_Evnt_ID Intrsct_Relat_ID
## 1 Fixed object Non intersection
## 2 Motor vehicle in transport Non intersection
## 3 Motor vehicle in transport Intersection
## 4 Fixed object Non intersection
## 5 Fixed object Non intersection
## 6 Motor vehicle in transport Intersection related
## FHE_Collsn_ID Obj_Struck_ID
## 1 Omv vehicle going straight Hit median barrier
## 2 Od one straight-one backing Not applicable
## 3 Angle - both going straight Not applicable
## 4 Omv vehicle going straight Hit median barrier
## 5 Omv vehicle going straight Hit utility pole
## 6 Sd both going straight-rear end Not applicable
## Othr_Factr_ID Cnty_ID City_ID Latitude Longitude
## 1 Swerved or veered-reason not specified 71 El paso 31.78471 -106.51844
## 2 Not applicable 71 Socorro 31.65162 -106.26752
## 3 Not applicable 101 Houston 29.63043 -95.17126
## 4 Swerved or veered-reason not specified 57 Dallas 32.73907 -96.68287
## 5 Attention diverted from driving 101 Tomball 30.11203 -95.60361
## 6 Not applicable 57 Dallas 32.81256 -96.81321
## Crash_Sev_ID Pop_Group_ID Day_of_Week Tot_Injry_Cnt Death_Cnt
## 1 Non-incapacitating 250,000 pop. And over TUE 1 0
## 2 Not injured 25,000 - 49,999 pop FRI 0 0
## 3 Possible injury 250,000 pop. And over SAT 1 0
## 4 Possible injury 250,000 pop. And over THU 1 0
## 5 Not injured 10,000 - 24,999 pop SUN 0 0
## 6 Not injured 250,000 pop. And over SAT 0 0
## Year
## 1 2017
## 2 2017
## 3 2017
## 4 2017
## 5 2017
## 6 2017
table(crs1$Year)
##
## 2017 2018 2019 2020 2021 2022 2023
## 620868 629255 649106 546100 633712 635506 638755
crs1 <- crs1 %>% mutate_if(is.character, as.factor)
tab=ExpCTable(crs1[, -c(25)],Target=NULL,
round=2,bin=NULL,per=F,clim=200,nlim=200)
datatable(
tab, extensions = 'Buttons', options = list(
dom = 'Bfrtip',
buttons = c( 'csv', 'excel')
)
)
unt1= unt[, c("Crash_ID", "Unit_Nbr", "Veh_Mod_Year", "Veh_Make_ID",
"Veh_Mod_ID", "Veh_Body_Styl_ID", "Veh_Cmv_Fl",
"Cmv_Hazmat_Fl", "Contrib_Factr_1_ID",
"Pedestrian_Action_ID", "Pedalcyclist_Action_ID",
"PBCAT_Pedestrian_ID", "PBCAT_Pedalcyclist_ID",
"E_Scooter_ID", "Autonomous_Unit_ID", "Unit_ID")]
dim(unt1)
## [1] 8724534 16
head(unt1)
## Crash_ID Unit_Nbr Veh_Mod_Year Veh_Make_ID Veh_Mod_ID
## 1 14570071 1 2009 Ford Ford focus
## 2 14881309 1 2015 Chevrolet Chevrolet silverado (pickup)
## 3 14881309 2 2013 Hyundai Hyundai genesis
## 4 14963691 1 2001 Chevrolet Chevrolet tahoe (utility)
## 5 14963691 2 2013 Hyundai Hyundai tucson
## 6 14963691 3 2010 Toyota Toyota corolla
## Veh_Body_Styl_ID Veh_Cmv_Fl Cmv_Hazmat_Fl Contrib_Factr_1_ID
## 1 Passenger car, 2-door N <NA> Unsafe speed
## 2 Pickup N <NA> Backed without safety
## 3 Passenger car, 2-door N <NA> Not applicable
## 4 Sport utility vehicle N <NA> Disregard stop and go signal
## 5 Sport utility vehicle N <NA> Not applicable
## 6 Passenger car, 4-door N <NA> Not applicable
## Pedestrian_Action_ID Pedalcyclist_Action_ID PBCAT_Pedestrian_ID
## 1 95 95 95
## 2 95 95 95
## 3 95 95 95
## 4 95 95 95
## 5 95 95 95
## 6 95 95 95
## PBCAT_Pedalcyclist_ID E_Scooter_ID Autonomous_Unit_ID Unit_ID
## 1 95 95 95 14570071_1
## 2 95 95 95 14881309_1
## 3 95 95 95 14881309_2
## 4 95 95 95 14963691_1
## 5 95 95 95 14963691_2
## 6 95 95 95 14963691_3
unt1 <- unt1 %>% mutate_if(is.character, as.factor)
tab= ExpCTable(unt1,Target=NULL,
round=2,bin=NULL,per=F,clim=200,nlim=200)
datatable(
tab, extensions = 'Buttons', options = list(
dom = 'Bfrtip',
buttons = c( 'csv', 'excel')
)
)
ppr1= ppr[, c("Crash_ID", "Unit_Nbr", "Prsn_Nbr", "Prsn_Type_ID",
"Prsn_Occpnt_Pos_ID", "Prsn_Age", "Prsn_Ethnicity_ID",
"Prsn_Gndr_ID", "Prsn_Ejct_ID", "Prsn_Rest_ID",
"Prsn_Airbag_ID", "Prsn_Helmet_ID", "Prsn_Alc_Spec_Type_ID",
"Prsn_Alc_Rslt_ID", "Drvr_Lic_Type_ID", "Drvr_Lic_State_ID",
"Person_ID", "Person_Type")]
dim(ppr1)
## [1] 7961292 18
head(ppr1)
## Crash_ID Unit_Nbr Prsn_Nbr Prsn_Type_ID Prsn_Occpnt_Pos_ID Prsn_Age
## 1 14570071 1 1 Driver Front left 21
## 2 14881309 1 1 Driver Front left 55
## 3 14881309 2 1 Driver Front left 22
## 4 14963691 1 1 Driver Front left 65
## 5 14963691 2 1 Driver Front left 28
## 6 14963691 3 1 Driver Front left 36
## Prsn_Ethnicity_ID Prsn_Gndr_ID Prsn_Ejct_ID
## 1 White Male No
## 2 Hispanic Male No
## 3 Hispanic Male No
## 4 Black Male No
## 5 White Male No
## 6 Hispanic Male No
## Prsn_Rest_ID Prsn_Airbag_ID Prsn_Helmet_ID Prsn_Alc_Spec_Type_ID
## 1 Shoulder & lap belt Deployed, front Not applicable None
## 2 Unknown Unknown Not applicable None
## 3 Shoulder & lap belt Not deployed Not applicable None
## 4 Shoulder & lap belt Not deployed Not applicable Blood
## 5 Shoulder & lap belt Deployed, side Not applicable None
## 6 Shoulder & lap belt Not deployed Not applicable None
## Prsn_Alc_Rslt_ID Drvr_Lic_Type_ID Drvr_Lic_State_ID Person_ID
## 1 NA Driver license Tx 14570071_1_1
## 2 NA Unlicensed <NA> 14881309_1_1
## 3 NA Driver license Tx 14881309_2_1
## 4 1 Driver license Tx 14963691_1_1
## 5 NA Driver license Tx 14963691_2_1
## 6 NA Unlicensed <NA> 14963691_3_1
## Person_Type
## 1 PrimaryPerson
## 2 PrimaryPerson
## 3 PrimaryPerson
## 4 PrimaryPerson
## 5 PrimaryPerson
## 6 PrimaryPerson
ppr1 <- ppr1 %>% mutate_if(is.character, as.factor)
tab= ExpCTable(ppr1,Target=NULL,
round=2,bin=NULL,per=F,clim=200,nlim=200)
datatable(
tab, extensions = 'Buttons', options = list(
dom = 'Bfrtip',
buttons = c( 'csv', 'excel')
)
)
cit1= cit[, c("Crash_ID", "Unit_Nbr", "Prsn_Nbr", "Charge",
"Unit_ID", "Person_ID")]
dim(cit1)
## [1] 2552160 6
head(cit1)
## Crash_ID Unit_Nbr Prsn_Nbr Charge
## 1 14570071 1 1 DRIVING WHILE INTOXICATED
## 2 14881309 1 1 ACCIDENT INVOLVING DAMAGE TO VEHICLE >=$200
## 3 14963691 1 1 RAN RED LIGHT
## 4 14963691 1 1 DWI
## 5 14990459 1 1 DUI
## 6 15316938 1 1 DWI
## Unit_ID Person_ID
## 1 14570071_1 14570071_1_1
## 2 14881309_1 14881309_1_1
## 3 14963691_1 14963691_1_1
## 4 14963691_1 14963691_1_1
## 5 14990459_1 14990459_1_1
## 6 15316938_1 15316938_1_1
cit2 <- cit1 %>% mutate_if(is.character, as.factor)
tab= ExpCTable(cit2[, -c(1, 5, 6)],Target=NULL,
round=2,bin=NULL,per=F,clim=212020,nlim=212020)
datatable(
tab, extensions = 'Buttons', options = list(
dom = 'Bfrtip',
buttons = c( 'csv', 'excel')
)
)
## Warning in instance$preRenderHook(instance): It seems your data is too big for
## client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html