library(fst) 
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(SmartEDA)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(DT)

setwd("D:/Data_Archive/Crash Data/TX/2025Analysis/Data")
dat= read.fst("Crash2017_2023Cat.fst")
dim(dat)
## [1] 4353302     177
unt= read.fst("Unit2017_2023Cat.fst")
dim(unt)
## [1] 8724534     117
ppr= read.fst("PrimaryPerson2017_2023Cat.fst")
dim(ppr)
## [1] 7961292      53
cit= read.fst("Citation2017_2023Cat.fst")
dim(cit)
## [1] 2552160      10
names(dat)
##   [1] "Crash_ID"                    "Crash_Fatal_Fl"             
##   [3] "Cmv_Involv_Fl"               "Schl_Bus_Fl"                
##   [5] "Rr_Relat_Fl"                 "Medical_Advisory_Fl"        
##   [7] "Amend_Supp_Fl"               "Active_School_Zone_Fl"      
##   [9] "Crash_Date"                  "Crash_Time"                 
##  [11] "Case_ID"                     "Local_Use"                  
##  [13] "Rpt_CRIS_Cnty_ID"            "Rpt_City_ID"                
##  [15] "Rpt_Outside_City_Limit_Fl"   "Thousand_Damage_Fl"         
##  [17] "Rpt_Latitude"                "Rpt_Longitude"              
##  [19] "Rpt_Rdwy_Sys_ID"             "Rpt_Hwy_Num"                
##  [21] "Rpt_Hwy_Sfx"                 "Rpt_Road_Part_ID"           
##  [23] "Rpt_Block_Num"               "Rpt_Street_Pfx"             
##  [25] "Rpt_Street_Name"             "Rpt_Street_Sfx"             
##  [27] "Private_Dr_Fl"               "Toll_Road_Fl"               
##  [29] "Crash_Speed_Limit"           "Road_Constr_Zone_Fl"        
##  [31] "Road_Constr_Zone_Wrkr_Fl"    "Rpt_Street_Desc"            
##  [33] "At_Intrsct_Fl"               "Rpt_Sec_Rdwy_Sys_ID"        
##  [35] "Rpt_Sec_Hwy_Num"             "Rpt_Sec_Hwy_Sfx"            
##  [37] "Rpt_Sec_Road_Part_ID"        "Rpt_Sec_Block_Num"          
##  [39] "Rpt_Sec_Street_Pfx"          "Rpt_Sec_Street_Name"        
##  [41] "Rpt_Sec_Street_Sfx"          "Rpt_Ref_Mark_Offset_Amt"    
##  [43] "Rpt_Ref_Mark_Dist_Uom"       "Rpt_Ref_Mark_Dir"           
##  [45] "Rpt_Ref_Mark_Nbr"            "Rpt_Sec_Street_Desc"        
##  [47] "Rpt_CrossingNumber"          "Wthr_Cond_ID"               
##  [49] "Light_Cond_ID"               "Entr_Road_ID"               
##  [51] "Road_Type_ID"                "Road_Algn_ID"               
##  [53] "Surf_Cond_ID"                "Traffic_Cntl_ID"            
##  [55] "Investigat_Notify_Time"      "Investigat_Notify_Meth"     
##  [57] "Investigat_Arrv_Time"        "Report_Date"                
##  [59] "Investigat_Comp_Fl"          "Investigator_Name"          
##  [61] "ID_Number"                   "ORI_Number"                 
##  [63] "Investigat_Agency_ID"        "Investigat_Area_ID"         
##  [65] "Investigat_District_ID"      "Investigat_Region_ID"       
##  [67] "Bridge_Detail_ID"            "Harm_Evnt_ID"               
##  [69] "Intrsct_Relat_ID"            "FHE_Collsn_ID"              
##  [71] "Obj_Struck_ID"               "Othr_Factr_ID"              
##  [73] "Road_Part_Adj_ID"            "Road_Cls_ID"                
##  [75] "Road_Relat_ID"               "Phys_Featr_1_ID"            
##  [77] "Phys_Featr_2_ID"             "Cnty_ID"                    
##  [79] "City_ID"                     "Latitude"                   
##  [81] "Longitude"                   "Hwy_Sys"                    
##  [83] "Hwy_Nbr"                     "Hwy_Sfx"                    
##  [85] "Dfo"                         "Street_Name"                
##  [87] "Street_Nbr"                  "Control"                    
##  [89] "Section"                     "Milepoint"                  
##  [91] "Ref_Mark_Nbr"                "Ref_Mark_Displ"             
##  [93] "Hwy_Sys_2"                   "Hwy_Nbr_2"                  
##  [95] "Hwy_Sfx_2"                   "Street_Name_2"              
##  [97] "Street_Nbr_2"                "Control_2"                  
##  [99] "Section_2"                   "Milepoint_2"                
## [101] "Txdot_Rptable_Fl"            "Onsys_Fl"                   
## [103] "Rural_Fl"                    "Crash_Sev_ID"               
## [105] "Pop_Group_ID"                "Located_Fl"                 
## [107] "Day_of_Week"                 "Hwy_Dsgn_Lane_ID"           
## [109] "Hwy_Dsgn_Hrt_ID"             "Hp_Shldr_Left"              
## [111] "Hp_Shldr_Right"              "Hp_Median_Width"            
## [113] "Base_Type_ID"                "Nbr_Of_Lane"                
## [115] "Row_Width_Usual"             "Roadbed_Width"              
## [117] "Surf_Width"                  "Surf_Type_ID"               
## [119] "Curb_Type_Left_ID"           "Curb_Type_Right_ID"         
## [121] "Shldr_Type_Left_ID"          "Shldr_Width_Left"           
## [123] "Shldr_Use_Left_ID"           "Shldr_Type_Right_ID"        
## [125] "Shldr_Width_Right"           "Shldr_Use_Right_ID"         
## [127] "Median_Type_ID"              "Median_Width"               
## [129] "Rural_Urban_Type_ID"         "Func_Sys_ID"                
## [131] "Adt_Curnt_Amt"               "Adt_Curnt_Year"             
## [133] "Adt_Adj_Curnt_Amt"           "Pct_Single_Trk_Adt"         
## [135] "Pct_Combo_Trk_Adt"           "Trk_Aadt_Pct"               
## [137] "Curve_Type_ID"               "Curve_Lngth"                
## [139] "Cd_Degr"                     "Delta_Left_Right_ID"        
## [141] "Dd_Degr"                     "Feature_Crossed"            
## [143] "Structure_Number"            "I_R_Min_Vert_Clear"         
## [145] "Approach_Width"              "Bridge_Median_ID"           
## [147] "Bridge_Loading_Type_ID"      "Bridge_Loading_In_1000_Lbs" 
## [149] "Bridge_Srvc_Type_On_ID"      "Bridge_Srvc_Type_Under_ID"  
## [151] "Culvert_Type_ID"             "Roadway_Width"              
## [153] "Deck_Width"                  "Bridge_Dir_Of_Traffic_ID"   
## [155] "Bridge_Rte_Struct_Func_ID"   "Bridge_IR_Struct_Func_ID"   
## [157] "CrossingNumber"              "RRCo"                       
## [159] "Poscrossing_ID"              "WDCode_ID"                  
## [161] "Standstop"                   "Yield"                      
## [163] "Sus_Serious_Injry_Cnt"       "Nonincap_Injry_Cnt"         
## [165] "Poss_Injry_Cnt"              "Non_Injry_Cnt"              
## [167] "Unkn_Injry_Cnt"              "Tot_Injry_Cnt"              
## [169] "Death_Cnt"                   "MPO_ID"                     
## [171] "Investigat_Service_ID"       "Investigat_DA_ID"           
## [173] "Investigator_Narrative"      "near_Trauma_Center_ID"      
## [175] "near_Trauma_Center_Distance" "Year"                       
## [177] "V1"
crs= dat

crs1= crs[, c("Crash_ID", "Crash_Date", "Crash_Time", 
              "Crash_Speed_Limit", "Wthr_Cond_ID", "Light_Cond_ID", 
              "Road_Type_ID", "Road_Algn_ID", "Surf_Cond_ID", 
              "Traffic_Cntl_ID", "Harm_Evnt_ID", "Intrsct_Relat_ID", 
              "FHE_Collsn_ID", "Obj_Struck_ID", "Othr_Factr_ID", 
              "Cnty_ID", "City_ID", "Latitude", "Longitude", 
              "Crash_Sev_ID", "Pop_Group_ID", "Day_of_Week", 
              "Tot_Injry_Cnt", "Death_Cnt", "Investigator_Narrative", 
              "Year")]

head(crs1[, -c(25)])
##   Crash_ID Crash_Date Crash_Time Crash_Speed_Limit Wthr_Cond_ID Light_Cond_ID
## 1 14570071 08/01/2017   11:00 PM                65        Clear Dark, lighted
## 2 14881309 02/17/2017   07:00 PM                15        Clear          Dusk
## 3 14963691 07/15/2017   05:56 AM                50        Clear Dark, lighted
## 4 14963927 03/09/2017   11:43 PM                35        Clear Dark, lighted
## 5 14990459 03/26/2017   11:45 AM                40        Clear      Daylight
## 6 15138336 06/03/2017   02:19 AM                35        Clear Dark, lighted
##                 Road_Type_ID    Road_Algn_ID Surf_Cond_ID       Traffic_Cntl_ID
## 1 4 or more lanes, undivided    Curve, level          Dry Center stripe/divider
## 2              2 lane, 2 way Straight, level          Dry          Marked lanes
## 3                       <NA> Straight, level          Dry          Signal light
## 4   4 or more lanes, divided Straight, level          Dry          Marked lanes
## 5              2 lane, 2 way Straight, level          Dry          Marked lanes
## 6   4 or more lanes, divided Straight, level          Dry          Marked lanes
##                 Harm_Evnt_ID     Intrsct_Relat_ID
## 1               Fixed object     Non intersection
## 2 Motor vehicle in transport     Non intersection
## 3 Motor vehicle in transport         Intersection
## 4               Fixed object     Non intersection
## 5               Fixed object     Non intersection
## 6 Motor vehicle in transport Intersection related
##                     FHE_Collsn_ID      Obj_Struck_ID
## 1      Omv vehicle going straight Hit median barrier
## 2     Od one straight-one backing     Not applicable
## 3     Angle - both going straight     Not applicable
## 4      Omv vehicle going straight Hit median barrier
## 5      Omv vehicle going straight   Hit utility pole
## 6 Sd both going straight-rear end     Not applicable
##                            Othr_Factr_ID Cnty_ID City_ID Latitude  Longitude
## 1 Swerved or veered-reason not specified      71 El paso 31.78471 -106.51844
## 2                         Not applicable      71 Socorro 31.65162 -106.26752
## 3                         Not applicable     101 Houston 29.63043  -95.17126
## 4 Swerved or veered-reason not specified      57  Dallas 32.73907  -96.68287
## 5        Attention diverted from driving     101 Tomball 30.11203  -95.60361
## 6                         Not applicable      57  Dallas 32.81256  -96.81321
##         Crash_Sev_ID          Pop_Group_ID Day_of_Week Tot_Injry_Cnt Death_Cnt
## 1 Non-incapacitating 250,000 pop. And over         TUE             1         0
## 2        Not injured   25,000 - 49,999 pop         FRI             0         0
## 3    Possible injury 250,000 pop. And over         SAT             1         0
## 4    Possible injury 250,000 pop. And over         THU             1         0
## 5        Not injured   10,000 - 24,999 pop         SUN             0         0
## 6        Not injured 250,000 pop. And over         SAT             0         0
##   Year
## 1 2017
## 2 2017
## 3 2017
## 4 2017
## 5 2017
## 6 2017
table(crs1$Year)
## 
##   2017   2018   2019   2020   2021   2022   2023 
## 620868 629255 649106 546100 633712 635506 638755
crs1 <- crs1 %>% mutate_if(is.character, as.factor) 
tab=ExpCTable(crs1[, -c(25)],Target=NULL,
         round=2,bin=NULL,per=F,clim=200,nlim=200)
datatable(
  tab, extensions = 'Buttons', options = list(
    dom = 'Bfrtip',
    buttons = c( 'csv', 'excel')
  )
)
unt1= unt[, c("Crash_ID", "Unit_Nbr", "Veh_Mod_Year", "Veh_Make_ID", 
              "Veh_Mod_ID", "Veh_Body_Styl_ID", "Veh_Cmv_Fl", 
              "Cmv_Hazmat_Fl", "Contrib_Factr_1_ID", 
              "Pedestrian_Action_ID", "Pedalcyclist_Action_ID", 
              "PBCAT_Pedestrian_ID", "PBCAT_Pedalcyclist_ID", 
              "E_Scooter_ID", "Autonomous_Unit_ID", "Unit_ID")]
dim(unt1)
## [1] 8724534      16
head(unt1)
##   Crash_ID Unit_Nbr Veh_Mod_Year Veh_Make_ID                   Veh_Mod_ID
## 1 14570071        1         2009        Ford                   Ford focus
## 2 14881309        1         2015   Chevrolet Chevrolet silverado (pickup)
## 3 14881309        2         2013     Hyundai              Hyundai genesis
## 4 14963691        1         2001   Chevrolet    Chevrolet tahoe (utility)
## 5 14963691        2         2013     Hyundai               Hyundai tucson
## 6 14963691        3         2010      Toyota               Toyota corolla
##        Veh_Body_Styl_ID Veh_Cmv_Fl Cmv_Hazmat_Fl           Contrib_Factr_1_ID
## 1 Passenger car, 2-door          N          <NA>                 Unsafe speed
## 2                Pickup          N          <NA>        Backed without safety
## 3 Passenger car, 2-door          N          <NA>               Not applicable
## 4 Sport utility vehicle          N          <NA> Disregard stop and go signal
## 5 Sport utility vehicle          N          <NA>               Not applicable
## 6 Passenger car, 4-door          N          <NA>               Not applicable
##   Pedestrian_Action_ID Pedalcyclist_Action_ID PBCAT_Pedestrian_ID
## 1                   95                     95                  95
## 2                   95                     95                  95
## 3                   95                     95                  95
## 4                   95                     95                  95
## 5                   95                     95                  95
## 6                   95                     95                  95
##   PBCAT_Pedalcyclist_ID E_Scooter_ID Autonomous_Unit_ID    Unit_ID
## 1                    95           95                 95 14570071_1
## 2                    95           95                 95 14881309_1
## 3                    95           95                 95 14881309_2
## 4                    95           95                 95 14963691_1
## 5                    95           95                 95 14963691_2
## 6                    95           95                 95 14963691_3
unt1 <- unt1 %>% mutate_if(is.character, as.factor) 
tab= ExpCTable(unt1,Target=NULL,
          round=2,bin=NULL,per=F,clim=200,nlim=200)
datatable(
  tab, extensions = 'Buttons', options = list(
    dom = 'Bfrtip',
    buttons = c( 'csv', 'excel')
  )
)
ppr1= ppr[, c("Crash_ID", "Unit_Nbr", "Prsn_Nbr", "Prsn_Type_ID", 
              "Prsn_Occpnt_Pos_ID", "Prsn_Age", "Prsn_Ethnicity_ID", 
              "Prsn_Gndr_ID", "Prsn_Ejct_ID", "Prsn_Rest_ID", 
              "Prsn_Airbag_ID", "Prsn_Helmet_ID", "Prsn_Alc_Spec_Type_ID", 
              "Prsn_Alc_Rslt_ID", "Drvr_Lic_Type_ID", "Drvr_Lic_State_ID", 
              "Person_ID", "Person_Type")]
dim(ppr1)
## [1] 7961292      18
head(ppr1)
##   Crash_ID Unit_Nbr Prsn_Nbr Prsn_Type_ID Prsn_Occpnt_Pos_ID Prsn_Age
## 1 14570071        1        1       Driver         Front left       21
## 2 14881309        1        1       Driver         Front left       55
## 3 14881309        2        1       Driver         Front left       22
## 4 14963691        1        1       Driver         Front left       65
## 5 14963691        2        1       Driver         Front left       28
## 6 14963691        3        1       Driver         Front left       36
##                          Prsn_Ethnicity_ID Prsn_Gndr_ID Prsn_Ejct_ID
## 1 White                                            Male           No
## 2 Hispanic                                         Male           No
## 3 Hispanic                                         Male           No
## 4 Black                                            Male           No
## 5 White                                            Male           No
## 6 Hispanic                                         Male           No
##          Prsn_Rest_ID  Prsn_Airbag_ID Prsn_Helmet_ID Prsn_Alc_Spec_Type_ID
## 1 Shoulder & lap belt Deployed, front Not applicable                  None
## 2             Unknown         Unknown Not applicable                  None
## 3 Shoulder & lap belt    Not deployed Not applicable                  None
## 4 Shoulder & lap belt    Not deployed Not applicable                 Blood
## 5 Shoulder & lap belt  Deployed, side Not applicable                  None
## 6 Shoulder & lap belt    Not deployed Not applicable                  None
##   Prsn_Alc_Rslt_ID Drvr_Lic_Type_ID Drvr_Lic_State_ID    Person_ID
## 1               NA   Driver license                Tx 14570071_1_1
## 2               NA       Unlicensed              <NA> 14881309_1_1
## 3               NA   Driver license                Tx 14881309_2_1
## 4                1   Driver license                Tx 14963691_1_1
## 5               NA   Driver license                Tx 14963691_2_1
## 6               NA       Unlicensed              <NA> 14963691_3_1
##     Person_Type
## 1 PrimaryPerson
## 2 PrimaryPerson
## 3 PrimaryPerson
## 4 PrimaryPerson
## 5 PrimaryPerson
## 6 PrimaryPerson
ppr1 <- ppr1 %>% mutate_if(is.character, as.factor) 
tab= ExpCTable(ppr1,Target=NULL,
          round=2,bin=NULL,per=F,clim=200,nlim=200)
datatable(
  tab, extensions = 'Buttons', options = list(
    dom = 'Bfrtip',
    buttons = c( 'csv', 'excel')
  )
)
cit1= cit[, c("Crash_ID", "Unit_Nbr", "Prsn_Nbr", "Charge", 
              "Unit_ID", "Person_ID")]
dim(cit1)
## [1] 2552160       6
head(cit1)
##   Crash_ID Unit_Nbr Prsn_Nbr                                      Charge
## 1 14570071        1        1                   DRIVING WHILE INTOXICATED
## 2 14881309        1        1 ACCIDENT INVOLVING DAMAGE TO VEHICLE >=$200
## 3 14963691        1        1                               RAN RED LIGHT
## 4 14963691        1        1                                         DWI
## 5 14990459        1        1                                         DUI
## 6 15316938        1        1                                         DWI
##      Unit_ID    Person_ID
## 1 14570071_1 14570071_1_1
## 2 14881309_1 14881309_1_1
## 3 14963691_1 14963691_1_1
## 4 14963691_1 14963691_1_1
## 5 14990459_1 14990459_1_1
## 6 15316938_1 15316938_1_1
cit2 <- cit1 %>% mutate_if(is.character, as.factor) 
tab= ExpCTable(cit2[, -c(1, 5, 6)],Target=NULL,
          round=2,bin=NULL,per=F,clim=212020,nlim=212020)
datatable(
  tab, extensions = 'Buttons', options = list(
    dom = 'Bfrtip',
    buttons = c( 'csv', 'excel')
  )
)
## Warning in instance$preRenderHook(instance): It seems your data is too big for
## client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html