SKIDNUMBER

# Read the combined crash records and split them into the two study groups
# identified by the DataType column ("Control" and "Main").
# NOTE(review): setwd() targets a machine-specific absolute path, so this
# only runs where that folder exists.
setwd("C:/Users/s-das/Syncplicity Folders/Private/MY_Projects/SHRP-2 Projects/Atlas_Inclement Weather/TRB Paper")
a04 <- read.csv(file = "FINAL_1.csv")
# which() drops rows whose DataType is NA, matching subset()'s behaviour.
cntl <- a04[which(a04$DataType == "Control"), , drop = FALSE]
main <- a04[which(a04$DataType == "Main"), , drop = FALSE]

# Dimensions (rows, columns) of the control and main subsets.
dim(cntl)
## [1] 11666    42
dim(main)
## [1] 11953    42
# Column names of the main subset.
names(main)
##  [1] "CRSHNUM_AG"   "ID_A"         "CRASHNUM1"    "DataType"    
##  [5] "HIGHESTINJ"   "Category"     "Category1"    "Vis_Score"   
##  [9] "SKIDNUMBER"   "RCISLDWTH1"   "RCISLDWTH2"   "AVG_SH_WID"  
## [13] "RCIAADT"      "RCIAVGTFCT"   "RCIMAXSPD"    "LIGHTCOND"   
## [17] "WEATHCOND"    "DIV_UNDIV"    "AGE_DRPED"    "RDSURFCOND"  
## [21] "YEAR"         "CNTOFVEH"     "ID"           "Airport_Na"  
## [25] "Airport_Nu"   "LATITUDE"     "LONGITUDE"    "CRSHCAUSE1"  
## [29] "CNTOFINJ"     "CNTOFFATL"    "CNTOFSVINJ"   "RCIFUNCLAS"  
## [33] "RCIACC"       "CR_LATITUDE"  "CR_LONGITUDE" "RCISLDWTH3"  
## [37] "RCIMEDWDTH"   "StartDate"    "Start_Time"   "End_Date"    
## [41] "End_Time"     "Duration"
# Per-column summary of the control subset.
summary(cntl)
##           CRSHNUM_AG         ID_A             CRASHNUM1    
##  FL100100440_18:    1   A00002 :    1   FL822709170:    6  
##  FL100109580_31:    1   A00003 :    1   FL770497300:    5  
##  FL100109580_46:    1   A00005 :    1   FL770867990:    5  
##  FL101183920_18:    1   A00007 :    1   FL819791520:    5  
##  FL101183920_22:    1   A00009 :    1   FL819856850:    5  
##  FL101187230_37:    1   A00011 :    1   FL820478580:    5  
##  (Other)       :11660   (Other):11660   (Other)    :11635  
##     DataType       HIGHESTINJ     Category     Category1       Vis_Score 
##  Control:11666   Min.   :1.000   cat_a:4479   cat_10:11666   Min.   :10  
##  Main   :    0   1st Qu.:1.000   cat_b:2846   cat_a :    0   1st Qu.:10  
##                  Median :1.000   cat_c:1654   cat_b :    0   Median :10  
##                  Mean   :1.799   cat_e:2687   cat_c :    0   Mean   :10  
##                  3rd Qu.:2.000                cat_e :    0   3rd Qu.:10  
##                  Max.   :5.000                               Max.   :10  
##                                                                          
##    SKIDNUMBER      RCISLDWTH1       RCISLDWTH2       AVG_SH_WID    
##  Min.   : 0.00   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:33.00   1st Qu.: 2.000   1st Qu.: 0.000   1st Qu.: 1.000  
##  Median :36.00   Median : 4.000   Median : 1.500   Median : 3.000  
##  Mean   :35.53   Mean   : 4.783   Mean   : 2.716   Mean   : 3.749  
##  3rd Qu.:40.00   3rd Qu.: 7.000   3rd Qu.: 3.000   3rd Qu.: 6.000  
##  Max.   :58.00   Max.   :35.000   Max.   :16.000   Max.   :19.500  
##                                                                    
##     RCIAADT         RCIAVGTFCT       RCIMAXSPD       LIGHTCOND    
##  Min.   :     0   Min.   : 0.000   Min.   :25.00   Min.   :1.000  
##  1st Qu.: 25500   1st Qu.: 3.230   1st Qu.:40.00   1st Qu.:1.000  
##  Median : 40500   Median : 4.500   Median :45.00   Median :1.000  
##  Mean   : 60099   Mean   : 5.109   Mean   :44.91   Mean   :2.055  
##  3rd Qu.: 59000   3rd Qu.: 6.200   3rd Qu.:50.00   3rd Qu.:4.000  
##  Max.   :304000   Max.   :26.750   Max.   :70.00   Max.   :5.000  
##                                                                   
##    WEATHCOND        DIV_UNDIV       AGE_DRPED        RDSURFCOND    
##  Min.   : 1.000   Min.   :0.000   Min.   : 15.00   Min.   : 1.000  
##  1st Qu.: 1.000   1st Qu.:1.000   1st Qu.: 26.00   1st Qu.: 1.000  
##  Median : 1.000   Median :1.000   Median : 38.00   Median : 1.000  
##  Mean   : 1.407   Mean   :1.301   Mean   : 40.34   Mean   : 1.182  
##  3rd Qu.: 1.000   3rd Qu.:2.000   3rd Qu.: 52.00   3rd Qu.: 1.000  
##  Max.   :77.000   Max.   :2.000   Max.   :108.00   Max.   :88.000  
##                                                                    
##       YEAR         CNTOFVEH           ID                    Airport_Na  
##  Min.   :2010   Min.   :1.000   A0525  :  182   Cecil Field      :1569  
##  1st Qu.:2011   1st Qu.:2.000   A0418  :  179   Daytona Beach    :1239  
##  Median :2011   Median :2.000   A0898  :  160   Kissimmee Gateway:1119  
##  Mean   :2011   Mean   :2.165   A0447  :  122   Whitehouse Naval :1043  
##  3rd Qu.:2012   3rd Qu.:2.000   A0855  :  122   Ocala            : 689  
##  Max.   :2012   Max.   :7.000   A0531  :  120   Bartow           : 650  
##                                 (Other):10781   (Other)          :5357  
##    Airport_Nu       LATITUDE       LONGITUDE        CRSHCAUSE1   
##  Min.   :12809   Min.   :24.56   Min.   :-87.32   Min.   : 0.00  
##  1st Qu.:12834   1st Qu.:26.00   1st Qu.:-82.16   1st Qu.: 2.00  
##  Median :12873   Median :26.68   Median :-80.65   Median : 2.00  
##  Mean   :36699   Mean   :27.47   Mean   :-81.43   Mean   :15.07  
##  3rd Qu.:63823   3rd Qu.:28.55   3rd Qu.:-80.28   3rd Qu.:10.00  
##  Max.   :92825   Max.   :30.85   Max.   :-80.10   Max.   :77.00  
##                                                                  
##     CNTOFINJ         CNTOFFATL         CNTOFSVINJ        RCIFUNCLAS  
##  Min.   : 0.0000   Min.   :0.00000   Min.   :0.00000   Min.   : 1.0  
##  1st Qu.: 0.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:14.0  
##  Median : 0.0000   Median :0.00000   Median :0.00000   Median :14.0  
##  Mean   : 0.7917   Mean   :0.01114   Mean   :0.07526   Mean   :13.8  
##  3rd Qu.: 1.0000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:14.0  
##  Max.   :28.0000   Max.   :5.00000   Max.   :6.00000   Max.   :19.0  
##                                                        NA's   :416   
##      RCIACC       CR_LATITUDE     CR_LONGITUDE      RCISLDWTH3     
##  Min.   :1.000   Min.   :24.55   Min.   :-87.37   Min.   : 0.0000  
##  1st Qu.:3.000   1st Qu.:25.94   1st Qu.:-82.18   1st Qu.: 0.0000  
##  Median :3.000   Median :26.74   Median :-80.67   Median : 0.0000  
##  Mean   :2.585   Mean   :27.47   Mean   :-81.43   Mean   : 0.0486  
##  3rd Qu.:3.000   3rd Qu.:28.55   3rd Qu.:-80.23   3rd Qu.: 0.0000  
##  Max.   :3.000   Max.   :30.88   Max.   :-80.04   Max.   :12.0000  
##  NA's   :377                                                       
##    RCIMEDWDTH              StartDate       Start_Time   
##  Min.   :  0.00   2/4/2011 0:00 : 459   14:50:00:  324  
##  1st Qu.: 12.00   2/3/2011 0:00 : 280   7:53:00 :  262  
##  Median : 20.00   10/8/2011 0:00: 267   6:50:00 :  249  
##  Mean   : 24.35   2/1/2010 0:00 : 266   5:53:00 :  242  
##  3rd Qu.: 28.00   2/7/2012 0:00 : 263   16:50:00:  238  
##  Max.   :975.00   12/7/2012 0:00: 255   15:50:00:  220  
##                   (Other)       :9876   (Other) :10131  
##            End_Date        End_Time       Duration     
##  2/4/2011 0:00 : 459   7:53:00 : 683   Min.   : 1.500  
##  2/3/2011 0:00 : 280   8:53:00 : 577   1st Qu.: 2.000  
##  10/8/2011 0:00: 267   6:53:00 : 481   Median : 2.333  
##  2/1/2010 0:00 : 266   16:50:00: 423   Mean   : 3.051  
##  2/7/2012 0:00 : 263   8:50:00 : 345   3rd Qu.: 3.267  
##  12/7/2012 0:00: 255   17:53:00: 339   Max.   :11.917  
##  (Other)       :9876   (Other) :8818
# Per-column summary of the main subset.
summary(main)
##           CRSHNUM_AG         ID_A             CRASHNUM1    
##  FL101522170_34:    1   A21760 :    1   FL804639710:    7  
##  FL101522170_47:    1   A21761 :    1   FL828232580:    7  
##  FL101560890_15:    1   A21764 :    1   FL820128160:    6  
##  FL101758930_33:    1   A21765 :    1   FL832344760:    6  
##  FL101758930_41:    1   A21766 :    1   FL764352810:    5  
##  FL101758930_79:    1   A21767 :    1   FL772627210:    5  
##  (Other)       :11947   (Other):11947   (Other)    :11917  
##     DataType       HIGHESTINJ     Category     Category1   
##  Control:    0   Min.   :1.000   cat_a:4584   cat_10:   0  
##  Main   :11953   1st Qu.:1.000   cat_b:3309   cat_a :4584  
##                  Median :1.000   cat_c:1922   cat_b :3309  
##                  Mean   :1.771   cat_e:2138   cat_c :1922  
##                  3rd Qu.:2.000                cat_e :2138  
##                  Max.   :5.000                             
##                                                            
##    Vis_Score       SKIDNUMBER      RCISLDWTH1       RCISLDWTH2    
##  Min.   :0.500   Min.   : 0.00   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:1.000   1st Qu.:33.00   1st Qu.: 2.000   1st Qu.: 0.000  
##  Median :2.250   Median :36.00   Median : 4.000   Median : 2.000  
##  Mean   :2.215   Mean   :35.05   Mean   : 4.926   Mean   : 2.948  
##  3rd Qu.:3.500   3rd Qu.:40.00   3rd Qu.: 8.000   3rd Qu.: 4.000  
##  Max.   :3.500   Max.   :62.00   Max.   :30.000   Max.   :16.000  
##                                                                   
##    AVG_SH_WID        RCIAADT         RCIAVGTFCT       RCIMAXSPD    
##  Min.   : 0.000   Min.   :     0   Min.   : 0.000   Min.   :25.00  
##  1st Qu.: 1.000   1st Qu.: 26500   1st Qu.: 3.300   1st Qu.:40.00  
##  Median : 3.000   Median : 41000   Median : 4.600   Median :45.00  
##  Mean   : 3.937   Mean   : 61700   Mean   : 5.231   Mean   :45.58  
##  3rd Qu.: 6.000   3rd Qu.: 62000   3rd Qu.: 6.300   3rd Qu.:50.00  
##  Max.   :19.500   Max.   :304000   Max.   :31.770   Max.   :70.00  
##                                                     NA's   :11     
##    LIGHTCOND       WEATHCOND        DIV_UNDIV       AGE_DRPED     
##  Min.   :1.000   Min.   : 1.000   Min.   :0.000   Min.   : 15.00  
##  1st Qu.:1.000   1st Qu.: 1.000   1st Qu.:1.000   1st Qu.: 25.00  
##  Median :1.000   Median : 1.000   Median :1.000   Median : 37.00  
##  Mean   :1.997   Mean   : 1.762   Mean   :1.282   Mean   : 38.82  
##  3rd Qu.:4.000   3rd Qu.: 2.000   3rd Qu.:2.000   3rd Qu.: 50.00  
##  Max.   :5.000   Max.   :77.000   Max.   :2.000   Max.   :100.00  
##                                                                   
##    RDSURFCOND          YEAR         CNTOFVEH           ID       
##  Min.   : 1.000   Min.   :2010   Min.   :1.000   A0898  :  155  
##  1st Qu.: 1.000   1st Qu.:2010   1st Qu.:2.000   A0092  :  122  
##  Median : 1.000   Median :2011   Median :2.000   A0622  :  108  
##  Mean   : 1.454   Mean   :2011   Mean   :2.183   A0447  :  107  
##  3rd Qu.: 2.000   3rd Qu.:2012   3rd Qu.:2.000   A0525  :  106  
##  Max.   :88.000   Max.   :2012   Max.   :8.000   A0884  :  100  
##                                                  (Other):11255  
##              Airport_Na     Airport_Nu       LATITUDE       LONGITUDE     
##  Cecil Field      :1622   Min.   :12809   Min.   :24.56   Min.   :-87.32  
##  Daytona Beach    :1412   1st Qu.:12834   1st Qu.:26.00   1st Qu.:-82.16  
##  Kissimmee Gateway:1076   Median :12873   Median :26.92   Median :-80.65  
##  Ocala            : 754   Mean   :35040   Mean   :27.46   Mean   :-81.40  
##  Whitehouse Naval : 748   3rd Qu.:63823   3rd Qu.:28.55   3rd Qu.:-80.24  
##  Bartow           : 686   Max.   :92825   Max.   :30.84   Max.   :-80.10  
##  (Other)          :5655                                                   
##    CRSHCAUSE1      CNTOFINJ         CNTOFFATL          CNTOFSVINJ     
##  Min.   : 0.0   Min.   : 0.0000   Min.   :0.000000   Min.   :0.00000  
##  1st Qu.: 2.0   1st Qu.: 0.0000   1st Qu.:0.000000   1st Qu.:0.00000  
##  Median : 2.0   Median : 0.0000   Median :0.000000   Median :0.00000  
##  Mean   :14.7   Mean   : 0.7845   Mean   :0.006526   Mean   :0.06685  
##  3rd Qu.:10.0   3rd Qu.: 1.0000   3rd Qu.:0.000000   3rd Qu.:0.00000  
##  Max.   :77.0   Max.   :12.0000   Max.   :2.000000   Max.   :5.00000  
##                                                                       
##    RCIFUNCLAS       RCIACC       CR_LATITUDE     CR_LONGITUDE   
##  Min.   : 1.0   Min.   :1.000   Min.   :24.55   Min.   :-87.37  
##  1st Qu.:14.0   1st Qu.:3.000   1st Qu.:25.97   1st Qu.:-82.15  
##  Median :14.0   Median :3.000   Median :26.93   Median :-80.70  
##  Mean   :13.7   Mean   :2.542   Mean   :27.46   Mean   :-81.39  
##  3rd Qu.:14.0   3rd Qu.:3.000   3rd Qu.:28.55   3rd Qu.:-80.23  
##  Max.   :19.0   Max.   :3.000   Max.   :30.84   Max.   :-80.04  
##  NA's   :484    NA's   :462                                     
##    RCISLDWTH3         RCIMEDWDTH               StartDate    
##  Min.   : 0.00000   Min.   :  0.00   2/1/2010 0:00  :  408  
##  1st Qu.: 0.00000   1st Qu.: 13.00   3/12/2010 0:00 :  337  
##  Median : 0.00000   Median : 20.00   2/4/2011 0:00  :  317  
##  Mean   : 0.03623   Mean   : 25.54   2/17/2012 0:00 :  303  
##  3rd Qu.: 0.00000   3rd Qu.: 29.00   10/8/2011 0:00 :  260  
##  Max.   :12.00000   Max.   :975.00   12/12/2011 0:00:  235  
##                                      (Other)        :10093  
##     Start_Time               End_Date         End_Time       Duration     
##  6:50:00 :  354   2/1/2010 0:00  :  408   7:53:00 : 605   Min.   : 0.000  
##  5:53:00 :  326   3/12/2010 0:00 :  337   7:50:00 : 484   1st Qu.: 1.650  
##  7:53:00 :  294   2/4/2011 0:00  :  317   8:50:00 : 480   Median : 2.000  
##  14:50:00:  246   2/17/2012 0:00 :  303   6:53:00 : 452   Mean   : 2.364  
##  15:50:00:  239   10/8/2011 0:00 :  260   16:50:00: 410   3rd Qu.: 3.000  
##  5:55:00 :  204   12/12/2011 0:00:  235   8:53:00 : 360   Max.   :11.917  
##  (Other) :10290   (Other)        :10093   (Other) :9162
library(ggplot2)
# Column names of the control subset.
names(cntl)
##  [1] "CRSHNUM_AG"   "ID_A"         "CRASHNUM1"    "DataType"    
##  [5] "HIGHESTINJ"   "Category"     "Category1"    "Vis_Score"   
##  [9] "SKIDNUMBER"   "RCISLDWTH1"   "RCISLDWTH2"   "AVG_SH_WID"  
## [13] "RCIAADT"      "RCIAVGTFCT"   "RCIMAXSPD"    "LIGHTCOND"   
## [17] "WEATHCOND"    "DIV_UNDIV"    "AGE_DRPED"    "RDSURFCOND"  
## [21] "YEAR"         "CNTOFVEH"     "ID"           "Airport_Na"  
## [25] "Airport_Nu"   "LATITUDE"     "LONGITUDE"    "CRSHCAUSE1"  
## [29] "CNTOFINJ"     "CNTOFFATL"    "CNTOFSVINJ"   "RCIFUNCLAS"  
## [33] "RCIACC"       "CR_LATITUDE"  "CR_LONGITUDE" "RCISLDWTH3"  
## [37] "RCIMEDWDTH"   "StartDate"    "Start_Time"   "End_Date"    
## [41] "End_Time"     "Duration"
# Histograms of SKIDNUMBER for the control (m1) and main (m2) subsets,
# printed into the two cells of a 1 x 2 grid layout.
m1 <- ggplot(data = cntl, mapping = aes(x = SKIDNUMBER)) +
  geom_histogram() +
  theme_bw()
m2 <- ggplot(data = main, mapping = aes(x = SKIDNUMBER)) +
  geom_histogram() +
  theme_bw()

library(grid)
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow = 1, ncol = 2)))
# Viewport addressing one cell (row x, column y) of the pushed layout.
vplayout <- function(x, y) {
  viewport(layout.pos.row = x, layout.pos.col = y)
}
print(m1, vp = vplayout(x = 1, y = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
print(m2, vp = vplayout(x = 1, y = 2))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

AVG_SH_WID

# Histograms of AVG_SH_WID for the control (m1) and main (m2) subsets,
# printed into the two cells of a 1 x 2 grid layout.
m1 <- ggplot(data = cntl, mapping = aes(x = AVG_SH_WID)) +
  geom_histogram() +
  theme_bw()
m2 <- ggplot(data = main, mapping = aes(x = AVG_SH_WID)) +
  geom_histogram() +
  theme_bw()

grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow = 1, ncol = 2)))
# Viewport addressing one cell (row x, column y) of the pushed layout.
vplayout <- function(x, y) {
  viewport(layout.pos.row = x, layout.pos.col = y)
}
print(m1, vp = vplayout(x = 1, y = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
print(m2, vp = vplayout(x = 1, y = 2))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

RCIAADT

# Histograms of RCIAADT for the control (m1) and main (m2) subsets,
# printed into the two cells of a 1 x 2 grid layout.
m1 <- ggplot(data = cntl, mapping = aes(x = RCIAADT)) +
  geom_histogram() +
  theme_bw()
m2 <- ggplot(data = main, mapping = aes(x = RCIAADT)) +
  geom_histogram() +
  theme_bw()

grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow = 1, ncol = 2)))
# Viewport addressing one cell (row x, column y) of the pushed layout.
vplayout <- function(x, y) {
  viewport(layout.pos.row = x, layout.pos.col = y)
}
print(m1, vp = vplayout(x = 1, y = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
print(m2, vp = vplayout(x = 1, y = 2))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

RCIAVGTFCT

# Histograms of RCIAVGTFCT for the control (m1) and main (m2) subsets,
# printed into the two cells of a 1 x 2 grid layout.
m1 <- ggplot(data = cntl, mapping = aes(x = RCIAVGTFCT)) +
  geom_histogram() +
  theme_bw()
m2 <- ggplot(data = main, mapping = aes(x = RCIAVGTFCT)) +
  geom_histogram() +
  theme_bw()

grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow = 1, ncol = 2)))
# Viewport addressing one cell (row x, column y) of the pushed layout.
vplayout <- function(x, y) {
  viewport(layout.pos.row = x, layout.pos.col = y)
}
print(m1, vp = vplayout(x = 1, y = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
print(m2, vp = vplayout(x = 1, y = 2))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

RCIMAXSPD

# Histograms of RCIMAXSPD for the control (m1) and main (m2) subsets,
# printed into the two cells of a 1 x 2 grid layout.
m1 <- ggplot(data = cntl, mapping = aes(x = RCIMAXSPD)) +
  geom_histogram() +
  theme_bw()
m2 <- ggplot(data = main, mapping = aes(x = RCIMAXSPD)) +
  geom_histogram() +
  theme_bw()

grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow = 1, ncol = 2)))
# Viewport addressing one cell (row x, column y) of the pushed layout.
vplayout <- function(x, y) {
  viewport(layout.pos.row = x, layout.pos.col = y)
}
print(m1, vp = vplayout(x = 1, y = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
print(m2, vp = vplayout(x = 1, y = 2))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

AGE_DRPED

# Histograms of AGE_DRPED for the control (m1) and main (m2) subsets,
# printed into the two cells of a 1 x 2 grid layout.
m1 <- ggplot(data = cntl, mapping = aes(x = AGE_DRPED)) +
  geom_histogram() +
  theme_bw()
m2 <- ggplot(data = main, mapping = aes(x = AGE_DRPED)) +
  geom_histogram() +
  theme_bw()

grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow = 1, ncol = 2)))
# Viewport addressing one cell (row x, column y) of the pushed layout.
vplayout <- function(x, y) {
  viewport(layout.pos.row = x, layout.pos.col = y)
}
print(m1, vp = vplayout(x = 1, y = 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
print(m2, vp = vplayout(x = 1, y = 2))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

WEATHCOND, LIGHTCOND, DIV_UNDIV

# Convert the WEATHCOND codes to a factor in both subsets, then count the
# records per code (main first, then control).
main[["WEATHCOND"]] <- as.factor(main[["WEATHCOND"]])
cntl[["WEATHCOND"]] <- as.factor(cntl[["WEATHCOND"]])
table(main[["WEATHCOND"]])
## 
##    1    2    3    4   77 
## 6978 2481 2313  161   20
table(cntl[["WEATHCOND"]])
## 
##    1    2    3    4   77 
## 8761 1909  950   35   11
# Convert the LIGHTCOND codes to a factor in both subsets, then count the
# records per code (main first, then control).
main[["LIGHTCOND"]] <- as.factor(main[["LIGHTCOND"]])
cntl[["LIGHTCOND"]] <- as.factor(cntl[["LIGHTCOND"]])
table(main[["LIGHTCOND"]])
## 
##    1    2    3    4    5 
## 7806  336  275 3117  419
table(cntl[["LIGHTCOND"]])
## 
##    1    2    3    4    5 
## 7383  352  232 3300  399
# Convert the DIV_UNDIV codes to a factor in both subsets, then count the
# records per code (main first, then control).
main[["DIV_UNDIV"]] <- as.factor(main[["DIV_UNDIV"]])
cntl[["DIV_UNDIV"]] <- as.factor(cntl[["DIV_UNDIV"]])
table(main[["DIV_UNDIV"]])
## 
##    0    1    2 
##  157 8270 3526
table(cntl[["DIV_UNDIV"]])
## 
##    0    1    2 
##  159 7841 3666
# Flat contingency tables with Category1 as rows and HIGHESTINJ as columns,
# for the main subset and then the control subset.
ftable(HIGHESTINJ ~ Category1, data = main)
##           HIGHESTINJ    1    2    3    4    5
## Category1                                    
## cat_10                  0    0    0    0    0
## cat_a                2424 1174  710  251   25
## cat_b                1715  910  528  141   15
## cat_c                 994  456  332  125   15
## cat_e                1046  566  400  105   21
ftable(HIGHESTINJ ~ Category1, data = cntl)
##           HIGHESTINJ    1    2    3    4    5
## Category1                                    
## cat_10               5966 2950 1979  666  105
## cat_a                   0    0    0    0    0
## cat_b                   0    0    0    0    0
## cat_c                   0    0    0    0    0
## cat_e                   0    0    0    0    0