SKIDNUMBER
# Load the crash data set and split it into the two study groups.
# NOTE(review): the working directory is a hard-coded, user-specific absolute
# path; it is left untouched here because other code may rely on it.
setwd("C:/Users/s-das/Syncplicity Folders/Private/MY_Projects/SHRP-2 Projects/Atlas_Inclement Weather/TRB Paper")
# stringsAsFactors = TRUE is spelled out because the summaries and contingency
# tables recorded in this file treat text columns (DataType, Category1, ...)
# as factors. That was read.csv()'s default before R 4.0.0; from 4.0.0 on the
# default is FALSE and the recorded output would no longer be reproduced.
a04 <- read.csv("FINAL_1.csv", stringsAsFactors = TRUE)
# Rows whose DataType is "Control" and "Main", respectively.
cntl <- subset(a04, DataType == "Control")
main <- subset(a04, DataType == "Main")
# Dimensions (rows, columns) of each group.
dim(cntl)
## [1] 11666 42
dim(main)
## [1] 11953 42
# Column names of the Main group.
names(main)
## [1] "CRSHNUM_AG" "ID_A" "CRASHNUM1" "DataType"
## [5] "HIGHESTINJ" "Category" "Category1" "Vis_Score"
## [9] "SKIDNUMBER" "RCISLDWTH1" "RCISLDWTH2" "AVG_SH_WID"
## [13] "RCIAADT" "RCIAVGTFCT" "RCIMAXSPD" "LIGHTCOND"
## [17] "WEATHCOND" "DIV_UNDIV" "AGE_DRPED" "RDSURFCOND"
## [21] "YEAR" "CNTOFVEH" "ID" "Airport_Na"
## [25] "Airport_Nu" "LATITUDE" "LONGITUDE" "CRSHCAUSE1"
## [29] "CNTOFINJ" "CNTOFFATL" "CNTOFSVINJ" "RCIFUNCLAS"
## [33] "RCIACC" "CR_LATITUDE" "CR_LONGITUDE" "RCISLDWTH3"
## [37] "RCIMEDWDTH" "StartDate" "Start_Time" "End_Date"
## [41] "End_Time" "Duration"
# Per-column summary of the Control group (rows of a04 with
# DataType == "Control"); the recorded output follows.
summary(cntl)
## CRSHNUM_AG ID_A CRASHNUM1
## FL100100440_18: 1 A00002 : 1 FL822709170: 6
## FL100109580_31: 1 A00003 : 1 FL770497300: 5
## FL100109580_46: 1 A00005 : 1 FL770867990: 5
## FL101183920_18: 1 A00007 : 1 FL819791520: 5
## FL101183920_22: 1 A00009 : 1 FL819856850: 5
## FL101187230_37: 1 A00011 : 1 FL820478580: 5
## (Other) :11660 (Other):11660 (Other) :11635
## DataType HIGHESTINJ Category Category1 Vis_Score
## Control:11666 Min. :1.000 cat_a:4479 cat_10:11666 Min. :10
## Main : 0 1st Qu.:1.000 cat_b:2846 cat_a : 0 1st Qu.:10
## Median :1.000 cat_c:1654 cat_b : 0 Median :10
## Mean :1.799 cat_e:2687 cat_c : 0 Mean :10
## 3rd Qu.:2.000 cat_e : 0 3rd Qu.:10
## Max. :5.000 Max. :10
##
## SKIDNUMBER RCISLDWTH1 RCISLDWTH2 AVG_SH_WID
## Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.:33.00 1st Qu.: 2.000 1st Qu.: 0.000 1st Qu.: 1.000
## Median :36.00 Median : 4.000 Median : 1.500 Median : 3.000
## Mean :35.53 Mean : 4.783 Mean : 2.716 Mean : 3.749
## 3rd Qu.:40.00 3rd Qu.: 7.000 3rd Qu.: 3.000 3rd Qu.: 6.000
## Max. :58.00 Max. :35.000 Max. :16.000 Max. :19.500
##
## RCIAADT RCIAVGTFCT RCIMAXSPD LIGHTCOND
## Min. : 0 Min. : 0.000 Min. :25.00 Min. :1.000
## 1st Qu.: 25500 1st Qu.: 3.230 1st Qu.:40.00 1st Qu.:1.000
## Median : 40500 Median : 4.500 Median :45.00 Median :1.000
## Mean : 60099 Mean : 5.109 Mean :44.91 Mean :2.055
## 3rd Qu.: 59000 3rd Qu.: 6.200 3rd Qu.:50.00 3rd Qu.:4.000
## Max. :304000 Max. :26.750 Max. :70.00 Max. :5.000
##
## WEATHCOND DIV_UNDIV AGE_DRPED RDSURFCOND
## Min. : 1.000 Min. :0.000 Min. : 15.00 Min. : 1.000
## 1st Qu.: 1.000 1st Qu.:1.000 1st Qu.: 26.00 1st Qu.: 1.000
## Median : 1.000 Median :1.000 Median : 38.00 Median : 1.000
## Mean : 1.407 Mean :1.301 Mean : 40.34 Mean : 1.182
## 3rd Qu.: 1.000 3rd Qu.:2.000 3rd Qu.: 52.00 3rd Qu.: 1.000
## Max. :77.000 Max. :2.000 Max. :108.00 Max. :88.000
##
## YEAR CNTOFVEH ID Airport_Na
## Min. :2010 Min. :1.000 A0525 : 182 Cecil Field :1569
## 1st Qu.:2011 1st Qu.:2.000 A0418 : 179 Daytona Beach :1239
## Median :2011 Median :2.000 A0898 : 160 Kissimmee Gateway:1119
## Mean :2011 Mean :2.165 A0447 : 122 Whitehouse Naval :1043
## 3rd Qu.:2012 3rd Qu.:2.000 A0855 : 122 Ocala : 689
## Max. :2012 Max. :7.000 A0531 : 120 Bartow : 650
## (Other):10781 (Other) :5357
## Airport_Nu LATITUDE LONGITUDE CRSHCAUSE1
## Min. :12809 Min. :24.56 Min. :-87.32 Min. : 0.00
## 1st Qu.:12834 1st Qu.:26.00 1st Qu.:-82.16 1st Qu.: 2.00
## Median :12873 Median :26.68 Median :-80.65 Median : 2.00
## Mean :36699 Mean :27.47 Mean :-81.43 Mean :15.07
## 3rd Qu.:63823 3rd Qu.:28.55 3rd Qu.:-80.28 3rd Qu.:10.00
## Max. :92825 Max. :30.85 Max. :-80.10 Max. :77.00
##
## CNTOFINJ CNTOFFATL CNTOFSVINJ RCIFUNCLAS
## Min. : 0.0000 Min. :0.00000 Min. :0.00000 Min. : 1.0
## 1st Qu.: 0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:14.0
## Median : 0.0000 Median :0.00000 Median :0.00000 Median :14.0
## Mean : 0.7917 Mean :0.01114 Mean :0.07526 Mean :13.8
## 3rd Qu.: 1.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:14.0
## Max. :28.0000 Max. :5.00000 Max. :6.00000 Max. :19.0
## NA's :416
## RCIACC CR_LATITUDE CR_LONGITUDE RCISLDWTH3
## Min. :1.000 Min. :24.55 Min. :-87.37 Min. : 0.0000
## 1st Qu.:3.000 1st Qu.:25.94 1st Qu.:-82.18 1st Qu.: 0.0000
## Median :3.000 Median :26.74 Median :-80.67 Median : 0.0000
## Mean :2.585 Mean :27.47 Mean :-81.43 Mean : 0.0486
## 3rd Qu.:3.000 3rd Qu.:28.55 3rd Qu.:-80.23 3rd Qu.: 0.0000
## Max. :3.000 Max. :30.88 Max. :-80.04 Max. :12.0000
## NA's :377
## RCIMEDWDTH StartDate Start_Time
## Min. : 0.00 2/4/2011 0:00 : 459 14:50:00: 324
## 1st Qu.: 12.00 2/3/2011 0:00 : 280 7:53:00 : 262
## Median : 20.00 10/8/2011 0:00: 267 6:50:00 : 249
## Mean : 24.35 2/1/2010 0:00 : 266 5:53:00 : 242
## 3rd Qu.: 28.00 2/7/2012 0:00 : 263 16:50:00: 238
## Max. :975.00 12/7/2012 0:00: 255 15:50:00: 220
## (Other) :9876 (Other) :10131
## End_Date End_Time Duration
## 2/4/2011 0:00 : 459 7:53:00 : 683 Min. : 1.500
## 2/3/2011 0:00 : 280 8:53:00 : 577 1st Qu.: 2.000
## 10/8/2011 0:00: 267 6:53:00 : 481 Median : 2.333
## 2/1/2010 0:00 : 266 16:50:00: 423 Mean : 3.051
## 2/7/2012 0:00 : 263 8:50:00 : 345 3rd Qu.: 3.267
## 12/7/2012 0:00: 255 17:53:00: 339 Max. :11.917
## (Other) :9876 (Other) :8818
# Per-column summary of the Main group (rows of a04 with
# DataType == "Main"); the recorded output follows.
summary(main)
## CRSHNUM_AG ID_A CRASHNUM1
## FL101522170_34: 1 A21760 : 1 FL804639710: 7
## FL101522170_47: 1 A21761 : 1 FL828232580: 7
## FL101560890_15: 1 A21764 : 1 FL820128160: 6
## FL101758930_33: 1 A21765 : 1 FL832344760: 6
## FL101758930_41: 1 A21766 : 1 FL764352810: 5
## FL101758930_79: 1 A21767 : 1 FL772627210: 5
## (Other) :11947 (Other):11947 (Other) :11917
## DataType HIGHESTINJ Category Category1
## Control: 0 Min. :1.000 cat_a:4584 cat_10: 0
## Main :11953 1st Qu.:1.000 cat_b:3309 cat_a :4584
## Median :1.000 cat_c:1922 cat_b :3309
## Mean :1.771 cat_e:2138 cat_c :1922
## 3rd Qu.:2.000 cat_e :2138
## Max. :5.000
##
## Vis_Score SKIDNUMBER RCISLDWTH1 RCISLDWTH2
## Min. :0.500 Min. : 0.00 Min. : 0.000 Min. : 0.000
## 1st Qu.:1.000 1st Qu.:33.00 1st Qu.: 2.000 1st Qu.: 0.000
## Median :2.250 Median :36.00 Median : 4.000 Median : 2.000
## Mean :2.215 Mean :35.05 Mean : 4.926 Mean : 2.948
## 3rd Qu.:3.500 3rd Qu.:40.00 3rd Qu.: 8.000 3rd Qu.: 4.000
## Max. :3.500 Max. :62.00 Max. :30.000 Max. :16.000
##
## AVG_SH_WID RCIAADT RCIAVGTFCT RCIMAXSPD
## Min. : 0.000 Min. : 0 Min. : 0.000 Min. :25.00
## 1st Qu.: 1.000 1st Qu.: 26500 1st Qu.: 3.300 1st Qu.:40.00
## Median : 3.000 Median : 41000 Median : 4.600 Median :45.00
## Mean : 3.937 Mean : 61700 Mean : 5.231 Mean :45.58
## 3rd Qu.: 6.000 3rd Qu.: 62000 3rd Qu.: 6.300 3rd Qu.:50.00
## Max. :19.500 Max. :304000 Max. :31.770 Max. :70.00
## NA's :11
## LIGHTCOND WEATHCOND DIV_UNDIV AGE_DRPED
## Min. :1.000 Min. : 1.000 Min. :0.000 Min. : 15.00
## 1st Qu.:1.000 1st Qu.: 1.000 1st Qu.:1.000 1st Qu.: 25.00
## Median :1.000 Median : 1.000 Median :1.000 Median : 37.00
## Mean :1.997 Mean : 1.762 Mean :1.282 Mean : 38.82
## 3rd Qu.:4.000 3rd Qu.: 2.000 3rd Qu.:2.000 3rd Qu.: 50.00
## Max. :5.000 Max. :77.000 Max. :2.000 Max. :100.00
##
## RDSURFCOND YEAR CNTOFVEH ID
## Min. : 1.000 Min. :2010 Min. :1.000 A0898 : 155
## 1st Qu.: 1.000 1st Qu.:2010 1st Qu.:2.000 A0092 : 122
## Median : 1.000 Median :2011 Median :2.000 A0622 : 108
## Mean : 1.454 Mean :2011 Mean :2.183 A0447 : 107
## 3rd Qu.: 2.000 3rd Qu.:2012 3rd Qu.:2.000 A0525 : 106
## Max. :88.000 Max. :2012 Max. :8.000 A0884 : 100
## (Other):11255
## Airport_Na Airport_Nu LATITUDE LONGITUDE
## Cecil Field :1622 Min. :12809 Min. :24.56 Min. :-87.32
## Daytona Beach :1412 1st Qu.:12834 1st Qu.:26.00 1st Qu.:-82.16
## Kissimmee Gateway:1076 Median :12873 Median :26.92 Median :-80.65
## Ocala : 754 Mean :35040 Mean :27.46 Mean :-81.40
## Whitehouse Naval : 748 3rd Qu.:63823 3rd Qu.:28.55 3rd Qu.:-80.24
## Bartow : 686 Max. :92825 Max. :30.84 Max. :-80.10
## (Other) :5655
## CRSHCAUSE1 CNTOFINJ CNTOFFATL CNTOFSVINJ
## Min. : 0.0 Min. : 0.0000 Min. :0.000000 Min. :0.00000
## 1st Qu.: 2.0 1st Qu.: 0.0000 1st Qu.:0.000000 1st Qu.:0.00000
## Median : 2.0 Median : 0.0000 Median :0.000000 Median :0.00000
## Mean :14.7 Mean : 0.7845 Mean :0.006526 Mean :0.06685
## 3rd Qu.:10.0 3rd Qu.: 1.0000 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :77.0 Max. :12.0000 Max. :2.000000 Max. :5.00000
##
## RCIFUNCLAS RCIACC CR_LATITUDE CR_LONGITUDE
## Min. : 1.0 Min. :1.000 Min. :24.55 Min. :-87.37
## 1st Qu.:14.0 1st Qu.:3.000 1st Qu.:25.97 1st Qu.:-82.15
## Median :14.0 Median :3.000 Median :26.93 Median :-80.70
## Mean :13.7 Mean :2.542 Mean :27.46 Mean :-81.39
## 3rd Qu.:14.0 3rd Qu.:3.000 3rd Qu.:28.55 3rd Qu.:-80.23
## Max. :19.0 Max. :3.000 Max. :30.84 Max. :-80.04
## NA's :484 NA's :462
## RCISLDWTH3 RCIMEDWDTH StartDate
## Min. : 0.00000 Min. : 0.00 2/1/2010 0:00 : 408
## 1st Qu.: 0.00000 1st Qu.: 13.00 3/12/2010 0:00 : 337
## Median : 0.00000 Median : 20.00 2/4/2011 0:00 : 317
## Mean : 0.03623 Mean : 25.54 2/17/2012 0:00 : 303
## 3rd Qu.: 0.00000 3rd Qu.: 29.00 10/8/2011 0:00 : 260
## Max. :12.00000 Max. :975.00 12/12/2011 0:00: 235
## (Other) :10093
## Start_Time End_Date End_Time Duration
## 6:50:00 : 354 2/1/2010 0:00 : 408 7:53:00 : 605 Min. : 0.000
## 5:53:00 : 326 3/12/2010 0:00 : 337 7:50:00 : 484 1st Qu.: 1.650
## 7:53:00 : 294 2/4/2011 0:00 : 317 8:50:00 : 480 Median : 2.000
## 14:50:00: 246 2/17/2012 0:00 : 303 6:53:00 : 452 Mean : 2.364
## 15:50:00: 239 10/8/2011 0:00 : 260 16:50:00: 410 3rd Qu.: 3.000
## 5:55:00 : 204 12/12/2011 0:00: 235 8:53:00 : 360 Max. :11.917
## (Other) :10290 (Other) :10093 (Other) :9162
library(ggplot2)
# Column names of the Control group; the recorded output follows.
names(cntl)
## [1] "CRSHNUM_AG" "ID_A" "CRASHNUM1" "DataType"
## [5] "HIGHESTINJ" "Category" "Category1" "Vis_Score"
## [9] "SKIDNUMBER" "RCISLDWTH1" "RCISLDWTH2" "AVG_SH_WID"
## [13] "RCIAADT" "RCIAVGTFCT" "RCIMAXSPD" "LIGHTCOND"
## [17] "WEATHCOND" "DIV_UNDIV" "AGE_DRPED" "RDSURFCOND"
## [21] "YEAR" "CNTOFVEH" "ID" "Airport_Na"
## [25] "Airport_Nu" "LATITUDE" "LONGITUDE" "CRSHCAUSE1"
## [29] "CNTOFINJ" "CNTOFFATL" "CNTOFSVINJ" "RCIFUNCLAS"
## [33] "RCIACC" "CR_LATITUDE" "CR_LONGITUDE" "RCISLDWTH3"
## [37] "RCIMEDWDTH" "StartDate" "Start_Time" "End_Date"
## [41] "End_Time" "Duration"
# Side-by-side histograms of SKIDNUMBER: Control (left) vs. Main (right).
# bins = 30 is what stat_bin() falls back to when neither `bins` nor
# `binwidth` is given; stating it keeps the figure identical and silences the
# "Pick better value with `binwidth`" message.
m1 <- ggplot(cntl, aes(x = SKIDNUMBER)) +
  geom_histogram(bins = 30) +
  theme_bw()
m2 <- ggplot(main, aes(x = SKIDNUMBER)) +
  geom_histogram(bins = 30) +
  theme_bw()
library(grid)
# Fresh page with a 1 x 2 layout; each plot is printed into its own cell.
grid.newpage()
pushViewport(viewport(layout = grid.layout(1, 2)))
# Helper: viewport addressing cell (row x, column y) of the pushed layout.
vplayout <- function(x, y) {
  viewport(layout.pos.row = x, layout.pos.col = y)
}
print(m1, vp = vplayout(1, 1))
print(m2, vp = vplayout(1, 2))

AVG_SH_WID
# Side-by-side histograms of AVG_SH_WID: Control (left) vs. Main (right).
# bins = 30 is what stat_bin() falls back to when neither `bins` nor
# `binwidth` is given; stating it keeps the figure identical and silences the
# "Pick better value with `binwidth`" message.
m1 <- ggplot(cntl, aes(x = AVG_SH_WID)) +
  geom_histogram(bins = 30) +
  theme_bw()
m2 <- ggplot(main, aes(x = AVG_SH_WID)) +
  geom_histogram(bins = 30) +
  theme_bw()
# Fresh page with a 1 x 2 layout; each plot is printed into its own cell.
# The cell viewports are built directly, so no helper is redefined here.
grid.newpage()
pushViewport(viewport(layout = grid.layout(1, 2)))
print(m1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(m2, vp = viewport(layout.pos.row = 1, layout.pos.col = 2))

RCIAADT
# Side-by-side histograms of RCIAADT: Control (left) vs. Main (right).
# bins = 30 is what stat_bin() falls back to when neither `bins` nor
# `binwidth` is given; stating it keeps the figure identical and silences the
# "Pick better value with `binwidth`" message.
m1 <- ggplot(cntl, aes(x = RCIAADT)) +
  geom_histogram(bins = 30) +
  theme_bw()
m2 <- ggplot(main, aes(x = RCIAADT)) +
  geom_histogram(bins = 30) +
  theme_bw()
# Fresh page with a 1 x 2 layout; each plot is printed into its own cell.
# The cell viewports are built directly, so no helper is redefined here.
grid.newpage()
pushViewport(viewport(layout = grid.layout(1, 2)))
print(m1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(m2, vp = viewport(layout.pos.row = 1, layout.pos.col = 2))

RCIAVGTFCT
# Side-by-side histograms of RCIAVGTFCT: Control (left) vs. Main (right).
# bins = 30 is what stat_bin() falls back to when neither `bins` nor
# `binwidth` is given; stating it keeps the figure identical and silences the
# "Pick better value with `binwidth`" message.
# Note: summary(main) records 11 NA values in this column.
m1 <- ggplot(cntl, aes(x = RCIAVGTFCT)) +
  geom_histogram(bins = 30) +
  theme_bw()
m2 <- ggplot(main, aes(x = RCIAVGTFCT)) +
  geom_histogram(bins = 30) +
  theme_bw()
# Fresh page with a 1 x 2 layout; each plot is printed into its own cell.
# The cell viewports are built directly, so no helper is redefined here.
grid.newpage()
pushViewport(viewport(layout = grid.layout(1, 2)))
print(m1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(m2, vp = viewport(layout.pos.row = 1, layout.pos.col = 2))

RCIMAXSPD
# Side-by-side histograms of RCIMAXSPD: Control (left) vs. Main (right).
# bins = 30 is what stat_bin() falls back to when neither `bins` nor
# `binwidth` is given; stating it keeps the figure identical and silences the
# "Pick better value with `binwidth`" message.
m1 <- ggplot(cntl, aes(x = RCIMAXSPD)) +
  geom_histogram(bins = 30) +
  theme_bw()
m2 <- ggplot(main, aes(x = RCIMAXSPD)) +
  geom_histogram(bins = 30) +
  theme_bw()
# Fresh page with a 1 x 2 layout; each plot is printed into its own cell.
# The cell viewports are built directly, so no helper is redefined here.
grid.newpage()
pushViewport(viewport(layout = grid.layout(1, 2)))
print(m1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(m2, vp = viewport(layout.pos.row = 1, layout.pos.col = 2))

AGE_DRPED
# Side-by-side histograms of AGE_DRPED: Control (left) vs. Main (right).
# bins = 30 is what stat_bin() falls back to when neither `bins` nor
# `binwidth` is given; stating it keeps the figure identical and silences the
# "Pick better value with `binwidth`" message.
m1 <- ggplot(cntl, aes(x = AGE_DRPED)) +
  geom_histogram(bins = 30) +
  theme_bw()
m2 <- ggplot(main, aes(x = AGE_DRPED)) +
  geom_histogram(bins = 30) +
  theme_bw()
# Fresh page with a 1 x 2 layout; each plot is printed into its own cell.
# The cell viewports are built directly, so no helper is redefined here.
grid.newpage()
pushViewport(viewport(layout = grid.layout(1, 2)))
print(m1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(m2, vp = viewport(layout.pos.row = 1, layout.pos.col = 2))

Categorical variables: WEATHCOND, LIGHTCOND, DIV_UNDIV, HIGHESTINJ
# Recode the numeric WEATHCOND codes as a factor in both groups, then count
# the rows per code (Main first, Control second).
main[["WEATHCOND"]] <- as.factor(main[["WEATHCOND"]])
cntl[["WEATHCOND"]] <- as.factor(cntl[["WEATHCOND"]])
table(main[["WEATHCOND"]])
##
## 1 2 3 4 77
## 6978 2481 2313 161 20
table(cntl[["WEATHCOND"]])
##
## 1 2 3 4 77
## 8761 1909 950 35 11
# Recode the numeric LIGHTCOND codes as a factor in both groups, then count
# the rows per code (Main first, Control second).
main[["LIGHTCOND"]] <- as.factor(main[["LIGHTCOND"]])
cntl[["LIGHTCOND"]] <- as.factor(cntl[["LIGHTCOND"]])
table(main[["LIGHTCOND"]])
##
## 1 2 3 4 5
## 7806 336 275 3117 419
table(cntl[["LIGHTCOND"]])
##
## 1 2 3 4 5
## 7383 352 232 3300 399
# Recode the numeric DIV_UNDIV codes as a factor in both groups, then count
# the rows per code (Main first, Control second).
main[["DIV_UNDIV"]] <- as.factor(main[["DIV_UNDIV"]])
cntl[["DIV_UNDIV"]] <- as.factor(cntl[["DIV_UNDIV"]])
table(main[["DIV_UNDIV"]])
##
## 0 1 2
## 157 8270 3526
table(cntl[["DIV_UNDIV"]])
##
## 0 1 2
## 159 7841 3666
# Flat contingency tables of HIGHESTINJ (columns) against Category1 (rows),
# first for the Main group and then for the Control group. Category1 levels
# that do not occur in a group still appear, as all-zero rows.
ftable(HIGHESTINJ ~ Category1, data = main)
## HIGHESTINJ 1 2 3 4 5
## Category1
## cat_10 0 0 0 0 0
## cat_a 2424 1174 710 251 25
## cat_b 1715 910 528 141 15
## cat_c 994 456 332 125 15
## cat_e 1046 566 400 105 21
ftable(HIGHESTINJ ~ Category1, data = cntl)
## HIGHESTINJ 1 2 3 4 5
## Category1
## cat_10 5966 2950 1979 666 105
## cat_a 0 0 0 0 0
## cat_b 0 0 0 0 0
## cat_c 0 0 0 0 0
## cat_e 0 0 0 0 0