Loading the data

# Switch to the project folder so the relative CSV path below resolves.
setwd(dir = "/Users/subasishdas1/Copy/Rpubs/rpubs/Crash_exmp1")
# Load the crash data set into a data frame.
all_a <- read.csv(file = "All_a_new.csv")
head(x = all_a)  # Show the first six rows
##   Avg_Crashes  ADT Segment_Length Median_Width Number_Lanes Shoulder_Width
## 1        0.00 2264           0.31            0            2              5
## 2        1.00  337           0.23            0            2              0
## 3        1.67 3601           0.31            0            2              0
## 4        0.67 3962           0.13            0            2              0
## 5        7.00 3237           0.17            0            2              0
## 6        0.00  701           0.09            0            2              0
##   Lane_Width Class CBD    Type
## 1         11     1   0 twolane
## 2         10     1   0 twolane
## 3         10     1   0 twolane
## 4         11     2   0 twolane
## 5         12     2   0 twolane
## 6         10     1   1 twolane
dim(all_a)  # Number of rows, then number of columns
## [1] 90 10
colnames(all_a)  # Column names of the data frame
##  [1] "Avg_Crashes"    "ADT"            "Segment_Length" "Median_Width"  
##  [5] "Number_Lanes"   "Shoulder_Width" "Lane_Width"     "Class"         
##  [9] "CBD"            "Type"
table(all_a[["Type"]])  # Count of rows for each value of Type
## 
## fourlane      Int  twolane 
##       30       30       30
# Split the data into one data frame per value of Type.
# which() discards NA comparisons, the same way subset() does.
twolane <- all_a[which(all_a$Type == "twolane"), ]
fourlane <- all_a[which(all_a$Type == "fourlane"), ]
int <- all_a[which(all_a$Type == "Int"), ]

Two-lane

library(e1071)
# First attempt on the two-lane subset: SVM regression of Avg_Crashes on
# all eight predictors.
svm.model <- svm(
  Avg_Crashes ~ ADT + Segment_Length + Median_Width + Number_Lanes +
    Shoulder_Width + Lane_Width + Class + CBD,
  data = twolane, cost = 100, gamma = 1
)
## Warning in svm.default(x, y, scale = scale, ..., na.action = na.action):
## Variable(s) 'Median_Width' and 'Number_Lanes' constant. Cannot scale data.
### modification
### Refit without Median_Width and Number_Lanes, the two columns the
### warning above reported as constant in the two-lane subset.
svm.model <- svm(
  Avg_Crashes ~ ADT + Segment_Length + Shoulder_Width + Lane_Width +
    Class + CBD,
  data = twolane, cost = 100, gamma = 1
)
# Predict for the same rows the model was fitted on; columns 2 to 9 hold
# the predictor variables.
svm.pred <- predict(svm.model, newdata = twolane[2:9])

### write.csv(svm.pred, "svm_twolane.csv")
# Observed value, predicted value and their difference (pred - obs),
# one row per two-lane record.
pl <- data.frame(cbind(
  obs = twolane$Avg_Crashes,
  pred = svm.pred,
  diff = svm.pred - twolane$Avg_Crashes
))
pl
##      obs      pred        diff
## 1   0.00 0.2178539  0.21785392
## 2   1.00 1.2173730  0.21737301
## 3   1.67 1.4513990 -0.21860099
## 4   0.67 0.8870490  0.21704898
## 5   7.00 6.7817412 -0.21825881
## 6   0.00 0.2181368  0.21813677
## 7   1.33 1.4414940  0.11149397
## 8   1.00 0.9406227 -0.05937731
## 9   2.33 2.5480234  0.21802342
## 10 10.00 9.7820490 -0.21795103
## 11  3.00 2.7816793 -0.21832069
## 12  1.67 1.6829701  0.01297007
## 13  2.67 2.4514955 -0.21850453
## 14  1.67 1.4521225 -0.21787748
## 15  2.00 1.7828937 -0.21710634
## 16  0.00 0.2173967  0.21739671
## 17  0.00 0.2188750  0.21887499
## 18  0.00 0.2188183  0.21881830
## 19  0.00 0.2179866  0.21798665
## 20  1.00 1.2191915  0.21919146
## 21  1.33 1.5478579  0.21785794
## 22  2.00 1.7818863 -0.21811370
## 23  0.00 0.2177289  0.21772893
## 24  3.67 3.4521887 -0.21781132
## 25  0.33 0.4951871  0.16518713
## 26  0.67 0.6911078  0.02110782
## 27  0.67 0.8875763  0.21757630
## 28  0.33 0.5483805  0.21838053
## 29  0.00 0.2178195  0.21781951
## 30  0.00 0.2190978  0.21909784
plot(x = pl$obs, y = pl$pred)  # Scatter of observed against predicted

## Comment: model is good (judged on the same rows it was fitted on).
print(x = svm.model)
## 
## Call:
## svm(formula = Avg_Crashes ~ ADT + Segment_Length + Shoulder_Width + 
##     Lane_Width + Class + CBD, data = twolane, cost = 100, gamma = 1)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  radial 
##        cost:  100 
##       gamma:  1 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  25

Four-lane

# First attempt on the four-lane subset with all eight predictors.
svm.model <- svm(
  Avg_Crashes ~ ADT + Segment_Length + Median_Width + Number_Lanes +
    Shoulder_Width + Lane_Width + Class + CBD,
  data = fourlane, cost = 100, gamma = 1
)
## Warning in svm.default(x, y, scale = scale, ..., na.action = na.action):
## Variable(s) 'Median_Width' and 'CBD' constant. Cannot scale data.
### modification
### Refit without Median_Width and CBD, the two columns the warning above
### reported as constant in the four-lane subset.
svm.model <- svm(
  Avg_Crashes ~ ADT + Segment_Length + Number_Lanes + Shoulder_Width +
    Lane_Width + Class,
  data = fourlane, cost = 100, gamma = 1
)
# Predict for the same rows the model was fitted on; columns 2 to 9 hold
# the predictor variables.
svm.pred <- predict(svm.model, newdata = fourlane[2:9])
print(x = svm.model)
## 
## Call:
## svm(formula = Avg_Crashes ~ ADT + Segment_Length + Number_Lanes + 
##     Shoulder_Width + Lane_Width + Class, data = fourlane, cost = 100, 
##     gamma = 1)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  radial 
##        cost:  100 
##       gamma:  1 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  27
# Observed value, predicted value and their difference (pred - obs),
# one row per four-lane record.
pl <- data.frame(cbind(
  obs = fourlane$Avg_Crashes,
  pred = svm.pred,
  diff = svm.pred - fourlane$Avg_Crashes
))
pl
##     obs       pred        diff
## 31 2.67  2.9422229  0.27222294
## 32 0.67  0.9422006  0.27220058
## 33 8.00  7.7291994 -0.27080057
## 34 0.00 -0.1859454 -0.18594539
## 35 0.00  0.2718133  0.27181326
## 36 0.67  0.9420076  0.27200758
## 37 1.00  1.2723890  0.27238903
## 38 1.00  1.2729382  0.27293822
## 39 6.33  6.0571761 -0.27282394
## 40 0.67  0.9409801  0.27098013
## 41 4.00  3.7269801 -0.27301992
## 42 5.67  5.9419802  0.27198020
## 43 3.67  3.3979686 -0.27203142
## 44 6.67  6.3979686 -0.27203144
## 45 1.33  1.6020274  0.27202745
## 46 9.00  8.7293041 -0.27069591
## 47 3.00  3.0804473  0.08044729
## 48 4.00  3.7287935 -0.27120652
## 49 3.67  4.8744353  1.20443526
## 50 0.67  0.9414899  0.27148991
## 51 0.33  0.6015183  0.27151834
## 52 0.33  0.6024319  0.27243186
## 53 1.33  1.0589257 -0.27107426
## 54 1.67  1.9411041  0.27110409
## 55 2.00  2.2714316  0.27143156
## 56 1.33  1.6010484  0.27104836
## 57 1.00  0.7267401 -0.27325994
## 58 2.33  2.4748522  0.14485218
## 59 5.33  5.0588991 -0.27110092
## 60 9.00  8.7281731 -0.27182688
plot(x = pl$obs, y = pl$pred)  # Scatter of observed against predicted

## Comment: model is good (judged on the same rows it was fitted on).
print(x = svm.model)
## 
## Call:
## svm(formula = Avg_Crashes ~ ADT + Segment_Length + Number_Lanes + 
##     Shoulder_Width + Lane_Width + Class, data = fourlane, cost = 100, 
##     gamma = 1)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  radial 
##        cost:  100 
##       gamma:  1 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  27

Interstate

# First attempt on the interstate subset with all eight predictors.
svm.model <- svm(
  Avg_Crashes ~ ADT + Segment_Length + Median_Width + Number_Lanes +
    Shoulder_Width + Lane_Width + Class + CBD,
  data = int, cost = 100, gamma = 1
)
## Warning in svm.default(x, y, scale = scale, ..., na.action = na.action):
## Variable(s) 'Lane_Width' and 'Class' and 'CBD' constant. Cannot scale data.
### modification
### Refit without Lane_Width, Class and CBD, the three columns the warning
### above reported as constant in the interstate subset.
###
### Two corrections relative to the earlier version of this refit:
###   * it is now fitted on `int`; it was fitted on `fourlane`, so the
###     interstate predictions came from a model trained on other rows;
###   * Median_Width is kept, because the warning did not list it as
###     constant and only three columns are meant to be removed.
### NOTE: the printed Call, predictions and comment recorded further down
### in this file were produced by the earlier fourlane-based fit and need
### to be regenerated.
svm.model <- svm(
  Avg_Crashes ~ ADT + Segment_Length + Median_Width + Number_Lanes +
    Shoulder_Width,
  data = int, cost = 100, gamma = 1
)

# Predict for the same interstate rows the model was fitted on; columns
# 2 to 9 hold the predictor variables.
svm.pred <- predict(svm.model, newdata = int[2:9])
print(svm.model)
## 
## Call:
## svm(formula = Avg_Crashes ~ ADT + Segment_Length + Number_Lanes + 
##     Shoulder_Width, data = fourlane, cost = 100, gamma = 1)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  radial 
##        cost:  100 
##       gamma:  1 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  27
### write.csv(svm.pred, "svm_int.csv")
# Observed value, predicted value and their difference (pred - obs),
# one row per interstate record.
pl <- data.frame(cbind(
  obs = int$Avg_Crashes,
  pred = svm.pred,
  diff = svm.pred - int$Avg_Crashes
))
pl
##      obs     pred         diff
## 61  3.33 1.738589  -1.59141106
## 62  4.67 1.738553  -2.93144699
## 63  0.33 1.738553   1.40855301
## 64  6.33 1.738578  -4.59142182
## 65 26.67 1.738553 -24.93144699
## 66  6.00 1.738553  -4.26144699
## 67  0.67 1.738553   1.06855301
## 68  1.67 1.738553   0.06855301
## 69  0.00 1.738806   1.73880567
## 70  1.00 1.738701   0.73870096
## 71  2.33 1.738553  -0.59144699
## 72  1.00 1.738553   0.73855301
## 73  0.33 1.738553   1.40855321
## 74  0.67 1.738553   1.06855301
## 75  2.00 1.738596  -0.26140355
## 76  0.00 1.738553   1.73855301
## 77  0.00 1.738553   1.73855301
## 78  4.67 1.738553  -2.93144699
## 79  9.67 1.738557  -7.93144345
## 80  0.33 1.739053   1.40905337
## 81  1.00 1.738745   0.73874466
## 82  0.33 1.738553   1.40855301
## 83  2.00 1.738914  -0.26108575
## 84  3.33 1.738556  -1.59144391
## 85  0.00 1.738553   1.73855301
## 86  1.33 1.738930   0.40893043
## 87 11.00 1.738553  -9.26144699
## 88  0.33 1.738553   1.40855301
## 89  7.67 1.738933  -5.93106668
## 90 14.33 1.738553 -12.59144699
plot(x = pl$obs, y = pl$pred)  # Scatter of observed against predicted

## Comment: model is not good at all.
## Note: the Call recorded in this file's output shows data = fourlane, so
## the interstate predictions listed above come from a model fitted on the
## four-lane rows rather than on the interstate rows.
print(x = svm.model)
## 
## Call:
## svm(formula = Avg_Crashes ~ ADT + Segment_Length + Number_Lanes + 
##     Shoulder_Width, data = fourlane, cost = 100, gamma = 1)
## 
## 
## Parameters:
##    SVM-Type:  eps-regression 
##  SVM-Kernel:  radial 
##        cost:  100 
##       gamma:  1 
##     epsilon:  0.1 
## 
## 
## Number of Support Vectors:  27

Conducted by: Subasish Das