set.seed(1)
# Simulate 500 observations with a quadratic (non-linear) class boundary
x1 = runif(500) - 0.5
x2 = runif(500) - 0.5
y = 1 * (x1^2 - x2^2 > 0)
plot(x1[y == 0], x2[y == 0], col = "purple", xlab = "X1", ylab = "X2", pch = "*")
points(x1[y == 1], x2[y == 1], col = "green", pch = 4)
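The class boundary x1^2 - x2^2 = 0 is quadratic, so no linear classifier can separate the two classes. As a minimal sketch (not part of the original output; the cost and gamma values are illustrative assumptions, not tuned), an SVM with a radial kernel could be fit to these data and its predicted classes plotted the same way:
library(e1071)  # also loaded later in this document
dat = data.frame(x1 = x1, x2 = x2, y = as.factor(y))
# Illustrative (untuned) cost and gamma values
svm.sim = svm(y ~ x1 + x2, data = dat, kernel = "radial", cost = 1, gamma = 1)
pred.sim = predict(svm.sim, dat)
# Plot the predicted classes; a good fit should echo the quadratic boundary above
plot(x1[pred.sim == "0"], x2[pred.sim == "0"], col = "purple", xlab = "X1", ylab = "X2", pch = "*")
points(x1[pred.sim == "1"], x2[pred.sim == "1"], col = "green", pch = 4)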
library(ISLR)
attach(Auto)
median.auto = median(Auto$mpg)
median.auto
## [1] 22.75
# Binary response: 1 if mpg is above its median, 0 otherwise
Auto$med = ifelse(Auto$mpg > median.auto, 1, 0)
Auto$med = as.factor(Auto$med)
set.seed(1)
library(e1071)
svm.fit = svm(med ~ ., data = Auto, kernel = "linear", cost = 0.1, scale = FALSE)
tune.out = tune(svm, med~., data = Auto, kernel = "linear", ranges = list(cost = c(0.01, 0.1, 1, 5, 10, 100)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.01025641
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 0.07653846 0.03617137
## 2 1e-01 0.04596154 0.03378238
## 3 1e+00 0.01025641 0.01792836
## 4 5e+00 0.02051282 0.02648194
## 5 1e+01 0.02051282 0.02648194
## 6 1e+02 0.03076923 0.03151981
A cost of 1 gives the lowest 10-fold cross-validation error rate (0.0103).
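The model refit at the best cost is stored in the tune object and can be pulled out directly; a short sketch (output not shown):
# Extract the cross-validated best linear model (cost = 1)
best.linear = tune.out$best.model
summary(best.linear)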
set.seed(1)
tune.out.polyn = tune(svm, med ~ ., data = Auto, kernel = "polynomial", ranges = list(cost = c(0.1, 1, 5, 10), degree = c(2, 3, 4)))
summary(tune.out.polyn)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 10 2
##
## - best performance: 0.5130128
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.1 2 0.5511538 0.04366593
## 2 1.0 2 0.5511538 0.04366593
## 3 5.0 2 0.5511538 0.04366593
## 4 10.0 2 0.5130128 0.08963366
## 5 0.1 3 0.5511538 0.04366593
## 6 1.0 3 0.5511538 0.04366593
## 7 5.0 3 0.5511538 0.04366593
## 8 10.0 3 0.5511538 0.04366593
## 9 0.1 4 0.5511538 0.04366593
## 10 1.0 4 0.5511538 0.04366593
## 11 5.0 4 0.5511538 0.04366593
## 12 10.0 4 0.5511538 0.04366593
set.seed(1)
tune.out_rad = tune(svm, med ~ ., data = Auto, kernel = "radial", ranges = list(cost = c(0.1, 1, 5, 10), gamma = c(0.01, 0.1, 1, 5, 10, 100)))
summary(tune.out_rad)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 10 0.01
##
## - best performance: 0.02557692
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 0.1 1e-02 0.08929487 0.04382379
## 2 1.0 1e-02 0.07403846 0.03522110
## 3 5.0 1e-02 0.04852564 0.03303346
## 4 10.0 1e-02 0.02557692 0.02093679
## 5 0.1 1e-01 0.07903846 0.03874545
## 6 1.0 1e-01 0.05371795 0.03525162
## 7 5.0 1e-01 0.02820513 0.03299190
## 8 10.0 1e-01 0.03076923 0.03375798
## 9 0.1 1e+00 0.55115385 0.04366593
## 10 1.0 1e+00 0.06384615 0.04375618
## 11 5.0 1e+00 0.05884615 0.04020934
## 12 10.0 1e+00 0.05884615 0.04020934
## 13 0.1 5e+00 0.55115385 0.04366593
## 14 1.0 5e+00 0.49493590 0.04724924
## 15 5.0 5e+00 0.48217949 0.05470903
## 16 10.0 5e+00 0.48217949 0.05470903
## 17 0.1 1e+01 0.55115385 0.04366593
## 18 1.0 1e+01 0.51794872 0.05063697
## 19 5.0 1e+01 0.51794872 0.04917316
## 20 10.0 1e+01 0.51794872 0.04917316
## 21 0.1 1e+02 0.55115385 0.04366593
## 22 1.0 1e+02 0.55115385 0.04366593
## 23 5.0 1e+02 0.55115385 0.04366593
## 24 10.0 1e+02 0.55115385 0.04366593
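The radial kernel does best with a small gamma (CV error 0.0256 at cost = 10, gamma = 0.01), whereas the polynomial kernel performs poorly here (best CV error 0.513). The best cross-validation error of each kernel can be read straight from the three tune objects; a short sketch using only the objects already fit above:
# Best 10-fold CV error for each kernel, taken from the tune objects above
c(linear     = tune.out$best.performance,
  polynomial = tune.out.polyn$best.performance,
  radial     = tune.out_rad$best.performance)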
Essentially, instead of typing
plot(svmfit, dat)
where svmfit contains your fitted model and dat is a data frame containing your data, you can type
plot(svmfit, dat, x1 ~ x4)
in order to plot just the first and fourth variables. However, you must replace x1 and x4 with the correct variable names. To find out more, type ?plot.svm.
svm.linear = svm(med ~ ., data = Auto, kernel = "linear", cost = 1)
svm.poly = svm(med ~ ., data = Auto, kernel = "polynomial", cost = 10, degree = 2)
svm.radial = svm(med ~ ., data = Auto, kernel = "radial", cost = 10, gamma = 0.01)
# Plot the fitted SVM against mpg and each remaining predictor in turn
plotpairs = function(fit) {
  for (name in names(Auto)[!(names(Auto) %in% c("mpg", "med", "name"))]) {
    plot(fit, Auto, as.formula(paste("mpg~", name, sep = "")))
  }
}
plotpairs(svm.linear)
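The same helper can be applied to the polynomial and radial fits (plots not shown here):
plotpairs(svm.poly)
plotpairs(svm.radial)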
detach(Auto)
attach(OJ)
set.seed(1)
# Split the 1070 OJ observations into 800 training and 270 test cases
train = sample(1070, 800)
training = OJ[train, ]
testing = OJ[-train, ]
svm.fit8 = svm(Purchase ~ ., kernel = "linear", data = training, cost = 0.01)
summary(svm.fit8)
##
## Call:
## svm(formula = Purchase ~ ., data = training, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 435
##
## ( 219 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
There are 435 support vectors, 219 in one class and 216 in the other.
ypreds=predict(svm.fit8,training)
table(predict=ypreds,truth=training$Purchase)
## truth
## predict CH MM
## CH 420 75
## MM 65 240
Error_Training = 1-(420+240)/800
Error_Training
## [1] 0.175
The training error rate is 17.5%.
ypreds=predict(svm.fit8,testing)
table(predict=ypreds,truth=testing$Purchase)
## truth
## predict CH MM
## CH 153 33
## MM 15 69
Error_Testing = 1-(153+69)/270
Error_Testing
## [1] 0.1777778
The test error rate is 17.78%.
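The training and test error computations above all follow the same pattern, so a small helper function (an illustrative sketch, not used for the results reported below) could compute them in one line each:
# Misclassification rate of a fitted SVM on a given data set (sketch)
err_rate = function(fit, data) mean(predict(fit, data) != data$Purchase)
err_rate(svm.fit8, training)   # training error, 0.175 above
err_rate(svm.fit8, testing)    # test error, 0.1778 above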
set.seed(1)
tune.out8 = tune(svm, Purchase ~ ., data = OJ, kernel = "linear", ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10)))
summary(tune.out8)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.1626168
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-03 0.2373832 0.04561497
## 2 1e-02 0.1691589 0.04024604
## 3 1e-01 0.1663551 0.03984617
## 4 1e+00 0.1626168 0.03945456
## 5 5e+00 0.1654206 0.03917066
## 6 1e+01 0.1682243 0.03865942
bestmod=tune.out8$best.model
summary(bestmod)
##
## Call:
## best.tune(method = svm, train.x = Purchase ~ ., data = OJ, ranges = list(cost = c(0.001,
## 0.01, 0.1, 1, 5, 10)), kernel = "linear")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 1
##
## Number of Support Vectors: 442
##
## ( 221 221 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
ypred_best1=predict(bestmod,training)
table(predict=ypred_best1,truth=training$Purchase)
## truth
## predict CH MM
## CH 424 69
## MM 61 246
Error_Training8 = 1-(424+246)/800
Error_Training8
## [1] 0.1625
For the tuned model, the training error rate is 16.25%.
ypred_best=predict(bestmod,testing)
table(predict=ypred_best,truth=testing$Purchase)
## truth
## predict CH MM
## CH 155 29
## MM 13 73
Error_Testing8 = 1-(155+73)/270
Error_Testing8
## [1] 0.1555556
For the tuned model, the test error rate is 15.56%.
set.seed(1)
svm.radial = svm(Purchase ~ ., data = training, kernel = "radial")
summary(svm.radial)
##
## Call:
## svm(formula = Purchase ~ ., data = training, kernel = "radial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 373
##
## ( 188 185 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
rad_pred=predict(svm.radial,training)
table(predict=rad_pred,truth=training$Purchase)
## truth
## predict CH MM
## CH 441 77
## MM 44 238
Error_Rad8 = 1-(441+238)/800
Error_Rad8
## [1] 0.15125
The training error rate is 15.13%.
rad_pred1=predict(svm.radial,testing)
table(predict=rad_pred1,truth=testing$Purchase)
## truth
## predict CH MM
## CH 151 33
## MM 17 69
Error_Rad8tes = 1-(151+69)/270
Error_Rad8tes
## [1] 0.1851852
The test error rate is 18.52%.
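The radial fit above uses the default cost of 1. Cost could also be tuned for the radial kernel over the same grid as for the linear kernel; a sketch only (not run here, and the results would depend on the train/test split):
# Sketch: tune cost for the radial kernel on the training set
tune.rad = tune(svm, Purchase ~ ., data = training, kernel = "radial",
                ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10)))
summary(tune.rad)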
set.seed(1)
svm.poly = svm(Purchase ~ ., data = training, kernel = "poly", degree = 2)
summary(svm.poly)
##
## Call:
## svm(formula = Purchase ~ ., data = training, kernel = "poly", degree = 2)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 1
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 447
##
## ( 225 222 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
poly_pred=predict(svm.poly,training)
table(predict=poly_pred,truth=training$Purchase)
## truth
## predict CH MM
## CH 449 110
## MM 36 205
Error_Pol = 1-(449+205)/800
Error_Pol
## [1] 0.1825
The training error rate is 18.25%.
poly_pred1=predict(svm.poly,testing)
table(predict=poly_pred1,truth=testing$Purchase)
## truth
## predict CH MM
## CH 153 45
## MM 15 57
Error_Poltes = 1-(153+57)/270
Error_Poltes
## [1] 0.2222222
The test error rate is 22.22%.
Among the three kernels, the radial kernel gives the lowest training error rate (15.13%), but the tuned linear kernel gives the lowest test error rate (15.56%, versus 18.52% for the radial kernel and 22.22% for the polynomial kernel).
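For reference, the error rates computed above can be gathered side by side (using only the variables already defined):
# Training and test error rates from the fits above
data.frame(kernel   = c("linear (cost = 1)", "radial", "polynomial (degree 2)"),
           training = c(Error_Training8, Error_Rad8, Error_Pol),
           test     = c(Error_Testing8, Error_Rad8tes, Error_Poltes))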