data("Auto")
data("OJ")
We have seen that we can fit an SVM with a non-linear kernel in order to perform classification using a non-linear decision boundary. We will now see that we can also obtain a non-linear decision boundary by performing logistic regression using non-linear transformations of the features.
set.seed(1)
x1=runif(500)-0.5
x2=runif(500)-0.5
y=1*(x1^2-x2^2>0)  # class 1 when x1^2 - x2^2 > 0, giving a non-linear class boundary
df=data.frame(x1, x2, y=factor(y))
plot(x1, x2, col=2-y)
logreg=glm(y~x1+x2, data=df, family="binomial")
summary(logreg)
##
## Call:
## glm(formula = y ~ x1 + x2, family = "binomial", data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.179 -1.139 -1.112 1.206 1.257
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.087260 0.089579 -0.974 0.330
## x1 0.196199 0.316864 0.619 0.536
## x2 -0.002854 0.305712 -0.009 0.993
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 692.18 on 499 degrees of freedom
## Residual deviance: 691.79 on 497 degrees of freedom
## AIC: 697.79
##
## Number of Fisher Scoring iterations: 3
prob=predict(logreg, newdata=df, type="response")
pred=rep(0,500)
pred[prob>0.50]=1
plot(x1, x2, col=2-pred)
logregnon=glm(y~x1+x2+I(x1^2)+I(x2^2), data=df, family="binomial")
summary(logregnon)
##
## Call:
## glm(formula = y ~ x1 + x2 + I(x1^2) + I(x2^2), family = "binomial",
## data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.079e-03 -2.000e-08 -2.000e-08 2.000e-08 1.297e-03
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.530 526.853 -0.020 0.984
## x1 115.895 6067.885 0.019 0.985
## x2 -1.604 4002.215 0.000 1.000
## I(x1^2) 18538.679 528515.760 0.035 0.972
## I(x2^2) -18235.099 520182.819 -0.035 0.972
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6.9218e+02 on 499 degrees of freedom
## Residual deviance: 4.2881e-06 on 495 degrees of freedom
## AIC: 10
##
## Number of Fisher Scoring iterations: 25
probnon=predict(logregnon, newdata=df, type="response")
prednon=rep(0,500)
prednon[probnon>0.50]=1
plot(x1, x2, col=2-prednon)
set.seed(1)
t=tune(svm, y~x1+x2, data=df, kernel="linear", ranges=list(cost=c(.01, .1, 1, 5, 10, 100)))
bmod=t$best.model
pred=predict(bmod, newdata=df)
plot(x1, x2, col=pred)
set.seed(1)
tnon=tune(svm, y~x1+x2, data=df, kernel="radial", ranges=list(cost=c(.01, .1, 1, 10, 100, 1000), gamma=c(.5, 1, 2, 3, 4)))
bmodnon=tnon$best.model
pred=predict(bmodnon, newdata=df)
plot(x1, x2, col=pred)
The linear logistic regression model and the linear-kernel SVM both fail to capture the true class boundary: their predicted classes are essentially separated by a straight line. By contrast, the logistic regression model with quadratic terms and the radial-kernel SVM both recover the non-linear decision boundary and predict the classes accurately.
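As a quick numeric check on this claim, one could compute the training misclassification rate of each of the four fits above; a minimal sketch, where trainerr is a helper introduced here for illustration (not part of the original code):
# Hypothetical helper: training misclassification rate against df$y
trainerr=function(p) mean(as.character(p)!=as.character(df$y))
trainerr(as.numeric(predict(logreg, newdata=df, type="response")>0.5))    # linear logistic
trainerr(as.numeric(predict(logregnon, newdata=df, type="response")>0.5)) # quadratic logistic
trainerr(predict(bmod, newdata=df))     # linear-kernel SVM
trainerr(predict(bmodnon, newdata=df))  # radial-kernel SVM
The two linear fits should show error rates near 50%, while the quadratic logistic fit and the radial-kernel SVM should come out close to zero.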
In this problem, you will use support vector approaches in order to predict whether a given car gets high or low gas mileage based on the Auto data set.
# Code 1 for cars above the median mpg and 0 otherwise, then replace mpg itself
# so the raw mileage values are not reused as a predictor
mpg1=ifelse(Auto$mpg > median(Auto$mpg), 1, 0)
Auto$mpg=as.factor(mpg1)
set.seed(1)
t2=tune(svm, mpg~., data=Auto, kernel="linear", ranges=list(cost=c(.01, .1, 1, 5, 10, 100)))
summary(t2)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.1
##
## - best performance: 0.08673077
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 0.08923077 0.04698309
## 2 1e-01 0.08673077 0.04040897
## 3 1e+00 0.09961538 0.04923181
## 4 5e+00 0.11230769 0.05826857
## 5 1e+01 0.11237179 0.05701890
## 6 1e+02 0.11750000 0.06208951
For the linear kernel, a cost of 0.1 gives the lowest cross-validation error rate (about 0.087).
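If desired, the cross-validation winner can be pulled directly out of the tune object; a small sketch (bestlin is a name introduced here, not part of the original analysis) that also tabulates its fit on the full data:
# CV-selected linear SVM and its confusion matrix on the full Auto data
bestlin=t2$best.model
table(predicted=predict(bestlin, Auto), actual=Auto$mpg)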
set.seed(1)
tradial=tune(svm, mpg~., data=Auto, kernel="radial", ranges=list(cost=c(.01, .1, 1, 10, 100, 1000), gamma=c(.5, 1, 2, 3, 4)))
summary(tradial)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 10 1
##
## - best performance: 0.07897436
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 1e-02 0.5 0.55115385 0.04366593
## 2 1e-01 0.5 0.08410256 0.04164179
## 3 1e+00 0.5 0.08673077 0.04708817
## 4 1e+01 0.5 0.09173077 0.04008042
## 5 1e+02 0.5 0.09429487 0.03796985
## 6 1e+03 0.5 0.09429487 0.03796985
## 7 1e-02 1.0 0.55115385 0.04366593
## 8 1e-01 1.0 0.55115385 0.04366593
## 9 1e+00 1.0 0.07903846 0.04891067
## 10 1e+01 1.0 0.07897436 0.04869339
## 11 1e+02 1.0 0.07897436 0.04869339
## 12 1e+03 1.0 0.07897436 0.04869339
## 13 1e-02 2.0 0.55115385 0.04366593
## 14 1e-01 2.0 0.55115385 0.04366593
## 15 1e+00 2.0 0.13769231 0.06926822
## 16 1e+01 2.0 0.13512821 0.06692968
## 17 1e+02 2.0 0.13512821 0.06692968
## 18 1e+03 2.0 0.13512821 0.06692968
## 19 1e-02 3.0 0.55115385 0.04366593
## 20 1e-01 3.0 0.55115385 0.04366593
## 21 1e+00 3.0 0.37012821 0.14598387
## 22 1e+01 3.0 0.32935897 0.14522774
## 23 1e+02 3.0 0.32935897 0.14522774
## 24 1e+03 3.0 0.32935897 0.14522774
## 25 1e-02 4.0 0.55115385 0.04366593
## 26 1e-01 4.0 0.55115385 0.04366593
## 27 1e+00 4.0 0.47955128 0.05564953
## 28 1e+01 4.0 0.47698718 0.06085690
## 29 1e+02 4.0 0.47698718 0.06085690
## 30 1e+03 4.0 0.47698718 0.06085690
With the radial kernel, cost = 10 and gamma = 1 give the lowest cross-validation error rate (about 0.079).
set.seed(1)
tpoly=tune(svm, mpg~., data=Auto, kernel="polynomial", ranges=list(cost=c(.01, .1, 1, 10, 100, 1000), degree=c(1, 2, 3, 4)))
summary(tpoly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 100 1
##
## - best performance: 0.08173077
##
## - Detailed performance results:
## cost degree error dispersion
## 1 1e-02 1 0.55115385 0.04366593
## 2 1e-01 1 0.28596154 0.10442771
## 3 1e+00 1 0.10717949 0.04299154
## 4 1e+01 1 0.08416667 0.04010502
## 5 1e+02 1 0.08173077 0.03986661
## 6 1e+03 1 0.11237179 0.05840964
## 7 1e-02 2 0.55115385 0.04366593
## 8 1e-01 2 0.55115385 0.04366593
## 9 1e+00 2 0.55115385 0.04366593
## 10 1e+01 2 0.52064103 0.08505283
## 11 1e+02 2 0.31673077 0.09410274
## 12 1e+03 2 0.27846154 0.10298534
## 13 1e-02 3 0.55115385 0.04366593
## 14 1e-01 3 0.55115385 0.04366593
## 15 1e+00 3 0.55115385 0.04366593
## 16 1e+01 3 0.55115385 0.04366593
## 17 1e+02 3 0.40326923 0.10793388
## 18 1e+03 3 0.25794872 0.09305854
## 19 1e-02 4 0.55115385 0.04366593
## 20 1e-01 4 0.55115385 0.04366593
## 21 1e+00 4 0.55115385 0.04366593
## 22 1e+01 4 0.55115385 0.04366593
## 23 1e+02 4 0.55115385 0.04366593
## 24 1e+03 4 0.55115385 0.04366593
With the polynomial kernel, cost = 100 and degree = 1 give the lowest cross-validation error rate (about 0.082); note that a degree-1 polynomial is essentially a linear kernel.
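For a side-by-side comparison, the best cross-validation error from each of the three tuning runs can be read off the tune objects; a brief sketch using the objects above:
# Best 10-fold CV error for each kernel (from the tuning runs above)
sapply(list(linear=t2, radial=tradial, polynomial=tpoly),
       function(obj) obj$best.performance)
The radial kernel has a slight edge (about 0.079 versus 0.087 for linear and 0.082 for polynomial).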
The plot() function for svm objects only plots pairs of variables at a time. Instead of typing plot(svmfit, dat), where svmfit contains your fitted model and dat is a data frame containing your data, you can type plot(svmfit, dat, x1 ~ x4) in order to plot just the first and fourth variables. However, you must replace x1 and x4 with the correct variable names. To find out more, type ?plot.svm.
svmradial=svm(mpg~., data=Auto, kernel="radial", cost=10, gamma=1)
plot(svmradial, Auto, cylinders~weight)
The plot shows that cars with fewer cylinders and lower weight tend to be classified as getting high gas mileage.
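Other variable pairs can be examined the same way; for example, a plot over displacement and horsepower (an extra illustration, not in the original analysis; the remaining predictors are held at plot.svm's default slice values):
# Decision region over a different pair of predictors
plot(svmradial, Auto, displacement~horsepower)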
This problem involves the OJ data set which is part of the ISLR2 package.
set.seed(1)
t=sample(1:nrow(OJ),800)
train=OJ[t,]
test=OJ[-t,]
svmoj=svm(Purchase~., data=train, kernel="linear", cost=.01)
summary(svmoj)
##
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 435
##
## ( 219 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
The summary shows 435 support vectors, split almost evenly between the two classes: 219 on the CH side and 216 on the MM side.
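Before computing error rates, one could also look at the training-set confusion matrix for this low-cost fit; a quick sketch using the objects above:
# Training confusion matrix for the cost = 0.01 linear SVM
table(predicted=predict(svmoj, train), actual=train$Purchase)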
teste=mean(predict(svmoj, test)!=test$Purchase)
teste
## [1] 0.1777778
traine=mean(predict(svmoj, train)!=train$Purchase)
traine
## [1] 0.175
set.seed(1)
toj=tune(svm, Purchase~., data=train, kernel="linear", ranges=list(cost=c(.01, .1, 1, 5, 10)))
summary(toj)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.1
##
## - best performance: 0.1725
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.17625 0.02853482
## 2 0.10 0.17250 0.03162278
## 3 1.00 0.17500 0.02946278
## 4 5.00 0.17250 0.03162278
## 5 10.00 0.17375 0.03197764
svmoj2=svm(Purchase~., data=train, kernel="linear", cost=.1)
teste2=mean(predict(svmoj2, test)!=test$Purchase)
teste2
## [1] 0.162963
traine2=mean(predict(svmoj2, train)!=train$Purchase)
traine2
## [1] 0.165
svmojr=svm(Purchase~., data=train, kernel="radial", cost=.01, gamma=1)
summary(svmojr)
##
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "radial", cost = 0.01,
## gamma = 1)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 0.01
##
## Number of Support Vectors: 656
##
## ( 341 315 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
tester=mean(predict(svmojr, test)!=test$Purchase)
tester
## [1] 0.3777778
trainer=mean(predict(svmojr, train)!=train$Purchase)
trainer
## [1] 0.39375
set.seed(1)
tojr=tune(svm, Purchase~., data=train, kernel="radial", ranges=list(cost=c(.01, .1, 1, 5, 10)), gamma=1)
summary(tojr)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 5
##
## - best performance: 0.225
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.39375 0.04007372
## 2 0.10 0.34500 0.04937104
## 3 1.00 0.22625 0.04466309
## 4 5.00 0.22500 0.04487637
## 5 10.00 0.23000 0.04684490
svmojr2=svm(Purchase~., data=train, kernel="radial", cost=5, gamma=1)
tester2=mean(predict(svmojr2, test)!=test$Purchase)
tester2
## [1] 0.2037037
trainer2=mean(predict(svmojr2, train)!=train$Purchase)
trainer2
## [1] 0.09625
svmojp=svm(Purchase~., data=train, kernel="polynomial", cost=.01, degree=2)
summary(svmojp)
##
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "polynomial",
## cost = 0.01, degree = 2)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 0.01
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 636
##
## ( 321 315 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
testep=mean(predict(svmojp, test)!=test$Purchase)
testep
trainep=mean(predict(svmojp, train)!=train$Purchase)
trainep
set.seed(1)
tojp=tune(svm, Purchase~., data=train, kernel="polynomial", ranges=list(cost=c(.01, .1, 1, 5, 10)), degree=2)
summary(tojp)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 10
##
## - best performance: 0.18125
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.39125 0.04210189
## 2 0.10 0.32125 0.05001736
## 3 1.00 0.20250 0.04116363
## 4 5.00 0.18250 0.03496029
## 5 10.00 0.18125 0.02779513
svmojp2=svm(Purchase~., data=train, kernel="polynomial", cost=10, degree=2)
testep2=mean(predict(svmojp2, test)!=test$Purchase)
testep2
## [1] 0.1888889
trainep2=mean(predict(svmojp2, train)!=train$Purchase)
trainep2
## [1] 0.15
Overall, the tuned linear-kernel SVM gives the lowest test error rate (about 0.163, versus about 0.189 for the polynomial kernel and 0.204 for the radial kernel), so the linear model produces the best results on this data.
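To make that comparison concrete, the three tuned models' test error rates computed above can be gathered into one table:
# Test error rates of the CV-tuned models (values computed earlier)
data.frame(kernel=c("linear", "radial", "polynomial"),
           test.error=c(teste2, tester2, testep2))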