library(e1071)
library(ISLR)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.2 v dplyr 1.0.6
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x purrr::lift() masks caret::lift()
# Simulate 500 points with a non-linear class boundary: y = 1 when x1^2 > x2^2
x1 = runif(500)-0.5
x2 = runif(500)-0.5
y = 1*(x1^2-x2^2 > 0)
plot(x1,x2,col=ifelse(y,'red','blue'))
# Logistic regression on the raw (linear) predictors
glm.fit=glm(y~., family='binomial', data=data.frame(x1,x2,y))
summary(glm.fit)
##
## Call:
## glm(formula = y ~ ., family = "binomial", data = data.frame(x1,
## x2, y))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.29828 -1.16084 -0.00025 1.17506 1.30403
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.0001294 0.0896375 0.001 0.999
## x1 0.1760536 0.3142119 0.560 0.575
## x2 -0.4232675 0.3118969 -1.357 0.175
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 693.15 on 499 degrees of freedom
## Residual deviance: 691.01 on 497 degrees of freedom
## AIC: 697.01
##
## Number of Fisher Scoring iterations: 3
# predict() on a glm defaults to the link (logit) scale, so > 0 means P(y=1) > 0.5
glm.pred=predict(glm.fit,data.frame(x1,x2))
plot(x1,x2,col=ifelse(glm.pred>0,'red','blue'),pch=ifelse(as.integer(glm.pred>0) == y,1,4))
# Logistic regression with quadratic terms in each predictor
glm.poly.fit=glm(y~poly(x1,2)+poly(x2,2), family='binomial', data=data.frame(x1,x2,y))
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
glm.poly.pred=predict(glm.poly.fit,data.frame(x1,x2))
plot(x1,x2,col=ifelse(glm.poly.pred>0,'red','blue'),pch=ifelse(as.integer(glm.poly.pred>0) == y,1,4))
# Support vector classifier (linear kernel); y must be a factor for classification
svm.fit=svm(y~.,data=data.frame(x1,x2,y=as.factor(y)),kernel='linear')
svm.pred=predict(svm.fit,data.frame(x1,x2))
plot(x1,x2,col=ifelse(svm.pred!=0,'red','blue'),pch=ifelse(svm.pred==y,1,4))
# SVM with a degree-2 polynomial kernel
svm.poly.fit=svm(y~.,data=data.frame(x1,x2,y=as.factor(y)),kernel='polynomial',degree=2)
svm.poly.pred=predict(svm.poly.fit,data.frame(x1,x2))
plot(x1,x2,col=ifelse(svm.poly.pred!=0,'red','blue'),pch=ifelse(svm.poly.pred == y,1,4))
The support vector machine with a polynomial kernel appears to have the best predictive power: it captures the non-linear relationship between the predictors and the response. The logistic regression with quadratic terms likewise achieves better accuracy than the SVM with a linear kernel.
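To make the comparison concrete, the training error rates of all four fits can be computed directly from the objects above (a quick check, using class labels from the SVMs and link-scale scores from the logistic fits):

errs=c(logistic.linear=mean((glm.pred>0) != y),
       logistic.poly=mean((glm.poly.pred>0) != y),
       svm.linear=mean(svm.pred != y),
       svm.poly=mean(svm.poly.pred != y))
round(errs,3)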
# Recode mpg as a binary factor (1 if above the median, 0 otherwise),
# so that svm() performs classification rather than regression
Auto <- ISLR::Auto
Auto$mpg=as.factor(ifelse(Auto$mpg>median(Auto$mpg),1,0))
table(Auto$mpg)
##
## 0 1
## 196 196
# Tune a linear-kernel SVM over 15 cost values with 10-fold CV
costs=data.frame(cost=seq(0.05,100,length.out = 15))
svm.tune=tune(svm,mpg~.,data=Auto,ranges=costs,kernel='linear')
svm.tune
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.05
##
## - best performance: 0.1024354
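tune() also refits the model at the winning cost and stores it, so the tuned classifier can be inspected (or used with predict()) without refitting by hand:

summary(svm.tune$best.model)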
# Cross a grid of costs with a grid of degrees; tune() evaluates every combination
params = data.frame(cost=seq(0.05,100,length.out = 5),degree=seq(1,100,length.out = 5))
svm.poly = tune(svm,mpg~.,data=Auto,ranges=params,kernel='polynomial')
svm.poly
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 100 1
##
## - best performance: 0.09770715
summary(svm.poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 100 1
##
## - best performance: 0.09770715
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.0500 1.00 0.30626637 0.04705768
## 2 25.0375 1.00 0.10253181 0.04963406
## 3 50.0250 1.00 0.10061535 0.05216098
## 4 75.0125 1.00 0.09906474 0.05223427
## 5 100.0000 1.00 0.09770715 0.05127389
## 6 0.0500 25.75 0.52807507 0.05673551
## 7 25.0375 25.75 0.52807507 0.05673551
## 8 50.0250 25.75 0.52807507 0.05673551
## 9 75.0125 25.75 0.52807507 0.05673551
## 10 100.0000 25.75 0.52807507 0.05673551
## 11 0.0500 50.50 0.52807507 0.05673551
## 12 25.0375 50.50 0.52807507 0.05673551
## 13 50.0250 50.50 0.52807507 0.05673551
## 14 75.0125 50.50 0.52807507 0.05673551
## 15 100.0000 50.50 0.52807507 0.05673551
## 16 0.0500 75.25 0.52807507 0.05673551
## 17 25.0375 75.25 0.52807507 0.05673551
## 18 50.0250 75.25 0.52807507 0.05673551
## 19 75.0125 75.25 0.52807507 0.05673551
## 20 100.0000 75.25 0.52807507 0.05673551
## 21 0.0500 100.00 0.52807507 0.05673551
## 22 25.0375 100.00 0.52807507 0.05673551
## 23 50.0250 100.00 0.52807507 0.05673551
## 24 75.0125 100.00 0.52807507 0.05673551
## 25 100.0000 100.00 0.52807507 0.05673551
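Every degree above 1 collapses to the same CV error (about 0.53, no better than chance on these balanced classes), so the search effectively selects a linear, degree-1 boundary. The winning combination can be read straight off the tune object:

svm.poly$best.parameters
svm.poly$best.performance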
# CV error as a function of cost (the stored element is named 'performances')
plot(svm.tune$performances[,c('cost','error')],type='l')
plot(svm.poly$performances)
# Hold out part of the OJ data: 800 observations for training, the rest for testing
set.seed(1)
train=sample(1:1070,800)
test=(1:1070)[-train]
svm.fit.oj=svm(Purchase~.,data=OJ,subset=train,cost=0.01,kernel='linear')
summary(svm.fit.oj)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ, cost = 0.01, kernel = "linear",
## subset = train)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 435
##
## ( 219 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
# Training error rate for the linear kernel at cost 0.01
svm.pred.oj.train=predict(svm.fit.oj,OJ[train,])
mean(OJ$Purchase[train]!= svm.pred.oj.train)
## [1] 0.175
# Test error rate
svm.pred.oj.test=predict(svm.fit.oj,OJ[test,])
mean(OJ$Purchase[test]!= svm.pred.oj.test)
## [1] 0.1777778
# Tune cost over a 25-value grid between 0.01 and 10 (10-fold CV on the training set)
svm.tune.oj=tune(svm,Purchase~.,data=OJ[train,],ranges=data.frame(cost=seq(0.01,10,length.out = 25)),kernel='linear')
summary(svm.tune.oj)
##
## Error estimation of 'svm' using 10-fold cross validation: 0.17375
# Refit at the cost selected by cross-validation
oj.svm.fit2=svm(Purchase~.,data=OJ,subset=train,cost=0.1775,kernel='linear')
oj.svm.pred.train2=predict(oj.svm.fit2,OJ[train,])
mean(OJ$Purchase[train]!= oj.svm.pred.train2)
## [1] 0.1675
oj.svm.pred.test2=predict(oj.svm.fit2,OJ[test,])
mean(OJ$Purchase[test]!= oj.svm.pred.test2)
## [1] 0.1666667
# Radial kernel at the same small cost
svm.fit3=svm(Purchase~.,data=OJ,subset=train,cost=0.01,kernel='radial')
svm.train.pred3=predict(svm.fit3,OJ[train,])
mean(OJ$Purchase[train]!= svm.train.pred3)
## [1] 0.39375
svm.test.pred3=predict(svm.fit3,OJ[test,])
mean(OJ$Purchase[test]!= svm.test.pred3)
## [1] 0.3777778
# Degree-2 polynomial kernel at the same small cost
svm.fit4=svm(Purchase~.,data=OJ,subset=train,cost=0.01,kernel='polynomial',degree=2)
svm.train.pred4=predict(svm.fit4,OJ[train,])
mean(OJ$Purchase[train]!= svm.train.pred4)
## [1] 0.3725
svm.test.pred4=predict(svm.fit4,OJ[test,])
mean(OJ$Purchase[test]!= svm.test.pred4)
## [1] 0.3666667
The linear kernel with the tuned cost parameter had the smallest error rate, roughly 16.7% on the test set, compared with about 37-38% for the radial and degree-2 polynomial kernels at cost 0.01.
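For reference, the test error rates can be collected in one place from the predictions already in the session:

oj.test.errors=c(linear.cost.0.01=mean(OJ$Purchase[test]!=svm.pred.oj.test),
                 linear.tuned=mean(OJ$Purchase[test]!=oj.svm.pred.test2),
                 radial.cost.0.01=mean(OJ$Purchase[test]!=svm.test.pred3),
                 poly2.cost.0.01=mean(OJ$Purchase[test]!=svm.test.pred4))
sort(round(oj.test.errors,4))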