library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(ISLR)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x purrr::lift()   masks caret::lift()

## Question 5

### 5a)

x1=runif(500)-0.5
x2=runif(500)-0.5
y=1*(x1^2-x2^2 > 0)     # class label: 1 where x1^2 > x2^2, 0 otherwise

### 5b)

plot(x1,x2,col=ifelse(y,'red','blue'))    # colour points by their true class
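
The true boundary x1^2 = x2^2 consists of the two diagonal lines x2 = ±x1; overlaying them makes the wedge-shaped class regions visible (a small sketch using base graphics):

abline(0, 1, lty = 2)     # x2 =  x1
abline(0, -1, lty = 2)    # x2 = -x1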

### 5c)

# glm() is part of base R's stats package, so no extra package is needed.
glm.fit=glm(y~. ,family='binomial', data=data.frame(x1,x2,y))   # logistic regression on the linear terms only
glm.fit
## 
## Call:  glm(formula = y ~ ., family = "binomial", data = data.frame(x1, 
##     x2, y))
## 
## Coefficients:
## (Intercept)           x1           x2  
##     0.01572      0.13298     -0.11356  
## 
## Degrees of Freedom: 499 Total (i.e. Null);  497 Residual
## Null Deviance:       693.1 
## Residual Deviance: 692.8     AIC: 698.8
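
The deviance barely drops from the null model (693.1 to 692.8), so neither coefficient should be significant; `summary()` makes this explicit (a quick check, output not shown):

summary(glm.fit)$coefficients   # expect large p-values for x1 and x2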

### 5d)

glm.pred=predict(glm.fit,data.frame(x1,x2))   # predict() on a glm returns the log-odds (link scale) by default
plot(x1,x2,col=ifelse(glm.pred>0,'red','blue'),pch=ifelse(as.integer(glm.pred>0) == y,1,4))   # circles = correct, crosses = errors
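
Equivalently, `predict()` can return probabilities directly with `type = 'response'`; thresholding at 0.5 on the probability scale matches thresholding at 0 on the log-odds scale (a minimal sketch):

glm.prob = predict(glm.fit, data.frame(x1, x2), type = 'response')
all.equal(as.numeric(glm.prob), plogis(as.numeric(glm.pred)))   # TRUE: 'response' applies the inverse logit to the link
plot(x1, x2, col = ifelse(glm.prob > 0.5, 'red', 'blue'))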

The fitted boundary (glm.pred = 0) is a straight line, so it cannot follow the true quadratic boundary; crosses mark the misclassified points.

### 5e)

glm.fit=glm(y~poly(x1,2)+poly(x2,2) ,family='binomial', data=data.frame(x1,x2,y))   # quadratic terms match the form of the true boundary
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

These warnings signal (quasi-)perfect separation: with quadratic terms the model can reproduce the true boundary exactly, so the fitted probabilities are pushed to 0 and 1.

### 5f)

glm.pred=predict(glm.fit,data.frame(x1,x2))     # returns the log-odds.
plot(x1,x2,col=ifelse(glm.pred>0,'red','blue'),pch=ifelse(as.integer(glm.pred>0) == y,1,4))

Again, circles mark correctly classified points and crosses mark errors. All training observations are classified correctly, consistent with the separation warnings above.
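
A quick numerical check of that claim, reusing the log-odds predictions above:

mean(as.integer(glm.pred > 0) != y)   # training error rate; expected to be 0 here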

### 5g)

The same procedure as before, but now with a support vector classifier (linear kernel):

require(e1071)
## Loading required package: e1071
svm.fit=svm(y~.,data=data.frame(x1,x2,y=as.factor(y)),kernel='linear')   # y must be a factor for classification
svm.pred=predict(svm.fit,data.frame(x1,x2))      # predict.svm returns the predicted class labels
plot(x1,x2,col=ifelse(svm.pred!=0,'red','blue'),pch=ifelse(svm.pred == y,1,4))

### 5h)

The same again with a non-linear (degree-2 polynomial) SVM:

svm.fit=svm(y~.,data=data.frame(x1,x2,y=as.factor(y)),kernel='polynomial',degree=2)
svm.pred=predict(svm.fit,data.frame(x1,x2))
plot(x1,x2,col=ifelse(svm.pred!=0,'red','blue'),pch=ifelse(svm.pred == y,1,4))
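
e1071 also ships a `plot` method for `svm` objects that shades the two decision regions and marks the support vectors; a minimal sketch for the polynomial fit above:

plot(svm.fit, data.frame(x1, x2, y = as.factor(y)))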

require(knitr)
## Loading required package: knitr
kable(table(y,as.integer(glm.pred>0)))   # quadratic logistic model vs. truth

|   |   0|   1|
|:--|---:|---:|
|0  | 248|   0|
|1  |   0| 252|
kable(table(y,svm.pred))                 # polynomial SVM vs. truth

|   |   0|   1|
|:--|---:|---:|
|0  | 247|   1|
|1  |  18| 234|

The quadratic logistic model classifies every training point correctly, while the polynomial SVM makes 19 training errors.
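
Summarising the two confusion tables as training error rates (a small sketch; note that `svm.pred` here comes from the polynomial SVM fitted last):

c(glm = mean(as.integer(glm.pred > 0) != y),   # quadratic logistic regression
  svm = mean(svm.pred != y))                   # degree-2 polynomial SVM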

## Question 7

### 7a)

Auto$mpg=as.factor(ifelse(Auto$mpg>median(Auto$mpg),1,0))   # factor response, so svm() performs classification rather than regression
table(Auto$mpg)
## 
##   0   1 
## 196 196
require(e1071)

### 7b)

costs=data.frame(cost=seq(0.05,100,length.out = 15))                # tuning grid for the cost parameter
svm.tune=tune(svm,mpg~.,data=Auto,ranges=costs,kernel='linear')     # tune() performs 10-fold cross-validation by default
svm.tune
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##  0.05
## 
## - best performance: 0.1025101
plot(svm.tune$performances[,c('cost','error')],type='l')    # CV error as a function of cost

The lowest CV error (about 0.10) is already attained at the smallest cost in the grid, 0.05.

### 7c)

params = data.frame(cost = seq(0.05,100,length.out=5), degree = seq(1,100,length.out = 5))   # note: polynomial degrees are usually small integers, so most of this degree grid is far too high
svm.poly = tune(svm,mpg~.,data=Auto,ranges=params,kernel='polynomial')
svm.poly
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost degree
##   100      1
## 
## - best performance: 0.09664753
params=data.frame(cost=seq(0.05,100,length.out = 5),gamma=seq(0.1,100,length.out = 5))
svm.radial=tune(svm,mpg~.,data=Auto,ranges=params,kernel='radial')
svm.radial
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##     cost gamma
##  25.0375   0.1
## 
## - best performance: 0.07177666
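
Collecting the best CV errors in one place shows the radial kernel ahead (about 0.072, versus roughly 0.10 for linear and 0.097 for polynomial); a small sketch, where `svm.tune` is the linear fit from 7b:

data.frame(kernel   = c('linear', 'polynomial', 'radial'),
           cv.error = c(svm.tune$best.performance,
                        svm.poly$best.performance,
                        svm.radial$best.performance))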

## Question 8

### 8a)

set.seed(42)
train=sample(1:nrow(OJ),800)     # 800 of the 1070 OJ observations for training
test=(1:nrow(OJ))[-train]        # the remaining 270 for testing

tb=c()      # rows of the final summary table (one per kernel)
res=c()     # error rates collected for the current kernel
require(ISLR)
require(e1071)

svm.fit=svm(Purchase~.,data=OJ,subset=train,cost=0.01,kernel='linear')   # support vector classifier with cost = 0.01
summary(svm.fit)
## 
## Call:
## svm(formula = Purchase ~ ., data = OJ, cost = 0.01, kernel = "linear", 
##     subset = train)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  0.01 
## 
## Number of Support Vectors:  432
## 
##  ( 215 217 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM
svm.pred=predict(svm.fit,OJ[train,])
kable(table(OJ[train,'Purchase'],svm.pred))
|   |  CH|  MM|
|:--|---:|---:|
|CH | 432|  60|
|MM |  77| 231|
mean(OJ$Purchase[train] != svm.pred)
## [1] 0.17125
res=cbind(res,'train'=mean(OJ$Purchase[train] != svm.pred))
svm.pred=predict(svm.fit,OJ[test,])
kable(table(OJ[test,'Purchase'],svm.pred))
|   |  CH|  MM|
|:--|---:|---:|
|CH | 142|  19|
|MM |  25|  84|
mean(OJ$Purchase[test] != svm.pred)
## [1] 0.162963
res=cbind(res,'test'=mean(OJ$Purchase[test] != svm.pred))
# Note: seq(0.01,10,25) uses a step of 25, larger than the range, so the grid
# collapses to the single value 0.01 and the "tuned" model is identical to the
# cost=0.01 fit above (length.out=25 was presumably intended).
svm.tune=tune(svm,Purchase~.,data=OJ[train,],ranges=data.frame(cost=seq(0.01,10,25)),kernel='linear')
summary(svm.tune)
## 
## Error estimation of 'svm' using 10-fold cross validation: 0.1825
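
For reference, a sketch of what the call was presumably meant to be, with a grid of 25 cost values (hypothetical; not run here, so the results below still come from the single-value grid):

svm.tune2 = tune(svm, Purchase ~ ., data = OJ[train, ],
                 ranges = list(cost = seq(0.01, 10, length.out = 25)),
                 kernel = 'linear')
summary(svm.tune2)   # would report the CV error for each cost value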
res=cbind(res,'CV'=svm.tune$best.performance)
svm.pred=predict(svm.tune$best.model,OJ[train,])
kable(table(OJ[train,'Purchase'],svm.pred))
|   |  CH|  MM|
|:--|---:|---:|
|CH | 432|  60|
|MM |  77| 231|
mean(OJ$Purchase[train] != svm.pred)
## [1] 0.17125
res=cbind(res,'train.tuned'=mean(OJ$Purchase[train] != svm.pred))
svm.pred=predict(svm.tune$best.model,OJ[test,])
kable(table(OJ[test,'Purchase'],svm.pred))
|   |  CH|  MM|
|:--|---:|---:|
|CH | 142|  19|
|MM |  25|  84|
mean(OJ$Purchase[test] != svm.pred)
## [1] 0.162963
res=cbind(res,'test.tuned'=mean(OJ$Purchase[test] != svm.pred))

tb=rbind(tb,res)
res=c()
svm.fit=svm(Purchase~.,data=OJ,subset=train,cost=0.01,kernel='radial')   # radial kernel with the default gamma
summary(svm.fit)
## 
## Call:
## svm(formula = Purchase ~ ., data = OJ, cost = 0.01, kernel = "radial", 
##     subset = train)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  0.01 
## 
## Number of Support Vectors:  621
## 
##  ( 308 313 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM
svm.pred=predict(svm.fit,OJ[train,])
kable(table(OJ[train,'Purchase'],svm.pred))
|   |  CH|  MM|
|:--|---:|---:|
|CH | 492|   0|
|MM | 308|   0|

At this low cost the radial SVM predicts CH for every training observation.
mean(OJ$Purchase[train] != svm.pred)
## [1] 0.385
res=cbind(res,'train'=mean(OJ$Purchase[train] != svm.pred))


# test
svm.pred=predict(svm.fit,OJ[test,])
kable(table(OJ[test,'Purchase'],svm.pred))
|   |  CH|  MM|
|:--|---:|---:|
|CH | 161|   0|
|MM | 109|   0|
mean(OJ$Purchase[test] != svm.pred)
## [1] 0.4037037
res=cbind(res,'test'=mean(OJ$Purchase[test] != svm.pred))
svm.tune=tune(svm,Purchase~.,data=OJ[train,],ranges=data.frame(cost=seq(0.01,10,25)))   # kernel defaults to 'radial'; the cost grid again collapses to the single value 0.01
summary(svm.tune)
## 
## Error estimation of 'svm' using 10-fold cross validation: 0.385
res=cbind(res,'CV'=svm.tune$best.performance)
svm.pred=predict(svm.tune$best.model,OJ[train,])
kable(table(OJ[train,'Purchase'],svm.pred))
|   |  CH|  MM|
|:--|---:|---:|
|CH | 492|   0|
|MM | 308|   0|
mean(OJ$Purchase[train] != svm.pred)
## [1] 0.385
res=cbind(res,'train.tuned'=mean(OJ$Purchase[train] != svm.pred))


# test
svm.pred=predict(svm.tune$best.model,OJ[test,])
kable(table(OJ[test,'Purchase'],svm.pred))
|   |  CH|  MM|
|:--|---:|---:|
|CH | 161|   0|
|MM | 109|   0|
mean(OJ$Purchase[test] != svm.pred)
## [1] 0.4037037
res=cbind(res,'test.tuned'=mean(OJ$Purchase[test] != svm.pred))

tb=rbind(tb,res)
res=c()
svm.fit=svm(Purchase~.,data=OJ,subset=train,cost=0.01,kernel='polynomial')   # polynomial kernel with the default degree 3
summary(svm.fit)
## 
## Call:
## svm(formula = Purchase ~ ., data = OJ, cost = 0.01, kernel = "polynomial", 
##     subset = train)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  polynomial 
##        cost:  0.01 
##      degree:  3 
##      coef.0:  0 
## 
## Number of Support Vectors:  614
## 
##  ( 303 311 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM
svm.pred=predict(svm.fit,OJ[train,])
kable(table(OJ[train,'Purchase'],svm.pred))
|   |  CH|  MM|
|:--|---:|---:|
|CH | 486|   6|
|MM | 287|  21|
mean(OJ$Purchase[train] != svm.pred)
## [1] 0.36625
res=cbind(res,'train'=mean(OJ$Purchase[train] != svm.pred))

# test
svm.pred=predict(svm.fit,OJ[test,])
kable(table(OJ[test,'Purchase'],svm.pred))
|   |  CH|  MM|
|:--|---:|---:|
|CH | 160|   1|
|MM | 101|   8|
mean(OJ$Purchase[test] != svm.pred)
## [1] 0.3777778
res=cbind(res,'test'=mean(OJ$Purchase[test] != svm.pred))
svm.tune=tune(svm,Purchase~.,data=OJ[train,],ranges=data.frame(cost=seq(0.01,10,25)),kernel='polynomial')   # the cost grid again collapses to the single value 0.01
summary(svm.tune)
## 
## Error estimation of 'svm' using 10-fold cross validation: 0.36625
res=cbind(res,'CV'=svm.tune$best.performance)
svm.pred=predict(svm.tune$best.model,OJ[train,])
kable(table(OJ[train,'Purchase'],svm.pred))
|   |  CH|  MM|
|:--|---:|---:|
|CH | 486|   6|
|MM | 287|  21|
mean(OJ$Purchase[train] != svm.pred)
## [1] 0.36625
res=cbind(res,'train.tuned'=mean(OJ$Purchase[train] != svm.pred))


# test
svm.pred=predict(svm.tune$best.model,OJ[test,])
kable(table(OJ[test,'Purchase'],svm.pred))
|   |  CH|  MM|
|:--|---:|---:|
|CH | 160|   1|
|MM | 101|   8|
mean(OJ$Purchase[test] != svm.pred)
## [1] 0.3777778
res=cbind(res,'test.tuned'=mean(OJ$Purchase[test] != svm.pred))

tb=rbind(tb,res)
rownames(tb)=c('LINEAR','RADIAL','POLYNOMIAL')   # rows were added in this order above: linear, radial, polynomial
kable(tb)

|           |   train|      test|      CV| train.tuned| test.tuned|
|:----------|-------:|---------:|-------:|-----------:|----------:|
|LINEAR     | 0.17125| 0.1629630| 0.18250|     0.17125|  0.1629630|
|RADIAL     | 0.38500| 0.4037037| 0.38500|     0.38500|  0.4037037|
|POLYNOMIAL | 0.36625| 0.3777778| 0.36625|     0.36625|  0.3777778|

With cost fixed at 0.01, the linear kernel clearly performs best on both the training and the test set. The tuned columns equal the untuned ones because, as noted above, each cost grid contained only the single value 0.01.