library(MASS)
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.0.5
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
library(e1071)
## Warning: package 'e1071' was built under R version 4.0.3
library(ISLR)
## Warning: package 'ISLR' was built under R version 4.0.5

Question 5

A)

set.seed(421)
x1 = runif(500) - 0.5  # 500 points, uniform on [-0.5, 0.5]
x2 = runif(500) - 0.5
y = 1 * (x1^2 - x2^2 > 0)  # class 1 where |x1| > |x2| (quadratic boundary)

B)

plot(x1[y == 0], x2[y == 0], col = "red", xlab = "x1", ylab = "x2", pch = "+")
points(x1[y == 1], x2[y == 1], col = "blue", pch = 4)

The two classes are separated by a clearly non-linear (quadratic) decision boundary.

C)

lm.gfit = glm(y ~ x1 + x2, family = binomial)
summary(lm.gfit)
## 
## Call:
## glm(formula = y ~ x1 + x2, family = binomial)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.278  -1.227   1.089   1.135   1.175  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.11999    0.08971   1.338    0.181
## x1          -0.16881    0.30854  -0.547    0.584
## x2          -0.08198    0.31476  -0.260    0.795
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 691.35  on 499  degrees of freedom
## Residual deviance: 690.99  on 497  degrees of freedom
## AIC: 696.99
## 
## Number of Fisher Scoring iterations: 3
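Neither x1 nor x2 is statistically significant (p-values of 0.584 and 0.795). As a small sketch, the p-values can also be pulled out of the fitted model directly:

coef(summary(lm.gfit))[, "Pr(>|z|)"]  # p-values for the intercept, x1, and x2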

D)

data = data.frame(x1 = x1, x2 = x2, y = y)
lm.prob = predict(lm.gfit, data, type = "response")
lm.pred = ifelse(lm.prob > 0.52, 1, 0)  # cutoff slightly above 0.5 so both predicted classes appear in the plot
data.pos = data[lm.pred == 1, ]
data.neg = data[lm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, col = "blue", xlab = "x1", ylab = "x2", pch = "+")
points(data.neg$x1, data.neg$x2, col = "red", pch = 4)

E) We fit a logistic regression of y on x1 and x2 using quadratic polynomial terms and their product interaction as predictors.

lm.gfit = glm(y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), data = data, family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

F)

lm.prob = predict(lm.gfit, data, type = "response")
lm.pred = ifelse(lm.prob > 0.5, 1, 0)
data.pos = data[lm.pred == 1, ]
data.neg = data[lm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, col = "blue", xlab = "x1", ylab = "x2", pch = "+")
points(data.neg$x1, data.neg$x2, col = "red", pch = 4)

The predicted decision boundary is non-linear and closely resembles the true quadratic boundary.

G)

svm.fit = svm(as.factor(y) ~ x1 + x2, data, kernel = "linear", cost = 0.1)
svm.pred = predict(svm.fit, data)
data.pos = data[svm.pred == 1, ]
data.neg = data[svm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, col = "blue", xlab = "x1", ylab = "x2", pch = "+")
points(data.neg$x1, data.neg$x2, col = "red", pch = 4)

A support vector classifier with a linear kernel can only produce a linear decision boundary, which clearly fails to capture the true non-linear boundary.

H)

svm.fit = svm(as.factor(y) ~ x1 + x2, data, gamma = 1)
svm.pred = predict(svm.fit, data)
data.pos = data[svm.pred == 1, ]
data.neg = data[svm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, col = "blue", xlab = "x1", ylab = "x2", pch = "+")
points(data.neg$x1, data.neg$x2, col = "red", pch = 4)

The predicted classes from the radial-kernel SVM closely match the true decision boundary.

I)

SVMs with non-linear kernels are good at finding non-linear decision boundaries; logistic regression with only linear terms and the linear-kernel SVM are not, although logistic regression with non-linear transformations of the predictors (part E) performs comparably.
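As a quick numerical check of this claim, a minimal sketch (assuming the simulated x1, x2, y, and data objects from above are still in the workspace; the object names svm.lin and svm.rad are introduced only for this comparison) that compares training error rates for the two kernels:

svm.lin = svm(as.factor(y) ~ x1 + x2, data, kernel = "linear", cost = 0.1)
svm.rad = svm(as.factor(y) ~ x1 + x2, data, kernel = "radial", gamma = 1)
mean(predict(svm.lin, data) != y)  # training error rate, linear kernel
mean(predict(svm.rad, data) != y)  # training error rate, radial kernel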

Question 7

In this problem, you will use support vector approaches in order to predict whether a given car gets high or low gas mileage based on the Auto data set.

A) Create a binary variable that takes on a 1 for cars with gas mileage above the median, and a 0 for cars with gas mileage below the median.

gas.med = median(Auto$mpg)
new.var = ifelse(Auto$mpg > gas.med, 1, 0)
Auto$mpglevel = as.factor(new.var)
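As a quick sanity check (a sketch; the exact counts depend on how many cars sit at the median), the two classes should be roughly balanced:

table(Auto$mpglevel)  # counts of low-mileage (0) and high-mileage (1) cars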

B)

set.seed(3255)
tune.out = tune(svm, mpglevel~., data = Auto, kernel = "linear", ranges = list(cost = c(0.01, 0.1, 1, 5, 10, 100)))
summary(tune.out)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##     1
## 
## - best performance: 0.01269231 
## 
## - Detailed performance results:
##    cost      error dispersion
## 1 1e-02 0.07397436 0.06863413
## 2 1e-01 0.05102564 0.06923024
## 3 1e+00 0.01269231 0.02154160
## 4 5e+00 0.01519231 0.01760469
## 5 1e+01 0.02025641 0.02303772
## 6 1e+02 0.03294872 0.02898463
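The best cost and its cross-validation error can also be extracted programmatically from the tune object; a small sketch:

tune.out$best.parameters   # cost = 1
tune.out$best.performance  # cross-validation error of about 0.0127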

C)

set.seed(21)
tune.out = tune(svm, mpglevel ~ ., data = Auto, kernel = "polynomial", ranges = list(cost = c(0.1, 1, 5, 10), degree = c(2, 3, 4)))
summary(tune.out)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost degree
##    10      2
## 
## - best performance: 0.5435897 
## 
## - Detailed performance results:
##    cost degree     error dispersion
## 1   0.1      2 0.5587821 0.04538579
## 2   1.0      2 0.5587821 0.04538579
## 3   5.0      2 0.5587821 0.04538579
## 4  10.0      2 0.5435897 0.05611162
## 5   0.1      3 0.5587821 0.04538579
## 6   1.0      3 0.5587821 0.04538579
## 7   5.0      3 0.5587821 0.04538579
## 8  10.0      3 0.5587821 0.04538579
## 9   0.1      4 0.5587821 0.04538579
## 10  1.0      4 0.5587821 0.04538579
## 11  5.0      4 0.5587821 0.04538579
## 12 10.0      4 0.5587821 0.04538579
set.seed(463)
tune.out = tune(svm, mpglevel ~ ., data = Auto, kernel = "radial", ranges = list(cost = c(0.1, 1, 5, 10), gamma = c(0.01, 0.1, 1, 5, 10, 100)))
summary(tune.out)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost gamma
##    10  0.01
## 
## - best performance: 0.02551282 
## 
## - Detailed performance results:
##    cost gamma      error dispersion
## 1   0.1 1e-02 0.09429487 0.04814900
## 2   1.0 1e-02 0.07897436 0.03875105
## 3   5.0 1e-02 0.05352564 0.02532795
## 4  10.0 1e-02 0.02551282 0.02417610
## 5   0.1 1e-01 0.07891026 0.03847631
## 6   1.0 1e-01 0.05602564 0.02881876
## 7   5.0 1e-01 0.03826923 0.03252085
## 8  10.0 1e-01 0.03320513 0.02964746
## 9   0.1 1e+00 0.57660256 0.05479863
## 10  1.0 1e+00 0.06628205 0.02996211
## 11  5.0 1e+00 0.06115385 0.02733573
## 12 10.0 1e+00 0.06115385 0.02733573
## 13  0.1 5e+00 0.57660256 0.05479863
## 14  1.0 5e+00 0.51538462 0.06642516
## 15  5.0 5e+00 0.50775641 0.07152757
## 16 10.0 5e+00 0.50775641 0.07152757
## 17  0.1 1e+01 0.57660256 0.05479863
## 18  1.0 1e+01 0.53833333 0.05640443
## 19  5.0 1e+01 0.53070513 0.05708644
## 20 10.0 1e+01 0.53070513 0.05708644
## 21  0.1 1e+02 0.57660256 0.05479863
## 22  1.0 1e+02 0.57660256 0.05479863
## 23  5.0 1e+02 0.57660256 0.05479863
## 24 10.0 1e+02 0.57660256 0.05479863

D)

svm.linear = svm(mpglevel ~., data = Auto, kernel = "linear", cost = 1)
svm.poly = svm(mpglevel ~ ., data = Auto, kernel = "polynomial", cost = 10, degree = 2)
svm.radial = svm(mpglevel ~ ., data = Auto, kernel = "radial", cost = 10, gamma = 0.01)
# plot the SVM decision regions for mpg against each of the remaining predictors
plotpairs = function(fit) {
  for (name in names(Auto)[!(names(Auto) %in% c("mpg", "mpglevel", "name"))]) {
    plot(fit, Auto, as.formula(paste("mpg~", name, sep = "")))
  }
}
plotpairs(svm.linear)

plotpairs(svm.poly)

plotpairs(svm.radial)

Question 8

This problem involves the OJ data set, which is part of the ISLR package.

A) Create a training set containing a random sample of 800 observations, and a test set containing the remaining observations.

set.seed(9004)
train = sample(dim(OJ)[1], 800)
OJ.train = OJ[train, ]
OJ.test = OJ[-train, ]
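The OJ data set has 1070 rows, so the test set holds the remaining 270 observations; this 270 is the denominator used in the test error rates below. A quick check:

dim(OJ.train)  # 800 rows
dim(OJ.test)   # 270 rows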

B) Fit a support vector classifier to the training data using cost = 0.1, with Purchase as the response and the other variables as predictors. Use the summary() function to produce summary statistics.

svm.linear = svm(Purchase ~ ., kernel = "linear", data = OJ.train, cost = 0.1)
summary(svm.linear)
## 
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "linear", cost = 0.1)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  0.1 
## 
## Number of Support Vectors:  351
## 
##  ( 177 174 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM

There are 351 support vectors: 177 from class CH and 174 from class MM.

C)What are the training and test error rates?

train.pred = predict(svm.linear, OJ.train)
table(OJ.train$Purchase, train.pred)
##     train.pred
##       CH  MM
##   CH 424  59
##   MM  72 245

The training error rate is (59 + 72) / 800 = 0.16375.

test.pred = predict(svm.linear, OJ.test)
table(OJ.test$Purchase, test.pred)
##     test.pred
##       CH  MM
##   CH 145  25
##   MM  21  79

The test error rate is (25 + 21) / 270 ≈ 0.1704.
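The same error rates can be computed directly, without reading counts off the confusion matrices; a small sketch using the predictions from above:

mean(train.pred != OJ.train$Purchase)  # training error rate
mean(test.pred != OJ.test$Purchase)    # test error rate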

D) Use the tune() function to select an optimal cost, considering values in the range 0.01 to 10.

tune.out = tune(svm, Purchase ~ ., data = OJ.train, kernel = "linear", ranges = list(cost = 10^seq(-2, 1, by = 0.25)))
summary(tune.out)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##    10
## 
## - best performance: 0.17 
## 
## - Detailed performance results:
##           cost   error dispersion
## 1   0.01000000 0.17375 0.04875178
## 2   0.01778279 0.17125 0.04788949
## 3   0.03162278 0.17375 0.04875178
## 4   0.05623413 0.17500 0.04526159
## 5   0.10000000 0.17250 0.04401704
## 6   0.17782794 0.17500 0.04787136
## 7   0.31622777 0.17375 0.04581439
## 8   0.56234133 0.17500 0.04787136
## 9   1.00000000 0.17750 0.04851976
## 10  1.77827941 0.17750 0.04556741
## 11  3.16227766 0.17375 0.04267529
## 12  5.62341325 0.17375 0.04016027
## 13 10.00000000 0.17000 0.04090979

Tuning shows that the optimal cost is 10, with a cross-validation error of 0.17.

E) Compute the training and test error rates using this new value for cost.

svm.linear = svm(Purchase ~ ., kernel = "linear", data = OJ.train, cost = tune.out$best.parameters$cost)
train.pred = predict(svm.linear, OJ.train)
table(OJ.train$Purchase, train.pred)
##     train.pred
##       CH  MM
##   CH 427  56
##   MM  69 248

The training error rate decreases slightly to (56 + 69) / 800 = 0.15625.
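Part (e) also asks for the test error rate with the tuned cost; a minimal sketch (the resulting value depends on the random split above and is therefore not quoted here):

test.pred = predict(svm.linear, OJ.test)
table(OJ.test$Purchase, test.pred)
mean(test.pred != OJ.test$Purchase)  # test error rate at the tuned cost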

F) Repeat parts (b) through (e) using a support vector machine with a radial kernel. Use the default value for gamma.

set.seed(410)
svm.radial = svm(Purchase ~ ., data = OJ.train, kernel = "radial")
summary(svm.radial)
## 
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "radial")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
## 
## Number of Support Vectors:  371
## 
##  ( 188 183 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM
train.pred = predict(svm.radial, OJ.train)
table(OJ.train$Purchase, train.pred)
##     train.pred
##       CH  MM
##   CH 441  42
##   MM  74 243
test.pred = predict(svm.radial, OJ.test)
table(OJ.test$Purchase, test.pred)
##     test.pred
##       CH  MM
##   CH 148  22
##   MM  27  73

The SVM with a radial kernel and the default gamma uses 371 support vectors: 188 from class CH and 183 from class MM. Its training error rate is (42 + 74) / 800 = 0.145 and its test error rate is (22 + 27) / 270 ≈ 0.1815.

set.seed(755)
tune.out = tune(svm, Purchase ~ ., data = OJ.train, kernel = "radial", ranges = list(cost = 10^seq(-2, 
    1, by = 0.25)))
summary(tune.out)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##       cost
##  0.3162278
## 
## - best performance: 0.1675 
## 
## - Detailed performance results:
##           cost   error dispersion
## 1   0.01000000 0.39625 0.06615691
## 2   0.01778279 0.39625 0.06615691
## 3   0.03162278 0.35375 0.09754807
## 4   0.05623413 0.20000 0.04249183
## 5   0.10000000 0.17750 0.04073969
## 6   0.17782794 0.17125 0.03120831
## 7   0.31622777 0.16750 0.04216370
## 8   0.56234133 0.16750 0.03782269
## 9   1.00000000 0.17250 0.03670453
## 10  1.77827941 0.17750 0.03374743
## 11  3.16227766 0.18000 0.04005205
## 12  5.62341325 0.18000 0.03446012
## 13 10.00000000 0.18625 0.04427267
svm.radial = svm(Purchase ~ ., data = OJ.train, kernel = "radial", cost = tune.out$best.parameters$cost)
train.pred = predict(svm.radial, OJ.train)
table(OJ.train$Purchase, train.pred)
##     train.pred
##       CH  MM
##   CH 440  43
##   MM  81 236

The training error rate with the tuned cost is (43 + 81) / 800 = 0.155.

test.pred = predict(svm.radial, OJ.test)
table(OJ.test$Purchase, test.pred)
##     test.pred
##       CH  MM
##   CH 145  25
##   MM  28  72

The test error rate with the tuned cost is (25 + 28) / 270 ≈ 0.1963. On this split, tuning the cost does not improve on the default radial SVM (training error 0.145, test error ≈ 0.1815).

G) Repeat parts (b) through (e) using a support vector machine with a polynomial kernel. Set degree = 2.

set.seed(8112)
svm.poly = svm(Purchase ~ ., data = OJ.train, kernel = "poly", degree = 2)
summary(svm.poly)
## 
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "poly", degree = 2)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  polynomial 
##        cost:  1 
##      degree:  2 
##      coef.0:  0 
## 
## Number of Support Vectors:  456
## 
##  ( 232 224 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM

There are 456 support vectors: 232 from class CH and 224 from class MM.

train.pred = predict(svm.poly, OJ.train)
table(OJ.train$Purchase, train.pred)
##     train.pred
##       CH  MM
##   CH 450  33
##   MM 111 206
test.pred = predict(svm.poly, OJ.test)
table(OJ.test$Purchase, test.pred)
##     test.pred
##       CH  MM
##   CH 149  21
##   MM  34  66
set.seed(322)
tune.out = tune(svm, Purchase ~ ., data = OJ.train, kernel = "poly", degree = 2, 
    ranges = list(cost = 10^seq(-2, 1, by = 0.25)))
summary(tune.out)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##    10
## 
## - best performance: 0.18 
## 
## - Detailed performance results:
##           cost   error dispersion
## 1   0.01000000 0.39250 0.05749396
## 2   0.01778279 0.37500 0.05863020
## 3   0.03162278 0.36375 0.05756940
## 4   0.05623413 0.33875 0.06626179
## 5   0.10000000 0.30375 0.05172376
## 6   0.17782794 0.24000 0.04440971
## 7   0.31622777 0.21000 0.04362084
## 8   0.56234133 0.20250 0.03987829
## 9   1.00000000 0.20375 0.03634805
## 10  1.77827941 0.19500 0.04866267
## 11  3.16227766 0.18750 0.04409586
## 12  5.62341325 0.18875 0.04185375
## 13 10.00000000 0.18000 0.03593976
test.pred = predict(svm.poly, OJ.test)
table(OJ.test$Purchase, test.pred)
##     test.pred
##       CH  MM
##   CH 149  21
##   MM  34  66

The test error rate for the default-cost polynomial kernel is (21 + 34) / 270 ≈ 0.2037.
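Note that the two predictions above reuse the default-cost polynomial fit; to evaluate the tuned cost selected by tune.out (cost = 10), the model would be refit first. A minimal sketch (error rates depend on the split):

svm.poly = svm(Purchase ~ ., data = OJ.train, kernel = "poly", degree = 2,
    cost = tune.out$best.parameters$cost)
mean(predict(svm.poly, OJ.train) != OJ.train$Purchase)  # training error rate
mean(predict(svm.poly, OJ.test) != OJ.test$Purchase)    # test error rate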

H) Overall, which approach seems to give the best results on this data?

Based on the test error rates computed above (about 0.17 for the linear kernel, 0.18 to 0.20 for the radial kernel, and 0.20 for the polynomial kernel), the support vector classifier with a linear kernel gives the best results on this particular train/test split.
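A compact way to put the three fitted models side by side on the held-out data (a sketch, assuming svm.linear, svm.radial, and svm.poly refer to the most recently fitted version of each model; the exact numbers depend on the random split):

fits = list(linear = svm.linear, radial = svm.radial, polynomial = svm.poly)
sapply(fits, function(fit) mean(predict(fit, OJ.test) != OJ.test$Purchase))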