Chapter 9

library(ISLR)

## Warning: package 'ISLR' was built under R version 4.2.2

library(e1071)

## Warning: package 'e1071' was built under R version 4.2.2

5 A

# Simulate 500 observations with two predictors, each uniform on [-0.5, 0.5].
set.seed(1)
x1 <- runif(500) - 0.5
x2 <- runif(500) - 0.5
# Class label: 1 where x1^2 - x2^2 is positive, 0 otherwise.
y <- as.numeric(x1^2 - x2^2 > 0)

# Scatter the observations, class 0 in red ("+") and class 1 in blue (pch 4).
plot(
  x1[y == 0], x2[y == 0],
  col = "red", pch = "+",
  xlab = "X1", ylab = "X2"
)
points(x1[y == 1], x2[y == 1], col = "blue", pch = 4)

# Logistic regression of y on x1 and x2 (linear terms only).
lm.fit <- glm(y ~ x1 + x2, family = binomial)
summary(lm.fit)

## 
## Call:
## glm(formula = y ~ x1 + x2, family = binomial)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.179  -1.139  -1.112   1.206   1.257  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.087260   0.089579  -0.974    0.330
## x1           0.196199   0.316864   0.619    0.536
## x2          -0.002854   0.305712  -0.009    0.993
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 692.18  on 499  degrees of freedom
## Residual deviance: 691.79  on 497  degrees of freedom
## AIC: 697.79
## 
## Number of Fisher Scoring iterations: 3

I had issues past this point and got too frustrated to continue. Thanks for everything this semester. I will say this class is intense, and I relied a decent amount on the internet, but I did learn a decent amount about some of the core concepts. It is actually pretty nice to understand the concepts behind testing/training data and some of the different approaches. Now, when speaking with my data analytics team at work, I am not as intimidated!

7 A

# Binary response: 1 when a car's mpg is above the median mpg, 0 otherwise,
# stored on Auto as the factor column mpglevel.
gas.med <- median(Auto$mpg)
new.var <- as.numeric(Auto$mpg > gas.med)
Auto$mpglevel <- as.factor(new.var)

# NOTE(review): `mpglevel ~ .` keeps mpg (which mpglevel is derived from) and
# every other Auto column among the predictors -- confirm this is intended.
# Cross-validate a linear-kernel SVM over six candidate cost values.
set.seed(1)
tune.out <- tune(
  svm, mpglevel ~ .,
  data = Auto,
  kernel = "linear",
  ranges = list(cost = c(0.01, 0.1, 1, 5, 10, 100))
)
summary(tune.out)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##     1
## 
## - best performance: 0.01025641 
## 
## - Detailed performance results:
##    cost      error dispersion
## 1 1e-02 0.07653846 0.03617137
## 2 1e-01 0.04596154 0.03378238
## 3 1e+00 0.01025641 0.01792836
## 4 5e+00 0.02051282 0.02648194
## 5 1e+01 0.02051282 0.02648194
## 6 1e+02 0.03076923 0.03151981

# Cross-validate a polynomial-kernel SVM over every cost/degree combination.
set.seed(1)
tune.out <- tune(
  svm, mpglevel ~ .,
  data = Auto,
  kernel = "polynomial",
  ranges = list(cost = c(0.1, 1, 5, 10), degree = c(2, 3, 4))
)
summary(tune.out)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost degree
##    10      2
## 
## - best performance: 0.5130128 
## 
## - Detailed performance results:
##    cost degree     error dispersion
## 1   0.1      2 0.5511538 0.04366593
## 2   1.0      2 0.5511538 0.04366593
## 3   5.0      2 0.5511538 0.04366593
## 4  10.0      2 0.5130128 0.08963366
## 5   0.1      3 0.5511538 0.04366593
## 6   1.0      3 0.5511538 0.04366593
## 7   5.0      3 0.5511538 0.04366593
## 8  10.0      3 0.5511538 0.04366593
## 9   0.1      4 0.5511538 0.04366593
## 10  1.0      4 0.5511538 0.04366593
## 11  5.0      4 0.5511538 0.04366593
## 12 10.0      4 0.5511538 0.04366593

# Cross-validate a radial-kernel SVM over every cost/gamma combination.
set.seed(3)
tune.out <- tune(
  svm, mpglevel ~ .,
  data = Auto,
  kernel = "radial",
  ranges = list(
    cost = c(0.1, 1, 5, 10),
    gamma = c(0.01, 0.1, 1, 5, 10, 100)
  )
)
summary(tune.out)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost gamma
##    10  0.01
## 
## - best performance: 0.02551282 
## 
## - Detailed performance results:
##    cost gamma      error dispersion
## 1   0.1 1e-02 0.08935897 0.05024613
## 2   1.0 1e-02 0.07397436 0.03896185
## 3   5.0 1e-02 0.05358974 0.03718096
## 4  10.0 1e-02 0.02551282 0.02417610
## 5   0.1 1e-01 0.07653846 0.04350608
## 6   1.0 1e-01 0.05358974 0.03718096
## 7   5.0 1e-01 0.03320513 0.02720447
## 8  10.0 1e-01 0.02807692 0.01894083
## 9   0.1 1e+00 0.55346154 0.04319433
## 10  1.0 1e+00 0.06384615 0.04400278
## 11  5.0 1e+00 0.06391026 0.04047896
## 12 10.0 1e+00 0.06391026 0.04047896
## 13  0.1 5e+00 0.55346154 0.04319433
## 14  1.0 5e+00 0.49230769 0.05344444
## 15  5.0 5e+00 0.48980769 0.05628746
## 16 10.0 5e+00 0.48980769 0.05628746
## 17  0.1 1e+01 0.55346154 0.04319433
## 18  1.0 1e+01 0.52019231 0.06053102
## 19  5.0 1e+01 0.51006410 0.04925670
## 20 10.0 1e+01 0.51006410 0.04925670
## 21  0.1 1e+02 0.55346154 0.04319433
## 22  1.0 1e+02 0.55346154 0.04319433
## 23  5.0 1e+02 0.55346154 0.04319433
## 24 10.0 1e+02 0.55346154 0.04319433

# Fit one SVM per kernel on the full Auto data, with fixed hyper-parameters.
svm.linear <- svm(mpglevel ~ ., data = Auto, kernel = "linear", cost = 1)
svm.poly <- svm(
  mpglevel ~ .,
  data = Auto, kernel = "polynomial",
  cost = 10, degree = 2
)
svm.radial <- svm(
  mpglevel ~ .,
  data = Auto, kernel = "radial",
  cost = 10, gamma = 0.01
)

# Call plot() on `fit` and Auto once per remaining predictor (every Auto
# column except mpg, mpglevel and name), using the formula mpg ~ <predictor>.
plotpairs <- function(fit) {
  predictors <- setdiff(names(Auto), c("mpg", "mpglevel", "name"))
  for (predictor in predictors) {
    plot(fit, Auto, as.formula(paste0("mpg~", predictor)))
  }
}
plotpairs(svm.linear)

8 A

# Hold out a test set: 800 randomly sampled rows of OJ form the training set
# and the remaining rows form the test set.
# OJ is deliberately not attach()ed: the visible code reaches its columns only
# through `data =` or `$`, so attaching would just add a name-masking risk.
set.seed(1)
train <- sample(nrow(OJ), 800)
OJtraining <- OJ[train, ]
OJtesting <- OJ[-train, ]

# Linear-kernel SVM predicting Purchase from all other columns, cost = 0.01.
OJsvm.lin <- svm(
  Purchase ~ .,
  data = OJtraining, kernel = "linear", cost = 0.01
)
summary(OJsvm.lin)

## 
## Call:
## svm(formula = Purchase ~ ., data = OJtraining, kernel = "linear", 
##     cost = 0.01)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  0.01 
## 
## Number of Support Vectors:  435
## 
##  ( 219 216 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM

# Training-set confusion matrix: rows are observed Purchase, columns predicted.
train.pred <- predict(OJsvm.lin, newdata = OJtraining)
table(OJtraining$Purchase, train.pred)

##     train.pred
##       CH  MM
##   CH 420  65
##   MM  75 240

The error rate is 17.5%

# Test-set confusion matrix: rows are observed Purchase, columns predicted.
test.pred <- predict(OJsvm.lin, newdata = OJtesting)
table(OJtesting$Purchase, test.pred)

##     test.pred
##       CH  MM
##   CH 153  15
##   MM  33  69

The error rate is 17.8%

# Cross-validate the linear-kernel cost on the training set over a log-spaced
# grid running from 0.01 to 10.
tune.out <- tune(
  svm, Purchase ~ .,
  data = OJtraining,
  kernel = "linear",
  ranges = list(cost = 10^seq(-2, 1, by = 0.25))
)
summary(tune.out)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##    10
## 
## - best performance: 0.17125 
## 
## - Detailed performance results:
##           cost   error dispersion
## 1   0.01000000 0.17375 0.03884174
## 2   0.01778279 0.17500 0.03996526
## 3   0.03162278 0.17750 0.03717451
## 4   0.05623413 0.18000 0.03073181
## 5   0.10000000 0.17875 0.03064696
## 6   0.17782794 0.17875 0.03537988
## 7   0.31622777 0.17875 0.03438447
## 8   0.56234133 0.17625 0.03197764
## 9   1.00000000 0.17500 0.03061862
## 10  1.77827941 0.17375 0.02972676
## 11  3.16227766 0.17250 0.03270236
## 12  5.62341325 0.17250 0.03322900
## 13 10.00000000 0.17125 0.03488573

# Refit the linear SVM at the cost stored in tune.out's best parameters, then
# tabulate observed against predicted Purchase on the training set.
svm.linear <- svm(
  Purchase ~ .,
  data = OJtraining, kernel = "linear",
  cost = tune.out$best.parameters$cost
)
train.pred <- predict(svm.linear, newdata = OJtraining)
table(OJtraining$Purchase, train.pred)

##     train.pred
##       CH  MM
##   CH 423  62
##   MM  69 246

# Tabulate observed against predicted Purchase on the test set (linear SVM).
test.pred <- predict(svm.linear, newdata = OJtesting)
table(OJtesting$Purchase, test.pred)

##     test.pred
##       CH  MM
##   CH 156  12
##   MM  28  74

E The error rates have reduced to 16.4% and 14.8% respectively

# Radial-kernel SVM on the training set; no cost or gamma is supplied, so
# svm() uses its defaults.
svm.radial <- svm(Purchase ~ ., data = OJtraining, kernel = "radial")
summary(svm.radial)

## 
## Call:
## svm(formula = Purchase ~ ., data = OJtraining, kernel = "radial")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
## 
## Number of Support Vectors:  373
## 
##  ( 188 185 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM

# Training-set confusion matrix for the radial SVM.
train.pred <- predict(svm.radial, newdata = OJtraining)
table(OJtraining$Purchase, train.pred)

##     train.pred
##       CH  MM
##   CH 441  44
##   MM  77 238

# Test-set confusion matrix for the radial SVM.
test.pred <- predict(svm.radial, newdata = OJtesting)
table(OJtesting$Purchase, test.pred)

##     test.pred
##       CH  MM
##   CH 151  17
##   MM  33  69

# Choose the radial-kernel cost by cross-validation on the TRAINING set only.
# Tuning on OJtesting would let the held-out rows influence model selection
# and make the test error computed afterwards optimistic.
# NOTE: the tune() output recorded after this chunk was produced by an earlier
# run on OJtesting and must be regenerated.
tune.out <- tune(
  svm, Purchase ~ .,
  data = OJtraining,
  kernel = "radial",
  ranges = list(cost = 10^seq(-2, 1, by = 0.25))
)
summary(tune.out)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##     1
## 
## - best performance: 0.1703704 
## 
## - Detailed performance results:
##           cost     error dispersion
## 1   0.01000000 0.3777778 0.08151888
## 2   0.01778279 0.3777778 0.08151888
## 3   0.03162278 0.3777778 0.08151888
## 4   0.05623413 0.3777778 0.08151888
## 5   0.10000000 0.3740741 0.10395326
## 6   0.17782794 0.2259259 0.10966132
## 7   0.31622777 0.1888889 0.08454762
## 8   0.56234133 0.1777778 0.08868289
## 9   1.00000000 0.1703704 0.08936771
## 10  1.77827941 0.1888889 0.09634377
## 11  3.16227766 0.2037037 0.08597771
## 12  5.62341325 0.2074074 0.08038924
## 13 10.00000000 0.2185185 0.07499428

# Refit the radial SVM at the cost stored in tune.out's best parameters, then
# tabulate observed against predicted Purchase on the training set.
svm.radial <- svm(
  Purchase ~ .,
  data = OJtraining, kernel = "radial",
  cost = tune.out$best.parameters$cost
)
train.pred <- predict(svm.radial, newdata = OJtraining)
table(OJtraining$Purchase, train.pred)

##     train.pred
##       CH  MM
##   CH 441  44
##   MM  77 238

# Tabulate observed against predicted Purchase on the test set (radial SVM).
test.pred <- predict(svm.radial, newdata = OJtesting)
table(OJtesting$Purchase, test.pred)

##     test.pred
##       CH  MM
##   CH 151  17
##   MM  33  69

# Degree-2 polynomial-kernel SVM on the training set; cost is left unset.
svm.poly <- svm(
  Purchase ~ .,
  data = OJtraining, kernel = "poly", degree = 2
)
summary(svm.poly)

## 
## Call:
## svm(formula = Purchase ~ ., data = OJtraining, kernel = "poly", degree = 2)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  polynomial 
##        cost:  1 
##      degree:  2 
##      coef.0:  0 
## 
## Number of Support Vectors:  447
## 
##  ( 225 222 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM

# Training-set confusion matrix for the polynomial SVM.
train.pred <- predict(svm.poly, newdata = OJtraining)
table(OJtraining$Purchase, train.pred)

##     train.pred
##       CH  MM
##   CH 449  36
##   MM 110 205

# Test-set confusion matrix for the polynomial SVM.
test.pred <- predict(svm.poly, newdata = OJtesting)
table(OJtesting$Purchase, test.pred)

##     test.pred
##       CH  MM
##   CH 153  15
##   MM  45  57

# Cross-validate the cost of the degree-2 polynomial SVM on the training set
# over a log-spaced grid running from 0.01 to 10.
tune.out <- tune(
  svm, Purchase ~ .,
  data = OJtraining,
  kernel = "poly", degree = 2,
  ranges = list(cost = 10^seq(-2, 1, by = 0.25))
)
summary(tune.out)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##      cost
##  5.623413
## 
## - best performance: 0.18625 
## 
## - Detailed performance results:
##           cost   error dispersion
## 1   0.01000000 0.39000 0.04281744
## 2   0.01778279 0.37000 0.04048319
## 3   0.03162278 0.36625 0.03998698
## 4   0.05623413 0.34000 0.03987829
## 5   0.10000000 0.32375 0.04427267
## 6   0.17782794 0.24500 0.06241661
## 7   0.31622777 0.21250 0.06038074
## 8   0.56234133 0.20625 0.05408648
## 9   1.00000000 0.19375 0.05628857
## 10  1.77827941 0.19250 0.05109903
## 11  3.16227766 0.18750 0.05464532
## 12  5.62341325 0.18625 0.05015601
## 13 10.00000000 0.18625 0.04387878

# Refit the degree-2 polynomial SVM at the cost stored in tune.out's best
# parameters, then tabulate observed against predicted Purchase (training set).
svm.poly <- svm(
  Purchase ~ .,
  data = OJtraining, kernel = "poly", degree = 2,
  cost = tune.out$best.parameters$cost
)
train.pred <- predict(svm.poly, newdata = OJtraining)
table(OJtraining$Purchase, train.pred)

##     train.pred
##       CH  MM
##   CH 447  38
##   MM  88 227

# Tabulate observed against predicted Purchase on the test set (polynomial SVM).
test.pred <- predict(svm.poly, newdata = OJtesting)
table(OJtesting$Purchase, test.pred)

##     test.pred
##       CH  MM
##   CH 154  14
##   MM  36  66

H Steps D/E (the linear kernel with tuned cost) had the best test error rate.

Chapter 9

Charles Ponthieux

2022-11-30