Chapter 9: 5, 7, 8
5. a)
x1=runif (500) -0.5
x2=runif (500) -0.5
y=1*(x1^2-x2^2 > 0)
b)
plot(x1,x2, col= (6 -y))
c)
c<- glm(y ~ x1 + x2, family = "binomial")
summary(c)
##
## Call:
## glm(formula = y ~ x1 + x2, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.246 -1.162 -1.097 1.172 1.282
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.03206 0.08968 -0.358 0.721
## x1 0.35145 0.31683 1.109 0.267
## x2 0.09408 0.31036 0.303 0.762
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 693.02 on 499 degrees of freedom
## Residual deviance: 691.70 on 497 degrees of freedom
## AIC: 697.7
##
## Number of Fisher Scoring iterations: 3
d)
data <- data.frame(x1 = x1, x2 = x2, y = y)
probs <- predict(c, data, type = "response")
preds <- rep(0, 500)
preds[probs > 0.47] <- 1
plot(data[preds == 1, ]$x1, data[preds == 1, ]$x2, col = (4 - 1), pch = (3 - 1), xlab = "X1", ylab = "X2")
points(data[preds == 0, ]$x1, data[preds == 0, ]$x2, col = (4 - 0), pch = (3 - 0))
e)
e <- glm(y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), family = "binomial")
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(e)
##
## Call:
## glm(formula = y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.213e-03 -2.000e-08 -2.000e-08 2.000e-08 1.291e-03
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -39.69 32913.43 -0.001 0.999
## poly(x1, 2)1 481.05 116802.77 0.004 0.997
## poly(x1, 2)2 32460.15 989545.80 0.033 0.974
## poly(x2, 2)1 -1434.51 123305.81 -0.012 0.991
## poly(x2, 2)2 -31954.11 929756.98 -0.034 0.973
## I(x1 * x2) -262.78 396724.37 -0.001 0.999
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6.9302e+02 on 499 degrees of freedom
## Residual deviance: 4.8228e-06 on 494 degrees of freedom
## AIC: 12
##
## Number of Fisher Scoring iterations: 25
f)
probs <- predict(e, data, type = "response")
preds <- rep(0, 500)
preds[probs > 0.47] <- 1
plot(data[preds == 1, ]$x1, data[preds == 1, ]$x2, col = (7 - 1), xlab = "X1", ylab = "X2")
points(data[preds == 0, ]$x1, data[preds == 0, ]$x2, col = (4 - 0))
g)
library(e1071)
data$y <- as.factor(data$y)
g <- svm(y ~ x1 + x2, data, kernel = "linear", cost = 0.01)
preds <- predict(g, data)
plot(data[preds == 0, ]$x1, data[preds == 0, ]$x2, col = (4 - 0), xlab = "X1", ylab = "X2")
points(data[preds == 1, ]$x1, data[preds == 1, ]$x2, col = (4 - 1))
h)
data$y <- as.factor(data$y)
h <- svm(y ~ x1 + x2, data, kernel = "radial", gamma = 1)
preds <- predict(h, data)
plot(data[preds == 0, ]$x1, data[preds == 0, ]$x2, col = (6 - 0), xlab = "X1", ylab = "X2")
points(data[preds == 1, ]$x1, data[preds == 1, ]$x2, col = (4 - 1))
i) THe svm with the non-linear kernel seems to be better at classifying the data.
7.
a)
library(ISLR)
a <- ifelse(Auto$mpg > median(Auto$mpg), 1, 0)
Auto$mpglevel <- as.factor(a)
b)
set.seed(2021)
b <- tune(svm, mpglevel ~ ., data = Auto, kernel = "linear", ranges = list(cost = c(0.01, 0.1, 1, 5, 10)))
summary(b)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.01262821
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.07384615 0.05674660
## 2 0.10 0.05333333 0.05252135
## 3 1.00 0.01262821 0.01778017
## 4 5.00 0.01775641 0.02086164
## 5 10.00 0.02025641 0.02319375
c)
The lowest error is 0.02 for the radial kernel.
c <- tune(svm, mpglevel ~ ., data = Auto, kernel = "radial", ranges = list(cost = c(0.01, 0.1, 1, 5, 10)), gamma = c(0.01, 0.1, 1, 5, 10))
summary(c)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 10
##
## - best performance: 0.02551282
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.57660256 0.05049905
## 2 0.10 0.08935897 0.04404841
## 3 1.00 0.07141026 0.03359556
## 4 5.00 0.05096154 0.03591683
## 5 10.00 0.02551282 0.02402645
d<- tune(svm, mpglevel ~ ., data = Auto, kernel = "polynomial", ranges = list(cost = c(0.01, 0.1, 1, 5, 10)), gamma = c(0.01, 0.1, 1, 5, 10))
summary(d)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 10
##
## - best performance: 0.2730128
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.5332051 0.02282387
## 2 0.10 0.5332051 0.02282387
## 3 1.00 0.5126923 0.04311879
## 4 5.00 0.2908974 0.04599609
## 5 10.00 0.2730128 0.05012627
d)
d_svm <- svm(mpglevel ~ ., data = Auto, kernel = "radial", cost = 100, gamma = 0.01)
plot(d_svm, Auto, mpg~cylinders)
plot(d_svm, Auto, mpg~year)
plot(d_svm, Auto, weight~displacement)
8.
a)
set.seed(2021)
index <- sample(nrow(OJ), 0.7*nrow(OJ))
train <- OJ[index, ]
test <- OJ[-index, ]
b)
g<- svm(Purchase~., train, kernel = "linear", cost = 0.01)
summary(g)
##
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 407
##
## ( 204 203 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
c)
The training accuracy is 84% and the test accuracy is ~81%
pred<- predict(g, train)
table(train$Purchase, pred)
## pred
## CH MM
## CH 397 53
## MM 67 232
(416 + 214)/(416+214+52+67)
## [1] 0.8411215
pred<- predict(g, test)
table(test$Purchase, pred)
## pred
## CH MM
## CH 171 32
## MM 28 90
(162+97)/(162+97+23+39)
## [1] 0.8068536
d)
d_out <- tune(svm, Purchase ~ ., data = train, kernel = "linear", ranges = list(cost = 0.01,0.1,1,10))
summary(d_out)
##
## Error estimation of 'svm' using 10-fold cross validation: 0.178991
e) The training accuracy is 84% (error = ~16%) and the test accuracy is ~81%. The error/accuracy rates are about the same when using the best cost parameter.
e <- svm(Purchase ~ ., kernel = "linear", data = train, cost = d_out$best.parameter$cost)
pred_train <- predict(e, train)
table(train$Purchase, pred_train)
## pred_train
## CH MM
## CH 397 53
## MM 67 232
(416+214)/(416+214+52+67)
## [1] 0.8411215
pred_test <- predict(e, test)
table(test$Purchase, pred_test)
## pred_test
## CH MM
## CH 171 32
## MM 28 90
(162+97)/(162+97+23+39)
## [1] 0.8068536
f) With a radial kernel and cost = 0.01, all of the MM are misclassified in both the train and test sets. The same goes for the tuned cost parameter svm.
set.seed(2021)
g<- svm(Purchase~., train, kernel = "radial", cost = 0.01)
summary(g)
##
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "radial", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 0.01
##
## Number of Support Vectors: 602
##
## ( 303 299 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
pred<- predict(g, train)
table(train$Purchase, pred)
## pred
## CH MM
## CH 450 0
## MM 299 0
pred<- predict(g, test)
table(test$Purchase, pred)
## pred
## CH MM
## CH 203 0
## MM 118 0
d_out <- tune(svm, Purchase ~ ., data = train, kernel = "radial", ranges = list(cost = 0.01,0.1,1,10))
summary(d_out)
##
## Error estimation of 'svm' using 10-fold cross validation: 0.3991892
e <- svm(Purchase ~ ., kernel = "radial", data = train, cost = d_out$best.parameter$cost)
pred_train <- predict(e, train)
table(train$Purchase, pred_train)
## pred_train
## CH MM
## CH 450 0
## MM 299 0
pred_test <- predict(e, test)
table(test$Purchase, pred_test)
## pred_test
## CH MM
## CH 203 0
## MM 118 0
g) Accuracy improves with the polynomial kernel, achieving between a 59% and a 63% accuracy on the train and test sets.
set.seed(2021)
g<- svm(Purchase~., train, kernel = "polynomial", degree=2, cost = 0.01)
summary(g)
##
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "polynomial",
## degree = 2, cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 0.01
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 603
##
## ( 304 299 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
pred<- predict(g, train)
table(train$Purchase, pred)
## pred
## CH MM
## CH 450 0
## MM 291 8
# accuracy = 63%
(467+5)/(467+5+276+1)
## [1] 0.6301736
pred<- predict(g, test)
table(test$Purchase, pred)
## pred
## CH MM
## CH 202 1
## MM 117 1
# accuracy = 59%
(185+6)/(185+6+130)
## [1] 0.5950156
d_out <- tune(svm, Purchase ~ ., data = train, kernel = "polynomial", degree= 2, ranges = list(cost = 0.01,0.1,1,10))
summary(d_out)
##
## Error estimation of 'svm' using 10-fold cross validation: 0.3978559
e <- svm(Purchase ~ ., kernel = "polynomial", degree=2, data = train, cost = d_out$best.parameter$cost)
pred_train <- predict(e, train)
table(train$Purchase, pred_train)
## pred_train
## CH MM
## CH 450 0
## MM 291 8
# accuracy = 63%
(467+5)/(467+5+277)
## [1] 0.6301736
pred_test <- predict(e, test)
table(test$Purchase, pred_test)
## pred_test
## CH MM
## CH 202 1
## MM 117 1
# accuracy = 59.5%
(185+6)/(185+6+130)
## [1] 0.5950156
h) The linear kernel gives the best results accuracy-wise with ranges of 81-84%.