# We have seen that we can fit an SVM with a non-linear kernel in order to perform classification using a non-linear decision boundary. We will now see that we can also obtain a non-linear decision boundary by performing logistic regression using non-linear transformations of the features.
## (a) Generate a data set with n = 500 and p = 2, such that the observations belong to two classes with a quadratic decision boundary between them. For instance, you can do this as follows:
# Simulate 500 observations of two features, each uniform on [-0.5, 0.5].
x1 <- runif(500) - 0.5
x2 <- runif(500) - 0.5
# Label is 1 where x1^2 - x2^2 > 0 and 0 otherwise, so the true class
# boundary is quadratic in the features.
y <- 1 * (x1^2 - x2^2 > 0)
# Plot both classes. Axis limits are taken from the full data so that points
# of the second class are not clipped to the range of the first.
plot(x1[y == 0], x2[y == 0], col = "blue",
     xlim = range(x1), ylim = range(x2))
points(x1[y == 1], x2[y == 1], col = "red")
# Logistic regression of the class label on the two untransformed features;
# the fitted coefficient table is printed by summary().
reg <- glm(
  formula = y ~ x1 + x2,
  family = binomial
)
summary(reg)
##
## Call:
## glm(formula = y ~ x1 + x2, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.337 -1.174 1.017 1.144 1.343
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.007045 0.090154 0.078 0.9377
## x1 -0.570872 0.308890 -1.848 0.0646 .
## x2 -0.250098 0.316155 -0.791 0.4289
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 693.14 on 499 degrees of freedom
## Residual deviance: 689.18 on 497 degrees of freedom
## AIC: 695.18
##
## Number of Fisher Scoring iterations: 3
# Collect the simulated features and label in one data frame.
data <- data.frame(x1 = x1, x2 = x2, y = y)
# Fitted class-1 probabilities from the linear logistic model, thresholded
# at 0.5 to obtain predicted labels.
lm.prob <- predict(reg, newdata = data, type = "response")
lm.pred <- ifelse(lm.prob > 0.5, 1, 0)
data.pos <- data[lm.pred == 1, ]
data.neg <- data[lm.pred == 0, ]
# Observations coloured by predicted class. Limits come from the full data:
# otherwise the second group is clipped to the first group's range, and an
# empty first group would make plot() fail.
plot(data.pos$x1, data.pos$x2, col = "red",
     xlim = range(data$x1), ylim = range(data$x2))
points(data.neg$x1, data.neg$x2, col = "blue")
# Logistic regression on non-linear transformations of the features:
# squared terms and the interaction.
glmfit <- glm(y ~ I(x1^2) + I(x2^2) + I(x1 * x2), data = data, family = binomial)
# Fitted class-1 probabilities, thresholded at 0.5.
glmprobs <- predict(glmfit, newdata = data, type = "response")
glmpred <- ifelse(glmprobs > 0.5, 1, 0)
positive <- data[glmpred == 1, ]
negative <- data[glmpred == 0, ]
# Observations coloured by predicted class; limits span the full data so
# neither group is clipped.
plot(positive$x1, positive$x2, col = "red",
     xlim = range(data$x1), ylim = range(data$x2))
points(negative$x1, negative$x2, col = "blue")
library(e1071)
# svm() performs classification when the response is a factor.
data$y <- as.factor(data$y)
# Support vector classifier (linear kernel) on the raw features.
svmfit <- svm(y ~ x1 + x2, data, kernel = "linear", cost = 0.01)
svmpred <- predict(svmfit, data)
data.pos <- data[svmpred == 1, ]
data.neg <- data[svmpred == 0, ]
# Observations coloured by predicted class. The two groups use different
# colours, and the limits come from the full data so the plot still works
# when one predicted class is empty.
plot(data.pos$x1, data.pos$x2, col = "red",
     xlim = range(data$x1), ylim = range(data$x2))
points(data.neg$x1, data.neg$x2, col = "blue")
# SVM with a non-linear kernel: no kernel is given, so svm()'s default
# (radial) kernel is used, with gamma = 1.
svmfit <- svm(y ~ x1 + x2, data, gamma = 1)
svmpred <- predict(svmfit, data)
data.pos <- data[svmpred == 1, ]
data.neg <- data[svmpred == 0, ]
# Observations coloured by predicted class; limits span the full data so
# neither group is clipped.
plot(data.pos$x1, data.pos$x2, col = "red",
     xlim = range(data$x1), ylim = range(data$x2))
points(data.neg$x1, data.neg$x2, col = "blue")
library(ISLR2)
# Working copy of the Auto data with a binary response: mpg1 is "1" for cars
# whose mpg is above the median and "0" otherwise.
# The indicator is assigned directly to the column rather than to a variable
# named `median`, which would mask stats::median.
auto <- Auto
auto$mpg1 <- as.factor(ifelse(auto$mpg > median(auto$mpg), 1, 0))
##(b) Fit a support vector classifier to the data with various values of cost, in order to predict whether a car gets high or low gas mileage. Report the cross-validation errors associated with different values of this parameter. Comment on your results. Note you will need to fit the classifier without the gas mileage variable to produce sensible results.
# Prefer fixed over scientific notation when printing the results table.
options(scipen = 999)
# Fix the RNG state so the cross-validation folds are reproducible.
set.seed(1)
# Cross-validated search over cost for a linear-kernel classifier.
# NOTE(review): `mpg1 ~ .` still includes mpg (and every other column of
# `auto`) as a predictor, although the exercise text asks for the fit to
# exclude the gas mileage variable -- confirm whether this is intended.
tune <- tune(
  svm,
  mpg1 ~ .,
  data = auto,
  kernel = "linear",
  ranges = list(cost = c(0.01, 0.1, 1, 10, 100, 1000))
)
summary(tune)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.01025641
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.07653846 0.03617137
## 2 0.10 0.04596154 0.03378238
## 3 1.00 0.01025641 0.01792836
## 4 10.00 0.02051282 0.02648194
## 5 100.00 0.03076923 0.03151981
## 6 1000.00 0.03076923 0.03151981
# Same seed as the linear search so the folds are comparable.
set.seed(1)
# Cross-validated search over cost and degree for a polynomial kernel.
tune <- tune(
  svm,
  mpg1 ~ .,
  data = auto,
  kernel = "polynomial",
  ranges = list(
    cost = c(0.01, 0.1, 10),
    degree = c(2, 3)
  )
)
summary(tune)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 10 2
##
## - best performance: 0.5130128
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.01 2 0.5511538 0.04366593
## 2 0.10 2 0.5511538 0.04366593
## 3 10.00 2 0.5130128 0.08963366
## 4 0.01 3 0.5511538 0.04366593
## 5 0.10 3 0.5511538 0.04366593
## 6 10.00 3 0.5511538 0.04366593
# Same seed as the other searches so the folds are comparable.
set.seed(1)
# Cross-validated search over cost AND gamma for a radial kernel.
# gamma belongs in `ranges`: passed as a plain argument it goes straight to
# svm() and is not part of the tuning grid.
# NOTE: the output recorded below was produced before gamma was moved into
# `ranges` (it lists cost only); re-run to refresh it.
tune <- tune(
  svm,
  mpg1 ~ .,
  data = auto,
  kernel = "radial",
  ranges = list(
    cost = c(0.01, 10),
    gamma = c(10, 0.01)
  )
)
summary(tune)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 10
##
## - best performance: 0.5179487
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.5511538 0.04366593
## 2 10.00 0.5179487 0.04917316
# In the recorded runs, every polynomial-kernel combination had a CV error of 0.551 except cost = 10 with degree = 2 (0.513). The radial kernel did best with cost = 10 (0.518); gamma was not part of the tuning grid in that run. The linear kernel with cost = 1 (CV error 0.010) clearly outperformed both.
# Refit one model per kernel on the full Auto data.
svmlinear <- svm(mpg1 ~ ., data = auto, kernel = "linear", cost = 1)
svmpoly <- svm(mpg1 ~ ., data = auto, kernel = "polynomial", cost = 10, degree = 2)
svmradial <- svm(mpg1 ~ ., data = auto, kernel = "radial", gamma = 0.01, cost = 10)
# Plot the linear classifier with mpg against each of these predictors in turn.
for (predictor in c("cylinders", "displacement", "horsepower", "weight", "acceleration")) {
  plot(svmlinear, auto, reformulate(predictor, response = "mpg"))
}
### Number 8
## (a) Create a training set containing a random sample of
## 800 observations, and a test set containing the remaining
## observations.
library(ISLR2)
# Split OJ into 800 randomly chosen training rows and a test set made of the
# remaining rows. The row indices are stored in `train_idx` rather than in a
# variable named `sample`, which would mask base::sample.
oj <- OJ
set.seed(1)
train_idx <- sample(nrow(oj), 800)
train <- oj[train_idx, ]
test <- oj[-train_idx, ]
library(e1071)
# Support vector classifier for Purchase on all other OJ variables, fitted on
# the training set with cost = 0.01.
svmlinear <- svm(
  Purchase ~ .,
  data = train,
  kernel = "linear",
  cost = 0.01
)
summary(svmlinear)
##
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 435
##
## ( 219 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
# Training-set confusion matrix (rows: observed, columns: predicted).
svmpred <- predict(svmlinear, train)
table(train$Purchase, svmpred)
## svmpred
## CH MM
## CH 420 65
## MM 75 240
# Misclassification rate computed from the predictions rather than typed in
# from the printed table, so it stays correct if the split or model changes.
mean(svmpred != train$Purchase)
## [1] 0.175
# the train data error rate is 0.175.
# Test-set confusion matrix (rows: observed, columns: predicted).
svmpred <- predict(svmlinear, test)
table(test$Purchase, svmpred)
## svmpred
## CH MM
## CH 153 15
## MM 33 69
# Misclassification rate = share of off-diagonal cells, (15 + 33) / 270.
# Summing the diagonal (69 + 153) gives the accuracy, not the error.
mean(svmpred != test$Purchase)
## [1] 0.1777778
# the test error rate was 0.178.
# Prefer fixed over scientific notation when printing the results table.
options(scipen = 999)
# Fix the RNG state so the cross-validation folds are reproducible.
set.seed(1)
# Cross-validated search over cost for the linear kernel on the training set.
tune <- tune(
  svm,
  Purchase ~ .,
  data = train,
  kernel = "linear",
  ranges = list(cost = c(0.01, 0.1, 1, 5, 10))
)
summary(tune)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.1
##
## - best performance: 0.1725
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.17625 0.02853482
## 2 0.10 0.17250 0.03162278
## 3 1.00 0.17500 0.02946278
## 4 5.00 0.17250 0.03162278
## 5 10.00 0.17375 0.03197764
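# cost = 0.1 and cost = 5 tie for the lowest CV error (0.1725); tune() reports 0.1 as the best parameter. cost = 5 is used below.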
# Refit the linear classifier with cost = 5 and evaluate it on the training
# set (rows: observed, columns: predicted).
svmlinear <- svm(Purchase ~ ., kernel = "linear", data = train, cost = 5)
svmpred <- predict(svmlinear, train)
table(train$Purchase, svmpred)
## svmpred
## CH MM
## CH 423 62
## MM 71 244
# Misclassification rate computed from the predictions instead of hard-coded
# table counts.
mean(svmpred != train$Purchase)
## [1] 0.16625
# the train data error rate is 0.166.
# Test-set confusion matrix for the cost = 5 linear classifier
# (rows: observed, columns: predicted).
svmpred <- predict(svmlinear, test)
table(test$Purchase, svmpred)
## svmpred
## CH MM
## CH 155 13
## MM 29 73
# Misclassification rate computed from the predictions instead of hard-coded
# table counts.
mean(svmpred != test$Purchase)
## [1] 0.1555556
#the test error rate was 0.156.
# Radial-kernel SVM with cost = 0.01, evaluated on the training set
# (rows: observed, columns: predicted). In the recorded run every observation
# is predicted as CH.
svmradial <- svm(Purchase ~ ., data = train, kernel = "radial", cost = 0.01)
svmpred <- predict(svmradial, train)
table(train$Purchase, svmpred)
## svmpred
## CH MM
## CH 485 0
## MM 315 0
# Misclassification rate computed from the predictions instead of hard-coded
# table counts.
mean(svmpred != train$Purchase)
## [1] 0.39375
# the train data error rate is 0.39.
# Test-set confusion matrix for the cost = 0.01 radial SVM
# (rows: observed, columns: predicted).
svmpred <- predict(svmradial, test)
table(test$Purchase, svmpred)
## svmpred
## CH MM
## CH 168 0
## MM 102 0
# Misclassification rate computed from the predictions instead of hard-coded
# table counts.
mean(svmpred != test$Purchase)
## [1] 0.3777778
# the test error is 0.378.
# Same seed as the linear search so the folds are comparable.
set.seed(1)
# Cross-validated search over cost for the radial kernel on the training set.
tune <- tune(
  svm,
  Purchase ~ .,
  data = train,
  kernel = "radial",
  ranges = list(cost = c(0.01, 0.1, 1, 5, 10))
)
summary(tune)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.17125
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.39375 0.04007372
## 2 0.10 0.18625 0.02853482
## 3 1.00 0.17125 0.02128673
## 4 5.00 0.18000 0.02220485
## 5 10.00 0.18625 0.02853482
# the best cost was 1.
# Refit the radial SVM with cost = 1 and evaluate it on the training set
# (rows: observed, columns: predicted).
svmradial <- svm(Purchase ~ ., data = train, kernel = "radial", cost = 1)
svmpred <- predict(svmradial, train)
table(train$Purchase, svmpred)
## svmpred
## CH MM
## CH 441 44
## MM 77 238
# Misclassification rate computed from the predictions instead of hard-coded
# table counts.
mean(svmpred != train$Purchase)
## [1] 0.15125
#the train error rate with cost = 1 was .151.
# Test-set confusion matrix for the cost = 1 radial SVM
# (rows: observed, columns: predicted).
svmpred <- predict(svmradial, test)
table(test$Purchase, svmpred)
## svmpred
## CH MM
## CH 151 17
## MM 33 69
# Misclassification rate computed from the predictions instead of hard-coded
# table counts.
mean(svmpred != test$Purchase)
## [1] 0.1851852
# the test error rate with cost = 1 is .185
# Degree-2 polynomial-kernel SVM with cost = 0.01, evaluated on the training
# set (rows: observed, columns: predicted).
svmpoly <- svm(Purchase ~ ., data = train, kernel = "polynomial", cost = 0.01, degree = 2)
svmpred <- predict(svmpoly, train)
table(train$Purchase, svmpred)
## svmpred
## CH MM
## CH 484 1
## MM 297 18
# Misclassification rate computed from the predictions instead of hard-coded
# table counts.
mean(svmpred != train$Purchase)
## [1] 0.3725
# the train data error rate is 0.3725.
# Test-set confusion matrix for the cost = 0.01 polynomial SVM
# (rows: observed, columns: predicted).
svmpred <- predict(svmpoly, test)
table(test$Purchase, svmpred)
## svmpred
## CH MM
## CH 167 1
## MM 98 4
# Misclassification rate computed from the predictions instead of hard-coded
# table counts.
mean(svmpred != test$Purchase)
## [1] 0.3666667
# the test error is 0.37.
# Same seed as the other searches so the folds are comparable.
set.seed(1)
# Cross-validated search over cost for a degree-2 polynomial kernel.
# `degree = 2` is passed to svm() as its own argument. Written inside c(...)
# it became an extra cost value (the cost = 2 row in the recorded output)
# and left the kernel degree at svm()'s default.
# NOTE: the output recorded below predates this fix; re-run to refresh it.
tune <- tune(
  svm,
  Purchase ~ .,
  data = train,
  kernel = "polynomial",
  degree = 2,
  ranges = list(cost = c(0.01, 0.1, 1, 5, 10))
)
summary(tune)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.185
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.37125 0.03537988
## 2 0.10 0.28750 0.05068969
## 3 1.00 0.18500 0.02415229
## 4 5.00 0.18875 0.02913689
## 5 10.00 0.19500 0.03184162
## 6 2.00 0.19000 0.02188988
# the best cost was 1.
# Refit the polynomial SVM with cost = 1 and evaluate it on the training set
# (rows: observed, columns: predicted).
# NOTE(review): no degree is given here, so svm()'s default degree is used,
# whereas the cost = 0.01 polynomial fit earlier in this file sets
# degree = 2 -- confirm which is intended.
svmpoly <- svm(Purchase ~ ., data = train, kernel = "polynomial", cost = 1)
svmpred <- predict(svmpoly, train)
table(train$Purchase, svmpred)
## svmpred
## CH MM
## CH 453 32
## MM 91 224
# Misclassification rate computed from the predictions instead of hard-coded
# table counts.
mean(svmpred != train$Purchase)
## [1] 0.15375
#the train error rate with cost = 1 was .154.
# Test-set confusion matrix for the cost = 1 polynomial SVM
# (rows: observed, columns: predicted).
svmpred <- predict(svmpoly, test)
table(test$Purchase, svmpred)
## svmpred
## CH MM
## CH 155 13
## MM 47 55
# Misclassification rate computed from the predictions instead of hard-coded
# table counts.
mean(svmpred != test$Purchase)
## [1] 0.2222222
#the test error rate was 0.22.
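# Overall, the radial kernel with cost = 1 gave the lowest training error (0.151), but its test error was 0.185. The linear kernel with cost = 5 had the lowest test error (0.156), and the polynomial kernel with cost = 1 had the highest (0.222). The three tuned models perform broadly similarly, with the linear kernel generalising best on this split.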