Question 5. We have seen that we can fit an SVM with a non-linear kernel in order to perform classification using a non-linear decision boundary. We will now see that we can also obtain a non-linear decision boundary by performing logistic regression using non-linear transformations of the features.
set.seed(12345)
x1 = runif(500) - 0.5
x2 = runif(500) - 0.5
y = 1 * (x1^2 - x2^2 > 0)
plot(x1, x2, xlab = "X1", ylab = "X2", col = ifelse(y, 'purple', 'black'))
data5 = data.frame(x1 = x1, x2 = x2, y = as.factor(y))
glm.fit5 = glm(y~., data=data5, family = 'binomial')
glm.fit5
##
## Call: glm(formula = y ~ ., family = "binomial", data = data5)
##
## Coefficients:
## (Intercept)           x1           x2
##     0.07563      0.46494      0.41377
##
## Degrees of Freedom: 499 Total (i.e. Null); 497 Residual
## Null Deviance: 692.2
## Residual Deviance: 688 AIC: 694
set.seed(12345)
train5 <- sample(500, 250)
data.train5 <- data5[train5, ]
data.test5 <- data5[-train5, ]
glm.probs5 = predict(glm.fit5, newdata=data5, type = "response")
glm.preds5 = rep(0,500)
glm.preds5[ glm.probs5 > 0.5 ] = 1
plot(x1, x2, col = ifelse(glm.preds5 > 0, 'purple', 'black'), pch = ifelse(as.integer(glm.preds5 > 0) == y, 3, 1))
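As a quick check that is not part of the original output, the training error rate of the linear-feature fit can be computed directly; with only linear terms in X1 and X2 the fitted boundary is essentially linear, so a substantial error rate is expected.
# Added check (not in the original code): confusion table and error rate of the
# linear-feature logistic model against the true classes.
table(predicted = glm.preds5, actual = y)
mean(glm.preds5 != y)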
set.seed(12345)
glm.fite = glm(y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), data = data5, family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
set.seed(12345)
glm.poly = glm(y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), data = data.train5, family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
lm.probe = predict(glm.poly, data5, type = "response")
lm.prede = ifelse(lm.probe > 0.5, 1, 0)
data.pos5f = data5[lm.prede == 1, ]
data.neg5f = data5[lm.prede == 0, ]
plot(data.pos5f$x1, data.pos5f$x2, col = "purple", xlab = "X1", ylab = "X2", pch = 3)
points(data.neg5f$x1, data.neg5f$x2, col = "black", pch = 1)
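The same added check for the quadratic-feature model: because the true boundary is x1^2 - x2^2 = 0, this fit should classify nearly all observations correctly (the convergence warnings above reflect that near-perfect separation).
# Added check: error rate of the quadratic-feature logistic model.
mean(lm.prede != y)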
library(e1071)
svm.fitg5 <- svm(as.factor(y) ~ x1 + x2, data5, kernel = "linear", cost = 0.01)
svm.predsg5 <- predict(svm.fitg5, data5)
data.pos5g <- data5[svm.predsg5 == 1, ]
data.neg5g <- data5[svm.predsg5 == 0, ]
plot(data.pos5g$x1, data.pos5g$x2, col = "purple", xlab = "X1", ylab = "X2", pch = 1)
points(data.neg5g$x1, data.neg5g$x2, col = "black", pch = 4)
svm.fit5h <- svm(as.factor(y) ~ x1 + x2, data5, kernel = "radial", gamma = 1)  # radial kernel (the e1071 default), made explicit here
svm.preds5h <- predict(svm.fit5h, data5)
data.pos5h <- data5[svm.preds5h == 1, ]
data.neg5h <- data5[svm.preds5h == 0, ]
plot(data.pos5h$x1, data.pos5h$x2, col = "purple", xlab = "X1", ylab = "X2", pch = 1)
points(data.neg5h$x1, data.neg5h$x2, col = "black", pch = 4)
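For comparison, the error rates of the two SVM fits can be computed the same way (these lines are an addition and their output is not reproduced here); the linear-kernel SVM should behave much like the linear-feature logistic model, while the radial-kernel SVM should approach the quadratic-feature fit.
# Added comparison of the two SVM fits against the true classes.
mean(svm.predsg5 != data5$y)  # linear kernel
mean(svm.preds5h != data5$y)  # radial kernel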
Question 7. In this problem, you will use support vector approaches in order to predict whether a given car gets high or low gas mileage based on the Auto data set.
library(ISLR)
## Warning: package 'ISLR' was built under R version 4.0.5
data(Auto)
attach(Auto)
str(Auto)
## 'data.frame': 392 obs. of 9 variables:
## $ mpg : num 18 15 18 16 17 15 14 14 14 15 ...
## $ cylinders : num 8 8 8 8 8 8 8 8 8 8 ...
## $ displacement: num 307 350 318 304 302 429 454 440 455 390 ...
## $ horsepower : num 130 165 150 150 140 198 220 215 225 190 ...
## $ weight : num 3504 3693 3436 3433 3449 ...
## $ acceleration: num 12 11.5 11 12 10.5 10 9 8.5 10 8.5 ...
## $ year : num 70 70 70 70 70 70 70 70 70 70 ...
## $ origin : num 1 1 1 1 1 1 1 1 1 1 ...
## $ name : Factor w/ 304 levels "amc ambassador brougham",..: 49 36 231 14 161 141 54 223 241 2 ...
mpg.auto.med = median(Auto$mpg)
mpg.new = ifelse(Auto$mpg > mpg.auto.med, 1, 0)
Auto$mpg.new = as.factor(mpg.new)
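A quick sanity check, not shown in the original output: splitting at the median of mpg should give roughly equal numbers of high- and low-mileage cars.
table(Auto$mpg.new)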
set.seed(1)
linearsvm7b = tune(svm, mpg.new ~ ., data = Auto, kernel = "linear", ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10, 100)))
summary(linearsvm7b)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.01025641
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-03 0.09442308 0.04519425
## 2 1e-02 0.07653846 0.03617137
## 3 1e-01 0.04596154 0.03378238
## 4 1e+00 0.01025641 0.01792836
## 5 5e+00 0.02051282 0.02648194
## 6 1e+01 0.02051282 0.02648194
## 7 1e+02 0.03076923 0.03151981
set.seed(5)
polysvm7c = tune(svm, mpg.new ~ ., data = Auto, kernel = "polynomial", ranges = list(cost = c(0.1, 1, 5, 10), degree = c(2, 3, 4)))
summary(polysvm7c)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 10 2
##
## - best performance: 0.5738462
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.1 2 0.5841026 0.04075521
## 2 1.0 2 0.5841026 0.04075521
## 3 5.0 2 0.5841026 0.04075521
## 4 10.0 2 0.5738462 0.03762359
## 5 0.1 3 0.5841026 0.04075521
## 6 1.0 3 0.5841026 0.04075521
## 7 5.0 3 0.5841026 0.04075521
## 8 10.0 3 0.5841026 0.04075521
## 9 0.1 4 0.5841026 0.04075521
## 10 1.0 4 0.5841026 0.04075521
## 11 5.0 4 0.5841026 0.04075521
## 12 10.0 4 0.5841026 0.04075521
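No tuning output is shown for the radial kernel used below (cost = 10, gamma = 0.01); a cross-validation step in the same style could look like the following sketch, where the grid values are illustrative assumptions rather than the values actually tried.
# Sketch of radial-kernel tuning (added; output not reproduced here).
set.seed(5)
radialsvm7 = tune(svm, mpg.new ~ ., data = Auto, kernel = "radial",
                  ranges = list(cost = c(0.1, 1, 5, 10), gamma = c(0.01, 0.1, 1)))
summary(radialsvm7)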
svm.linear = svm(mpg.new ~ ., data = Auto, kernel = "linear", cost = 1)
svm.poly = svm(mpg.new ~ ., data = Auto, kernel = "polynomial", cost = 10, degree = 2)
svm.radial = svm(mpg.new ~ ., data = Auto, kernel = "radial", cost = 10, gamma = 0.01)
plotpairs = function(fit) {
  for (name in names(Auto)[!(names(Auto) %in% c("mpg", "mpg.new", "name"))]) {
    plot(fit, Auto, as.formula(paste("mpg~", name, sep = "")))
  }
}
plotpairs(svm.poly)
plotpairs(svm.radial)
Question 8. This problem involves the OJ data set which is part of the ISLR package.
library(ISLR)
attach(OJ)
str(OJ)
## 'data.frame': 1070 obs. of 18 variables:
## $ Purchase : Factor w/ 2 levels "CH","MM": 1 1 1 2 1 1 1 1 1 1 ...
## $ WeekofPurchase: num 237 239 245 227 228 230 232 234 235 238 ...
## $ StoreID : num 1 1 1 1 7 7 7 7 7 7 ...
## $ PriceCH : num 1.75 1.75 1.86 1.69 1.69 1.69 1.69 1.75 1.75 1.75 ...
## $ PriceMM : num 1.99 1.99 2.09 1.69 1.69 1.99 1.99 1.99 1.99 1.99 ...
## $ DiscCH : num 0 0 0.17 0 0 0 0 0 0 0 ...
## $ DiscMM : num 0 0.3 0 0 0 0 0.4 0.4 0.4 0.4 ...
## $ SpecialCH : num 0 0 0 0 0 0 1 1 0 0 ...
## $ SpecialMM : num 0 1 0 0 0 1 1 0 0 0 ...
## $ LoyalCH : num 0.5 0.6 0.68 0.4 0.957 ...
## $ SalePriceMM : num 1.99 1.69 2.09 1.69 1.69 1.99 1.59 1.59 1.59 1.59 ...
## $ SalePriceCH : num 1.75 1.75 1.69 1.69 1.69 1.69 1.69 1.75 1.75 1.75 ...
## $ PriceDiff : num 0.24 -0.06 0.4 0 0 0.3 -0.1 -0.16 -0.16 -0.16 ...
## $ Store7 : Factor w/ 2 levels "No","Yes": 1 1 1 1 2 2 2 2 2 2 ...
## $ PctDiscMM : num 0 0.151 0 0 0 ...
## $ PctDiscCH : num 0 0 0.0914 0 0 ...
## $ ListPriceDiff : num 0.24 0.24 0.23 0 0 0.3 0.3 0.24 0.24 0.24 ...
## $ STORE : num 1 1 1 1 0 0 0 0 0 0 ...
set.seed(1)
intrain <- sample(nrow(OJ), 800)
train8 = OJ[intrain, ]
test8 = OJ[-intrain, ]
svm.linear8 <- svm(Purchase ~ ., data = train8, kernel = "linear", cost = 0.01)
summary(svm.linear8)
##
## Call:
## svm(formula = Purchase ~ ., data = train8, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 435
##
## ( 219 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred8 <- predict(svm.linear8, train8)
table(train8$Purchase, train.pred8)
## train.pred8
## CH MM
## CH 420 65
## MM 75 240
(75 + 65) / (420 + 240 + 75 + 65)
## [1] 0.175
test_pred <- predict(svm.linear8, test8)
table(test8$Purchase, test_pred)
## test_pred
## CH MM
## CH 153 15
## MM 33 69
(33 + 15) / (153 + 69 + 33 + 15)
## [1] 0.1777778
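Because the same confusion-matrix arithmetic repeats for every model below, a small helper (an addition, not part of the original code) can return the misclassification rate directly; applied to svm.linear8 it should reproduce the 0.175 and 0.178 rates just computed.
# Added helper: misclassification rate of a fitted SVM on a given data set.
err.rate = function(fit, data) {
  mean(predict(fit, data) != data$Purchase)
}
err.rate(svm.linear8, train8)
err.rate(svm.linear8, test8)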
set.seed(1)
tune.op <- tune(svm, Purchase ~ ., data = train8, kernel = "linear", ranges = list(cost = 10^seq(-2, 1, by = 0.25)))
summary(tune.op)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 3.162278
##
## - best performance: 0.16875
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.17625 0.02853482
## 2 0.01778279 0.17625 0.03143004
## 3 0.03162278 0.17125 0.02829041
## 4 0.05623413 0.17625 0.02853482
## 5 0.10000000 0.17250 0.03162278
## 6 0.17782794 0.17125 0.02829041
## 7 0.31622777 0.17125 0.02889757
## 8 0.56234133 0.17125 0.02703521
## 9 1.00000000 0.17500 0.02946278
## 10 1.77827941 0.17375 0.02729087
## 11 3.16227766 0.16875 0.03019037
## 12 5.62341325 0.17375 0.03304563
## 13 10.00000000 0.17375 0.03197764
The optimal cost from cross-validation is approximately 3.16, so the linear SVM is refit with that value below.
svm.line <- svm(Purchase ~ ., kernel = "linear", data = train8, cost = tune.op$best.parameter$cost)
train.pred8e <- predict(svm.line, train8)
table(train8$Purchase, train.pred8e)
## train.pred8e
## CH MM
## CH 423 62
## MM 70 245
(70 + 62) / (423 + 245 + 70 + 62)
## [1] 0.165
test.pred8e <- predict(svm.line, test8)
table(test8$Purchase, test.pred8e)
## test.pred8e
## CH MM
## CH 156 12
## MM 29 73
(29 + 12) / (156 + 73 + 29 + 12)
## [1] 0.1518519
svm.radial8f <- svm(Purchase ~ ., kernel = "radial", data = train8)
summary(svm.radial8f)
##
## Call:
## svm(formula = Purchase ~ ., data = train8, kernel = "radial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 373
##
## ( 188 185 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred8f <- predict(svm.radial8f, train8)
table(train8$Purchase, train.pred8f)
## train.pred8f
## CH MM
## CH 441 44
## MM 77 238
(77 + 44) / (441 + 238 + 77 + 44)
## [1] 0.15125
test.pred8f <- predict(svm.radial8f, test8)
table(test8$Purchase, test.pred8f)
## test.pred8f
## CH MM
## CH 151 17
## MM 33 69
(33 + 17) / (151 + 69 + 33 + 17)
## [1] 0.1851852
set.seed(1)
tune.op8f <- tune(svm, Purchase ~ ., data = train8, kernel = "radial", ranges = list(cost = 10^seq(-2, 1, by = 0.25)))
summary(tune.op8f)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.5623413
##
## - best performance: 0.16875
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.39375 0.04007372
## 2 0.01778279 0.39375 0.04007372
## 3 0.03162278 0.35750 0.05927806
## 4 0.05623413 0.19500 0.02443813
## 5 0.10000000 0.18625 0.02853482
## 6 0.17782794 0.18250 0.03291403
## 7 0.31622777 0.17875 0.03230175
## 8 0.56234133 0.16875 0.02651650
## 9 1.00000000 0.17125 0.02128673
## 10 1.77827941 0.17625 0.02079162
## 11 3.16227766 0.17750 0.02266912
## 12 5.62341325 0.18000 0.02220485
## 13 10.00000000 0.18625 0.02853482
svm.radial8f <- svm(Purchase ~ ., kernel = "radial", data = train8, cost = tune.op8f$best.parameter$cost)
summary(svm.radial8f)
##
## Call:
## svm(formula = Purchase ~ ., data = train8, kernel = "radial", cost = tune.op8f$best.parameter$cost)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 0.5623413
##
## Number of Support Vectors: 397
##
## ( 200 197 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred8ff <- predict(svm.radial8f, train8)
table(train8$Purchase, train.pred8ff)
## train.pred8ff
## CH MM
## CH 437 48
## MM 71 244
(71 + 48) / (437 + 244 + 48 + 71)
## [1] 0.14875
test.pred8ff <- predict(svm.radial8f, test8)
table(test8$Purchase, test.pred8ff)
## test.pred8ff
## CH MM
## CH 150 18
## MM 30 72
(30 + 18) / (150 + 72 + 30 + 18)
## [1] 0.1777778
svm.poly8g <- svm(Purchase ~ ., kernel = "polynomial", data = train8, degree = 2)
summary(svm.poly8g)
##
## Call:
## svm(formula = Purchase ~ ., data = train8, kernel = "polynomial",
## degree = 2)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 1
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 447
##
## ( 225 222 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred8g <- predict(svm.poly8g, train8)
table(train8$Purchase, train.pred8g)
## train.pred8g
## CH MM
## CH 449 36
## MM 110 205
(110 + 36) / (449 + 205 + 110 + 36)
## [1] 0.1825
test.pred8gg <- predict(svm.poly8g, test8)
table(test8$Purchase, test.pred8gg)
## test.pred8gg
## CH MM
## CH 153 15
## MM 45 57
(45 + 15) / (153 + 57 + 45 + 15)
## [1] 0.2222222
set.seed(2)
tune.op8g <- tune(svm, Purchase ~ ., data = train8, kernel = "polynomial", degree = 2, ranges = list(cost = 10^seq(-2, 1, by = 0.25)))
summary(tune.op8g)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 3.162278
##
## - best performance: 0.18
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.39000 0.03670453
## 2 0.01778279 0.37000 0.03395258
## 3 0.03162278 0.36375 0.03197764
## 4 0.05623413 0.34500 0.03291403
## 5 0.10000000 0.32125 0.03866254
## 6 0.17782794 0.24750 0.03322900
## 7 0.31622777 0.20250 0.04073969
## 8 0.56234133 0.20250 0.03670453
## 9 1.00000000 0.19625 0.03910900
## 10 1.77827941 0.19125 0.03586723
## 11 3.16227766 0.18000 0.04005205
## 12 5.62341325 0.18000 0.04133199
## 13 10.00000000 0.18125 0.03830162
svm.poly8g <- svm(Purchase ~ ., kernel = "polynomial", degree = 2, data = train8, cost = tune.op8g$best.parameter$cost)
summary(svm.poly8g)
##
## Call:
## svm(formula = Purchase ~ ., data = train8, kernel = "polynomial",
## degree = 2, cost = tune.op8g$best.parameter$cost)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 3.162278
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 385
##
## ( 197 188 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred8t <- predict(svm.poly8g, train8)
table(train8$Purchase, train.pred8t)
## train.pred8t
## CH MM
## CH 451 34
## MM 90 225
(90 + 34) / (451 + 225 + 90 + 34)
## [1] 0.155
test.pred8t <- predict(svm.poly8g, test8)
table(test8$Purchase, test.pred8t)
## test.pred8t
## CH MM
## CH 154 14
## MM 41 61
(41 + 14)/(154 + 61 + 41 + 14)
## [1] 0.2037037
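As an added summary that is not part of the original output, the test error rates of the three tuned models can be collected side by side; from the tables above, the tuned linear fit (about 0.15) and radial fit (about 0.18) outperform the degree-2 polynomial fit (about 0.20) on the test set.
# Added comparison of the tuned models' test error rates.
sapply(list(linear = svm.line, radial = svm.radial8f, polynomial = svm.poly8g),
       function(fit) mean(predict(fit, test8) != test8$Purchase))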