Questions 5, 7, 8
Question 5
a. Generate a data set with n = 500 and p = 2, such that the observations belong to two classes with a quadratic decision boundary between them. For instance, you can do this as follows:
library(e1071)
library(ISLR2)
set.seed(13)
x1 = runif(500) - 0.5
x2 = runif(500) - 0.5
y = 1 * (x1^2 - x2^2 > 0)
plot(x1[y == 0], x2[y == 0], col = "darkorange", xlab = "X1", ylab = "X2", pch = 20)
points(x1[y == 1], x2[y == 1], col = "blue", pch = 4)
Fitting a logistic regression of y on X1 and X2 without any transformations, both predictors appear statistically insignificant, as the summary and the p-value check below show.
lm.fit = glm(y ~ x1 + x2, family = binomial)
summary(lm.fit)
##
## Call:
## glm(formula = y ~ x1 + x2, family = binomial)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.287 -1.183 1.070 1.150 1.275
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.01963 0.08974 0.219 0.827
## x1 -0.46453 0.30375 -1.529 0.126
## x2 0.03207 0.30589 0.105 0.917
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 693.08 on 499 degrees of freedom
## Residual deviance: 690.71 on 497 degrees of freedom
## AIC: 696.71
##
## Number of Fisher Scoring iterations: 3
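As a quick check of that claim, the Wald-test p-values can also be pulled out of the fitted model programmatically; a minimal sketch using the lm.fit object above:
# p-values for the intercept, x1 and x2; both predictors are far from significant
coef(summary(lm.fit))[, "Pr(>|z|)"]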
data = data.frame(x1 = x1, x2 = x2, y = y)
lm.prob = predict(lm.fit, data, type = "response")
lm.pred = ifelse(lm.prob > 0.52, 1, 0)
data.pos = data[lm.pred == 1, ]
data.neg = data[lm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, col = "darkorange", xlab = "X1", ylab = "X2", pch = 20)
points(data.neg$x1, data.neg$x2, col = "blue", pch = 4)
# logistic regression with quadratic terms and an interaction
lm.fit = glm(y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2),
             data = data, family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
lm.prob = predict(lm.fit, data, type = "response")
lm.pred = ifelse(lm.prob > 0.5, 1, 0)
data.pos = data[lm.pred == 1, ]
data.neg = data[lm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, col = "darkorange", xlab = "X1", ylab = "X2", pch = 20)
points(data.neg$x1, data.neg$x2, col = "blue", pch = 4)
svm.fit = svm(as.factor(y) ~ x1 + x2, data, kernel = "linear", cost = 0.1)
svm.pred = predict(svm.fit, data)
data.pos = data[svm.pred == 1, ]
data.neg = data[svm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, col = "darkorange", xlab = "X1", ylab = "X2", pch = 20)
points(data.neg$x1, data.neg$x2, col = "blue", pch = 4)
# radial kernel (the default for svm) with gamma = 1
svm.fit = svm(as.factor(y) ~ x1 + x2, data, gamma = 1)
svm.pred = predict(svm.fit, data)
data.pos = data[svm.pred == 1, ]
data.neg = data[svm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, col = "darkorange", xlab = "X1", ylab = "X2", pch = 20)
points(data.neg$x1, data.neg$x2, col = "blue", pch = 4)
The linear logistic regression and the linear-kernel SVM both produce linear decision boundaries that miss the true quadratic boundary, while the logistic regression with quadratic terms and the radial-kernel SVM both recover a boundary close to the true one. SVMs with non-linear kernels can therefore find non-linear boundaries without the predictor transformations having to be specified by hand.
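To back this up numerically, the training error rates of the four decision rules can be compared against the true class labels. A minimal sketch follows; at this point lm.fit holds the quadratic logistic fit and svm.fit the radial-kernel SVM, so the two linear fits are recreated here:
err = function(pred) mean(as.numeric(as.character(pred)) != y)  # training error rate
logit.lin = glm(y ~ x1 + x2, data = data, family = binomial)
svm.lin = svm(as.factor(y) ~ x1 + x2, data, kernel = "linear", cost = 0.1)
c(logit.linear = err(ifelse(predict(logit.lin, data, type = "response") > 0.5, 1, 0)),
  logit.quad = err(ifelse(predict(lm.fit, data, type = "response") > 0.5, 1, 0)),
  svm.linear = err(predict(svm.lin, data)),
  svm.radial = err(predict(svm.fit, data)))
With this data-generating process one would expect the two linear rules to do little better than chance, while the quadratic logistic fit and the radial SVM should come close to zero training error.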
Question 7
# binary response: 1 if a car's gas mileage is above the median
gas.med = median(Auto$mpg)
new.var = ifelse(Auto$mpg > gas.med, 1, 0)
Auto$mpglevel = as.factor(new.var)
set.seed(13)
tune.out = tune(svm, mpglevel ~ ., data = Auto, kernel = "linear",
                ranges = list(cost = c(0.01, 0.1, 1, 5, 10, 100)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.01538462
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 0.07416667 0.05988249
## 2 1e-01 0.05128205 0.06280743
## 3 1e+00 0.01538462 0.01324097
## 4 5e+00 0.01794872 0.02110955
## 5 1e+01 0.02307692 0.02549818
## 6 1e+02 0.03589744 0.02756327
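The chosen cost and its cross-validated error can also be read directly off the tune object (a minimal sketch; the same best.parameters accessor is used when refitting the models further below):
tune.out$best.parameters   # cost chosen by 10-fold CV
tune.out$best.performance  # corresponding CV error rate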
set.seed(83)
tune.out = tune(svm, mpglevel ~ ., data = Auto, kernel = "polynomial",
                ranges = list(cost = c(0.1, 1, 5, 10), degree = c(2, 3, 4)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 10 2
##
## - best performance: 0.5432051
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.1 2 0.5457051 0.03396838
## 2 1.0 2 0.5457051 0.03396838
## 3 5.0 2 0.5457051 0.03396838
## 4 10.0 2 0.5432051 0.05311555
## 5 0.1 3 0.5457051 0.03396838
## 6 1.0 3 0.5457051 0.03396838
## 7 5.0 3 0.5457051 0.03396838
## 8 10.0 3 0.5457051 0.03396838
## 9 0.1 4 0.5457051 0.03396838
## 10 1.0 4 0.5457051 0.03396838
## 11 5.0 4 0.5457051 0.03396838
## 12 10.0 4 0.5457051 0.03396838
set.seed(1)
tune.out = tune(svm, mpglevel ~ ., data = Auto, kernel = "radial",
                ranges = list(cost = c(0.1, 1, 5, 10),
                              gamma = c(0.01, 0.1, 1, 5, 10, 100)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 10 0.01
##
## - best performance: 0.02557692
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 0.1 1e-02 0.08929487 0.04382379
## 2 1.0 1e-02 0.07403846 0.03522110
## 3 5.0 1e-02 0.04852564 0.03303346
## 4 10.0 1e-02 0.02557692 0.02093679
## 5 0.1 1e-01 0.07903846 0.03874545
## 6 1.0 1e-01 0.05371795 0.03525162
## 7 5.0 1e-01 0.02820513 0.03299190
## 8 10.0 1e-01 0.03076923 0.03375798
## 9 0.1 1e+00 0.55115385 0.04366593
## 10 1.0 1e+00 0.06384615 0.04375618
## 11 5.0 1e+00 0.05884615 0.04020934
## 12 10.0 1e+00 0.05884615 0.04020934
## 13 0.1 5e+00 0.55115385 0.04366593
## 14 1.0 5e+00 0.49493590 0.04724924
## 15 5.0 5e+00 0.48217949 0.05470903
## 16 10.0 5e+00 0.48217949 0.05470903
## 17 0.1 1e+01 0.55115385 0.04366593
## 18 1.0 1e+01 0.51794872 0.05063697
## 19 5.0 1e+01 0.51794872 0.04917316
## 20 10.0 1e+01 0.51794872 0.04917316
## 21 0.1 1e+02 0.55115385 0.04366593
## 22 1.0 1e+02 0.55115385 0.04366593
## 23 5.0 1e+02 0.55115385 0.04366593
## 24 10.0 1e+02 0.55115385 0.04366593
# refit each kernel with the parameters chosen by cross-validation above
svm.linear = svm(mpglevel ~ ., data = Auto, kernel = "linear", cost = 1)
svm.poly = svm(mpglevel ~ ., data = Auto, kernel = "polynomial", cost = 10, degree = 2)
svm.radial = svm(mpglevel ~ ., data = Auto, kernel = "radial", cost = 10, gamma = 0.01)
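As a rough sanity check before plotting, the three refit models can be compared on their training error; a minimal sketch (these rates are optimistic because the same observations were used for fitting, unlike the cross-validation errors above):
sapply(list(linear = svm.linear, poly = svm.poly, radial = svm.radial),
       function(fit) mean(predict(fit, Auto) != Auto$mpglevel))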
# plot each fitted SVM against mpg and every remaining predictor
plotpairs = function(fit) {
  for (name in names(Auto)[!(names(Auto) %in% c("mpg", "mpglevel", "name"))]) {
    plot(fit, Auto, as.formula(paste("mpg~", name, sep = "")))
  }
}
plotpairs(svm.linear)
plotpairs(svm.poly)
plotpairs(svm.radial)
Question 8
# split the OJ data into 800 training observations and the rest for testing
set.seed(13)
train = sample(dim(OJ)[1], 800)
OJ.train = OJ[train, ]
OJ.test = OJ[-train, ]
svm.linear = svm(Purchase ~ ., kernel = "linear", data = OJ.train, cost = 0.01)
summary(svm.linear)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 439
##
## ( 219 220 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred = predict(svm.linear, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 441 57
## MM 78 224
(78 + 57)/(441 + 224 + 78 + 57)
## [1] 0.16875
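The same rate can be computed directly from the predictions instead of reading the confusion matrix by hand; a minimal sketch:
mean(train.pred != OJ.train$Purchase)  # training error rate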
test.pred = predict(svm.linear, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 139 16
## MM 25 90
(25 + 16)/(139 + 90 + 25 + 16)
## [1] 0.1518519
set.seed(83)
tune.out = tune(svm, Purchase ~ ., data = OJ.train, kernel = "linear",
                ranges = list(cost = 10^seq(-2, 1, by = 0.25)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 5.623413
##
## - best performance: 0.16625
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.18000 0.04794383
## 2 0.01778279 0.17500 0.04330127
## 3 0.03162278 0.17625 0.03884174
## 4 0.05623413 0.17250 0.04322101
## 5 0.10000000 0.17250 0.04281744
## 6 0.17782794 0.17000 0.03917553
## 7 0.31622777 0.17250 0.04073969
## 8 0.56234133 0.17250 0.04281744
## 9 1.00000000 0.17125 0.04332131
## 10 1.77827941 0.17000 0.04090979
## 11 3.16227766 0.16875 0.04379958
## 12 5.62341325 0.16625 0.04528076
## 13 10.00000000 0.17125 0.04489571
svm.linear = svm(Purchase ~ ., kernel = "linear", data = OJ.train,
                 cost = tune.out$best.parameters$cost)
train.pred = predict(svm.linear, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 441 57
## MM 72 230
(72 + 57)/(441 + 230 + 72 + 57)
## [1] 0.16125
test.pred = predict(svm.linear, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 134 21
## MM 23 92
(23 + 21)/(134 + 92 + 23 + 21)
## [1] 0.162963
set.seed(8)
svm.radial = svm(Purchase ~ ., data = OJ.train, kernel = "radial")
summary(svm.radial)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "radial")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 379
##
## ( 191 188 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred = predict(svm.radial, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 453 45
## MM 84 218
(84 + 45)/(453 + 218 + 84 + 45)
## [1] 0.16125
test.pred = predict(svm.radial, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 141 14
## MM 25 90
(25 + 14)/(141 + 90 + 25 + 14)
## [1] 0.1444444
set.seed(11)
tune.out = tune(svm, Purchase ~ ., data = OJ.train, kernel = "radial",
                ranges = list(cost = 10^seq(-2, 1, by = 0.25)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1.778279
##
## - best performance: 0.18625
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.37750 0.06892024
## 2 0.01778279 0.37750 0.06892024
## 3 0.03162278 0.37375 0.07440738
## 4 0.05623413 0.22250 0.05027701
## 5 0.10000000 0.19500 0.04216370
## 6 0.17782794 0.19375 0.03691676
## 7 0.31622777 0.19250 0.04830459
## 8 0.56234133 0.18625 0.04427267
## 9 1.00000000 0.18750 0.04487637
## 10 1.77827941 0.18625 0.04619178
## 11 3.16227766 0.18875 0.04693746
## 12 5.62341325 0.19000 0.04706674
## 13 10.00000000 0.18875 0.04466309
svm.radial = svm(Purchase ~ ., data = OJ.train, kernel = "radial",
                 cost = tune.out$best.parameters$cost)
train.pred = predict(svm.radial, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 454 44
## MM 82 220
(82 + 44)/(454 + 220 + 82 + 44)
## [1] 0.1575
test.pred = predict(svm.radial, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 140 15
## MM 25 90
(25 + 15)/(140 + 90 + 25 + 15)
## [1] 0.1481481
set.seed(10)
svm.poly = svm(Purchase ~ ., data = OJ.train, kernel = "poly", degree = 2)
summary(svm.poly)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ.train, kernel = "poly", degree = 2)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 1
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 446
##
## ( 219 227 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
train.pred = predict(svm.poly, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 461 37
## MM 116 186
(116 + 37)/(461 + 186 + 116 + 37)
## [1] 0.19125
test.pred = predict(svm.poly, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 144 11
## MM 38 77
(38 + 11)/(144 + 77 + 38 + 11)
## [1] 0.1814815
set.seed(19)
tune.out = tune(svm, Purchase ~ ., data = OJ.train, kernel = "poly", degree = 2,
                ranges = list(cost = 10^seq(-2, 1, by = 0.25)))
summary(tune.out)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 10
##
## - best performance: 0.18
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.37750 0.03670453
## 2 0.01778279 0.35750 0.04609772
## 3 0.03162278 0.34625 0.04966904
## 4 0.05623413 0.32875 0.05622685
## 5 0.10000000 0.30625 0.06620937
## 6 0.17782794 0.25625 0.06515207
## 7 0.31622777 0.21125 0.07487258
## 8 0.56234133 0.21125 0.06958458
## 9 1.00000000 0.21375 0.06755913
## 10 1.77827941 0.19750 0.07090682
## 11 3.16227766 0.18625 0.07370408
## 12 5.62341325 0.18125 0.05929271
## 13 10.00000000 0.18000 0.05041494
svm.poly = svm(Purchase ~ ., data = OJ.train, kernel = "poly", degree = 2,
               cost = tune.out$best.parameters$cost)
train.pred = predict(svm.poly, OJ.train)
table(OJ.train$Purchase, train.pred)
## train.pred
## CH MM
## CH 456 42
## MM 85 217
(85 + 42)/(456 + 217 + 85 + 42)
## [1] 0.15875
test.pred = predict(svm.poly, OJ.test)
table(OJ.test$Purchase, test.pred)
## test.pred
## CH MM
## CH 140 15
## MM 30 85
(30 + 15)/(140 + 85 + 30 + 15)
## [1] 0.1666667
Overall, the radial kernel performs best on this split: its test error rate is around 14.4% to 14.8%, compared with roughly 15.2% to 16.3% for the linear kernel and 16.7% to 18.1% for the degree-2 polynomial kernel.
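To make the comparison explicit, the test error rates of the three tuned fits can be collected in one call; a minimal sketch using the tuned svm.linear, svm.radial and svm.poly objects from above:
sapply(list(linear = svm.linear, radial = svm.radial, poly = svm.poly),
       function(fit) mean(predict(fit, OJ.test) != OJ.test$Purchase))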