Generate a simulated two-class data set with 100 observations and two features in which there is a visible but non-linear separation between the two classes. Show that in this setting, a support vector machine with a polynomial kernel (with degree greater than 1) or a radial kernel will outperform a support vector classifier on the training data. Which technique performs best on the test data? Make plots and report training and test error rates in order to back up your assertions.
library(ggplot2)
library(e1071)
set.seed(10)
data <- data.frame(
  x = runif(100),
  y = runif(100)
)
score <- (2 * data$x - 0.5)^2 + (data$y)^2 - 0.5
data$class <- factor(ifelse(score > 0, "red", "blue"))
p <- ggplot(data, aes(x = x, y = y, color = class)) +
  geom_point(size = 2) +
  scale_colour_identity()
p
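To make the non-linear separation visible, the true class boundary, where (2x - 0.5)^2 + y^2 = 0.5, can be overlaid on the scatter plot. This is a minimal sketch; the grid object below is a helper introduced here for illustration and is not used elsewhere.
# Sketch: overlay the true (non-linear) class boundary on the scatter plot.
grid <- expand.grid(
  x = seq(0, 1, length.out = 200),
  y = seq(0, 1, length.out = 200)
)
grid$score <- (2 * grid$x - 0.5)^2 + grid$y^2 - 0.5
p + geom_contour(
  data = grid, aes(x = x, y = y, z = score),
  breaks = 0, colour = "black", inherit.aes = FALSE
)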
train <- 1:50
test <- 51:100
fits <- list(
  "Radial" = svm(class ~ ., data = data[train, ], kernel = "radial"),
  "Polynomial" = svm(class ~ ., data = data[train, ], kernel = "polynomial", degree = 2),
  "Linear" = svm(class ~ ., data = data[train, ], kernel = "linear")
)
err <- function(model, data) {
  out <- table(predict(model, data), data$class)
  (out[1, 2] + out[2, 1]) / sum(out)
}
plot(fits[[1]], data)
plot(fits[[2]], data)
plot(fits[[3]], data)
sapply(fits, err, data = data[train, ])
## Radial Polynomial Linear
## 0.04 0.30 0.10
sapply(fits, err, data = data[test, ])
## Radial Polynomial Linear
## 0.06 0.48 0.14
## In this case, the radial kernel performs best, followed by the linear kernel, with the 2nd-degree polynomial performing worst. The ordering of these models is the same on the training and test data sets.
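The polynomial fit above uses svm()'s default cost and coef0, which may explain part of its poor showing. As a hedged check (a sketch using tune() with an illustrative grid, not part of the original analysis), those hyperparameters can be tuned on the training half before comparing test errors:
# Sketch: tune cost and coef0 for the degree-2 polynomial kernel on the
# training data; the grids below are illustrative choices.
poly_tune <- tune(
  svm, class ~ ., data = data[train, ],
  kernel = "polynomial", degree = 2,
  ranges = list(cost = c(0.1, 1, 10, 100), coef0 = c(0, 1, 2))
)
poly_tune$best.parameters
err(poly_tune$best.model, data[test, ])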
In this problem, you will use support vector approaches in order to predict whether a given car gets high or low gas mileage based on the Auto data set.
library(ISLR2)
library(dplyr)
auto <- ISLR2::Auto |>
  as_tibble() |>
  mutate(high_mileage = factor(ifelse(mpg > median(mpg), 1, 0))) |>
  select(-mpg)
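Because high_mileage is a median split of mpg, the two classes should be essentially balanced; a quick sketch to confirm:
# Sketch: class balance implied by the median split of mpg.
table(auto$high_mileage)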
cost_range <- c(0.1, 1, 10, 100, 1000)
tune_out_auto <-
  tune(
    svm,
    high_mileage ~ .,
    data = auto,
    kernel = "linear",
    ranges = list(cost = c(1e-09, 1e-06, 1e-04, cost_range))
  )
tune_out_auto$performances
## cost error dispersion
## 1 1e-09 0.59153846 0.06762461
## 2 1e-06 0.59153846 0.06762461
## 3 1e-04 0.59153846 0.06762461
## 4 1e-01 0.09173077 0.03196920
## 5 1e+00 0.10211538 0.05549804
## 6 1e+01 0.11493590 0.05594052
## 7 1e+02 0.11493590 0.04746289
## 8 1e+03 0.10217949 0.04705437
## cost = 1e-01 achieves the minimum CV error
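The same conclusion can be read directly from the tune object, which stores the best parameter combination and its cross-validated error (a minimal sketch):
# Sketch: best cost and its CV error, as stored by tune().
tune_out_auto$best.parameters
tune_out_auto$best.performance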
tune_out_auto_poly <-
  tune(
    svm,
    high_mileage ~ .,
    data = auto,
    kernel = "polynomial",
    ranges = list(cost = c(1e-09, 1e-06, 1e-04, cost_range),
                  degree = 2:10)
  )
tune_out_auto_poly$performances %>%
  arrange(error)
## cost degree error dispersion
## 1 1e+03 3 0.2578846 0.06949410
## 2 1e+03 2 0.2861538 0.10669358
## 3 1e+02 2 0.3087179 0.06342390
## 4 1e+02 3 0.4109615 0.05365286
## 5 1e+01 2 0.4847436 0.08084596
## 6 1e-09 2 0.5382692 0.02488569
## 7 1e-06 2 0.5382692 0.02488569
## 8 1e-04 2 0.5382692 0.02488569
## 9 1e-01 2 0.5382692 0.02488569
## 10 1e+00 2 0.5382692 0.02488569
## 11 1e-09 3 0.5382692 0.02488569
## 12 1e-06 3 0.5382692 0.02488569
## 13 1e-04 3 0.5382692 0.02488569
## 14 1e-01 3 0.5382692 0.02488569
## 15 1e+00 3 0.5382692 0.02488569
## 16 1e+01 3 0.5382692 0.02488569
## 17 1e-09 4 0.5382692 0.02488569
## 18 1e-06 4 0.5382692 0.02488569
## 19 1e-04 4 0.5382692 0.02488569
## 20 1e-01 4 0.5382692 0.02488569
## 21 1e+00 4 0.5382692 0.02488569
## 22 1e+01 4 0.5382692 0.02488569
## 23 1e+02 4 0.5382692 0.02488569
## 24 1e+03 4 0.5382692 0.02488569
## 25 1e-09 5 0.5382692 0.02488569
## 26 1e-06 5 0.5382692 0.02488569
## 27 1e-04 5 0.5382692 0.02488569
## 28 1e-01 5 0.5382692 0.02488569
## 29 1e+00 5 0.5382692 0.02488569
## 30 1e+01 5 0.5382692 0.02488569
## 31 1e+02 5 0.5382692 0.02488569
## 32 1e+03 5 0.5382692 0.02488569
## 33 1e-09 6 0.5382692 0.02488569
## 34 1e-06 6 0.5382692 0.02488569
## 35 1e-04 6 0.5382692 0.02488569
## 36 1e-01 6 0.5382692 0.02488569
## 37 1e+00 6 0.5382692 0.02488569
## 38 1e+01 6 0.5382692 0.02488569
## 39 1e+02 6 0.5382692 0.02488569
## 40 1e+03 6 0.5382692 0.02488569
## 41 1e-09 7 0.5382692 0.02488569
## 42 1e-06 7 0.5382692 0.02488569
## 43 1e-04 7 0.5382692 0.02488569
## 44 1e-01 7 0.5382692 0.02488569
## 45 1e+00 7 0.5382692 0.02488569
## 46 1e+01 7 0.5382692 0.02488569
## 47 1e+02 7 0.5382692 0.02488569
## 48 1e+03 7 0.5382692 0.02488569
## 49 1e-09 8 0.5382692 0.02488569
## 50 1e-06 8 0.5382692 0.02488569
## 51 1e-04 8 0.5382692 0.02488569
## 52 1e-01 8 0.5382692 0.02488569
## 53 1e+00 8 0.5382692 0.02488569
## 54 1e+01 8 0.5382692 0.02488569
## 55 1e+02 8 0.5382692 0.02488569
## 56 1e+03 8 0.5382692 0.02488569
## 57 1e-09 9 0.5382692 0.02488569
## 58 1e-06 9 0.5382692 0.02488569
## 59 1e-04 9 0.5382692 0.02488569
## 60 1e-01 9 0.5382692 0.02488569
## 61 1e+00 9 0.5382692 0.02488569
## 62 1e+01 9 0.5382692 0.02488569
## 63 1e+02 9 0.5382692 0.02488569
## 64 1e+03 9 0.5382692 0.02488569
## 65 1e-09 10 0.5382692 0.02488569
## 66 1e-06 10 0.5382692 0.02488569
## 67 1e-04 10 0.5382692 0.02488569
## 68 1e-01 10 0.5382692 0.02488569
## 69 1e+00 10 0.5382692 0.02488569
## 70 1e+01 10 0.5382692 0.02488569
## 71 1e+02 10 0.5382692 0.02488569
## 72 1e+03 10 0.5382692 0.02488569
tune_out_auto_radial <-
  tune(
    svm,
    high_mileage ~ .,
    data = auto,
    kernel = "radial",
    ranges = list(cost = c(1e-09, 1e-06, 1e-04, cost_range),
                  gamma = c(0.001, 0.01, 0.1, 1, 10, 100, 1000))
  )
tune_out_auto_radial$performances %>%
  arrange(error)
## cost gamma error dispersion
## 1 1e+01 1e-01 0.07147436 0.04157095
## 2 1e+00 1e+00 0.07916667 0.05474862
## 3 1e+02 1e-02 0.08160256 0.04297800
## 4 1e+02 1e-03 0.08673077 0.05293115
## 5 1e+01 1e-02 0.08673077 0.05293115
## 6 1e+00 1e-01 0.08673077 0.05293115
## 7 1e+01 1e+00 0.08679487 0.04722483
## 8 1e+02 1e+00 0.08679487 0.04722483
## 9 1e+03 1e+00 0.08679487 0.04722483
## 10 1e+01 1e-03 0.08923077 0.05283769
## 11 1e+00 1e-02 0.08923077 0.05283769
## 12 1e-01 1e-01 0.09179487 0.05549454
## 13 1e+03 1e-03 0.09442308 0.04183677
## 14 1e+03 1e-01 0.09455128 0.05693973
## 15 1e+02 1e-01 0.10467949 0.04610441
## 16 1e+03 1e-02 0.10724359 0.04327443
## 17 1e+00 1e-03 0.11493590 0.07751202
## 18 1e-01 1e-02 0.11493590 0.07751202
## 19 1e+01 1e+01 0.51301282 0.07149922
## 20 1e+02 1e+01 0.51301282 0.07149922
## 21 1e+03 1e+01 0.51301282 0.07149922
## 22 1e-09 1e+00 0.51628205 0.15786078
## 23 1e-06 1e+00 0.51628205 0.15786078
## 24 1e-04 1e+00 0.51628205 0.15786078
## 25 1e-01 1e+00 0.51628205 0.15786078
## 26 1e-09 1e-01 0.52128205 0.14333412
## 27 1e-06 1e-01 0.52128205 0.14333412
## 28 1e-04 1e-01 0.52128205 0.14333412
## 29 1e+00 1e+01 0.52320513 0.07515343
## 30 1e-09 1e-02 0.52878205 0.12214392
## 31 1e-06 1e-02 0.52878205 0.12214392
## 32 1e-04 1e-02 0.52878205 0.12214392
## 33 1e-09 1e-03 0.53128205 0.11529943
## 34 1e-06 1e-03 0.53128205 0.11529943
## 35 1e-04 1e-03 0.53128205 0.11529943
## 36 1e-01 1e-03 0.53128205 0.11529943
## 37 1e-09 1e+03 0.54878205 0.07360511
## 38 1e-06 1e+03 0.54878205 0.07360511
## 39 1e-04 1e+03 0.54878205 0.07360511
## 40 1e-01 1e+03 0.54878205 0.07360511
## 41 1e-09 1e+01 0.55128205 0.06922813
## 42 1e-06 1e+01 0.55128205 0.06922813
## 43 1e-04 1e+01 0.55128205 0.06922813
## 44 1e-01 1e+01 0.55128205 0.06922813
## 45 1e-09 1e+02 0.55628205 0.06258736
## 46 1e-06 1e+02 0.55628205 0.06258736
## 47 1e-04 1e+02 0.55628205 0.06258736
## 48 1e-01 1e+02 0.55628205 0.06258736
## 49 1e+00 1e+02 0.55628205 0.06258736
## 50 1e+01 1e+02 0.55628205 0.06258736
## 51 1e+02 1e+02 0.55628205 0.06258736
## 52 1e+03 1e+02 0.55628205 0.06258736
## 53 1e+00 1e+03 0.55628205 0.06258736
## 54 1e+01 1e+03 0.55628205 0.06258736
## 55 1e+02 1e+03 0.55628205 0.06258736
## 56 1e+03 1e+03 0.55628205 0.06258736
## The lowest CV error is achieved with cost = 1e+01 and gamma = 1e-01. This error is lower than the minimum errors obtained with the linear and polynomial kernels.
tune_out_auto$performances %>%
  ggplot(aes(factor(cost), error, group = 1)) +
  geom_line() +
  geom_point() +
  geom_vline(xintercept = which.min(tune_out_auto$performances$error),
             color = "red")
tune_out_auto_poly$performances %>%
  ggplot(aes(factor(cost), factor(degree), fill = error)) +
  geom_tile() +
  scale_fill_viridis_b(direction = -1) +
  labs(x = "cost",
       y = "degree") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
tune_out_auto_radial$performances %>%
  ggplot(aes(factor(cost), factor(gamma), fill = error)) +
  geom_tile() +
  scale_fill_viridis_b(direction = -1) +
  labs(x = "cost",
       y = "gamma") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Of the three kernels, the radial kernel achieves the lowest cross-validation error.
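As a follow-up sketch (the choice of weight and horsepower as the two displayed predictors is purely illustrative), the decision regions of the best radial model can be visualised with plot.svm(), which holds the remaining predictors fixed at default slice values (they can also be set explicitly via the slice argument):
# Sketch: decision regions of the tuned radial SVM in the weight/horsepower
# plane; other predictors are held at plot.svm's default slice values.
best_radial <- tune_out_auto_radial$best.model
plot(best_radial, auto, horsepower ~ weight)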
This problem involves the OJ data set which is part of the ISLR2 package.
library(ISLR2)
data(OJ)
set.seed(42)
train <- sample(seq_len(nrow(OJ)), 800)
test <- setdiff(seq_len(nrow(OJ)), train)
library(e1071)
fit <- svm(Purchase ~ ., data = OJ[train, ], kernel = "linear", cost = 0.01)
summary(fit)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ[train, ], kernel = "linear",
## cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 432
##
## ( 215 217 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
err <- function(model, data) {
  t <- table(predict(model, data), data[["Purchase"]])
  1 - sum(diag(t)) / sum(t)
}
errs <- function(model) {
  c(train = err(model, OJ[train, ]), test = err(model, OJ[test, ]))
}
errs(fit)
## train test
## 0.171250 0.162963
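Beyond the overall error rate, the test-set confusion matrix shows how the misclassifications split across the two brands (a minimal sketch reusing the objects above):
# Sketch: test-set confusion matrix for the cost = 0.01 linear SVM.
table(predicted = predict(fit, OJ[test, ]), actual = OJ[test, "Purchase"])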
tuned <- tune(svm, Purchase ~ .,
  data = OJ[train, ], kernel = "linear",
  ranges = list(cost = 10^seq(-2, 1, length.out = 10))
)
tuned$best.parameters
## cost
## 7 1
summary(tuned)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.1775
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01000000 0.18250 0.04133199
## 2 0.02154435 0.18000 0.04005205
## 3 0.04641589 0.18000 0.05041494
## 4 0.10000000 0.18000 0.04901814
## 5 0.21544347 0.18250 0.04377975
## 6 0.46415888 0.18250 0.04090979
## 7 1.00000000 0.17750 0.04031129
## 8 2.15443469 0.18000 0.03961621
## 9 4.64158883 0.17875 0.03821086
## 10 10.00000000 0.18375 0.03438447
errs(tuned$best.model)
## train test
## 0.167500 0.162963
tuned2 <- tune(svm, Purchase ~ .,
  data = OJ[train, ], kernel = "radial",
  ranges = list(cost = 10^seq(-2, 1, length.out = 10))
)
tuned2$best.parameters
## cost
## 6 0.4641589
errs(tuned2$best.model)
## train test
## 0.1525000 0.1666667
tuned3 <- tune(svm, Purchase ~ .,
  data = OJ[train, ], kernel = "polynomial",
  ranges = list(cost = 10^seq(-2, 1, length.out = 10)), degree = 2
)
tuned3$best.parameters
## cost
## 9 4.641589
errs(tuned3$best.model)
## train test
## 0.1487500 0.1703704
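To compare the three tuned kernels on a common footing, the training and test error rates of each best model can be collected side by side (a minimal sketch reusing the errs() helper defined above; exact values depend on the CV folds):
# Sketch: training and test error rates of the tuned linear, radial and
# degree-2 polynomial SVMs, side by side.
sapply(
  list(linear = tuned, radial = tuned2, polynomial = tuned3),
  function(t) errs(t$best.model)
)
On this particular train/test split, the tuned linear kernel gives the lowest test error (about 16.3%), with the radial and degree-2 polynomial kernels slightly behind.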