Generate a simulated two-class data set with 100 observations and two features in which there is a visible but non-linear separation between the two classes. Show that in this setting, a support vector machine with a polynomial kernel (with degree greater than 1) or a radial kernel will outperform a support vector classifier on the training data. Which technique performs best on the test data? Make plots and report training and test error rates in order to back up your assertions.
if (!require("e1071")) install.packages("e1071", dependencies=TRUE)
## Loading required package: e1071
if (!require("ggplot2")) install.packages("ggplot2", dependencies=TRUE)
## Loading required package: ggplot2
if (!require("dplyr")) install.packages("dplyr", dependencies=TRUE)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
if (!require("gridExtra")) install.packages("gridExtra", dependencies=TRUE)
## Loading required package: gridExtra
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(e1071)
library(ggplot2)
library(dplyr)
library(gridExtra)
# Simulate 100 points uniformly on [-1, 1]^2. A point is labelled 1 when it
# lies outside the circle x1^2 + x2^2 = 0.5 and 0 otherwise, so the true class
# boundary is circular rather than linear.
set.seed(42)
n <- 100
x1 <- runif(n, min = -1, max = 1)
x2 <- runif(n, min = -1, max = 1)
y <- as.numeric(x1^2 + x2^2 > 0.5)
data <- data.frame(x1, x2, y = factor(y))

# Train-Test Split: 70% of the rows go to training, the rest to testing.
set.seed(123)
train_index <- sample(seq_len(n), size = n * 0.7)
train_data <- data[train_index, ]
test_data <- data[-train_index, ]
Train SVM Models
# Fit one SVM per kernel on the training set, all with cost = 1.
svm_linear <- svm(
  y ~ .,
  data = train_data,
  cost = 1,
  kernel = "linear"
)
# NOTE(review): `coef0` is not set, so e1071's default (0) applies and the
# kernel contains only degree-3 terms. The recorded run shows this model with
# the same error rates as the linear classifier; consider `coef0 = 1` or an
# even degree and re-run to confirm.
svm_poly <- svm(
  y ~ .,
  data = train_data,
  cost = 1,
  kernel = "polynomial",
  degree = 3
)
svm_rbf <- svm(
  y ~ .,
  data = train_data,
  cost = 1,
  kernel = "radial",
  gamma = 1
)
Plot Decision Boundaries
# Plot the observations together with the class regions predicted by a model.
#
# model: fitted classifier whose `predict()` method returns a two-level factor
#        of class labels for a data frame with columns `x1` and `x2`.
# data:  data frame with columns `x1`, `x2` and the class factor `y`.
# title: plot title.
#
# Returns a ggplot object. The predicted class of every point of a 200 x 200
# grid over [-1, 1]^2 is shown as a lightly shaded background, so the plot
# stays informative even when the model assigns the whole region to a single
# class. The black decision boundary is drawn only when both classes are
# predicted somewhere on the grid; asking `geom_contour()` for a boundary that
# does not exist only yields "Zero contours were generated" warnings.
plot_svm <- function(model, data, title) {
  grid <- expand.grid(
    x1 = seq(-1, 1, length.out = 200),
    x2 = seq(-1, 1, length.out = 200)
  )
  grid$pred <- predict(model, grid)

  p <- ggplot() +
    geom_raster(data = grid, aes(x = x1, y = x2, fill = pred), alpha = 0.2) +
    geom_point(data = data, aes(x = x1, y = x2, color = y), size = 2)

  if (length(unique(grid$pred)) > 1) {
    # Factor codes are 1 and 2, so the boundary sits at 1.5.
    p <- p + geom_contour(
      data = grid, aes(x = x1, y = x2, z = as.numeric(pred)),
      breaks = 1.5, color = "black"
    )
  }

  # drop = FALSE keeps the class-to-colour mapping identical for the points
  # and the background even when one class is never predicted.
  p +
    scale_fill_discrete(drop = FALSE) +
    scale_color_discrete(drop = FALSE) +
    labs(title = title) +
    theme_minimal() +
    theme(legend.position = "none")
}
p1 <- plot_svm(svm_linear, train_data, "SVM Linear Kernel")
p2 <- plot_svm(svm_poly, train_data, "SVM Polynomial Kernel (deg=3)")
p3 <- plot_svm(svm_rbf, train_data, "SVM RBF Kernel")
grid.arrange(p1, p2, p3, ncol = 3)
Training & Test Error Rates
# Predicted class labels from each fitted model on the training set ...
train_preds_linear <- predict(svm_linear, train_data)
train_preds_poly <- predict(svm_poly, train_data)
train_preds_rbf <- predict(svm_rbf, train_data)
# ... and on the held-out test set.
test_preds_linear <- predict(svm_linear, test_data)
test_preds_poly <- predict(svm_poly, test_data)
test_preds_rbf <- predict(svm_rbf, test_data)
# Misclassification rate: the fraction of predictions that differ from the
# true labels.
#
# preds:  vector or factor of predicted labels.
# actual: vector or factor of true labels, the same length as `preds`.
#
# Returns a single number in [0, 1] (NaN for zero-length input, NA if any
# comparison is NA). Stops with an error when the lengths differ, because R
# would otherwise silently recycle the shorter vector.
error_rate <- function(preds, actual) {
  if (length(preds) != length(actual)) {
    stop("`preds` and `actual` must have the same length", call. = FALSE)
  }
  # Compare labels as text when factors are involved, so that factors whose
  # level sets differ can still be compared instead of raising an error.
  if (is.factor(preds) || is.factor(actual)) {
    preds <- as.character(preds)
    actual <- as.character(actual)
  }
  mean(preds != actual)
}
# Collect the misclassification rate of every kernel into one named vector
# per data set; the list names become the names of the result.
train_errors <- vapply(
  list(
    Linear = train_preds_linear,
    Polynomial = train_preds_poly,
    RBF = train_preds_rbf
  ),
  error_rate,
  numeric(1),
  actual = train_data$y
)
test_errors <- vapply(
  list(
    Linear = test_preds_linear,
    Polynomial = test_preds_poly,
    RBF = test_preds_rbf
  ),
  error_rate,
  numeric(1),
  actual = test_data$y
)
cat("Training Error Rates:\n")
## Training Error Rates:
print(round(train_errors, 3))
## Linear Polynomial RBF
## 0.400 0.400 0.029
cat("\nTest Error Rates:\n")
##
## Test Error Rates:
print(round(test_errors, 3))
## Linear Polynomial RBF
## 0.3 0.3 0.0
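The plots show that the linear SVM fails to separate the non-linear classes: no usable decision boundary appears inside the plotted region and many points are misclassified. The degree-3 polynomial kernel behaves the same way in this run, whereas the RBF kernel produces a closed, curved decision boundary that follows the circular class structure.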
In terms of error rates:
Linear kernel has the highest training and test error (0.400 and 0.300).
Polynomial kernel (degree 3) does no better than the linear kernel in this run: its training and test errors are also 0.400 and 0.300.
RBF kernel achieves the lowest training and test errors, indicating the best overall performance and generalization.
These results show that the RBF kernel is well suited to datasets with curved decision boundaries such as this one, while the degree-3 polynomial kernel, as configured here, offered no improvement over the linear classifier.
if (!require("ISLR")) install.packages("ISLR", dependencies = TRUE)
## Loading required package: ISLR
if (!require("e1071")) install.packages("e1071", dependencies = TRUE)
if (!require("ggplot2")) install.packages("ggplot2", dependencies = TRUE)
if (!require("caret")) install.packages("caret", dependencies = TRUE)
## Loading required package: caret
## Loading required package: lattice
library(ISLR)
library(e1071)
library(ggplot2)
library(caret)
# Load Auto, drop incomplete rows, and create the binary response mpg01:
# 1 when mpg is above its median, 0 otherwise.
data("Auto")
Auto <- na.omit(Auto)
Auto$mpg01 <- factor(as.numeric(Auto$mpg > median(Auto$mpg)))
# Remove mpg itself so the response cannot be read off a predictor.
# NOTE(review): every other column stays in, including `name`; the
# `mpg01 ~ .` models will use it as a predictor - confirm this is intended.
Auto_data <- subset(Auto, select = -mpg)
# 70/30 train-test split.
set.seed(123)
train_index <- sample(seq_len(nrow(Auto_data)), size = 0.7 * nrow(Auto_data))
train_data <- Auto_data[train_index, ]
test_data <- Auto_data[-train_index, ]
# Linear kernel: tune cost, then score the best model on the test set.
set.seed(123)
tune_out <- tune(
  svm, mpg01 ~ .,
  data = train_data,
  ranges = list(cost = c(0.01, 0.1, 1, 10, 100)),
  kernel = "linear"
)
best_linear <- tune_out[["best.model"]]
pred_linear <- predict(best_linear, test_data)
test_error_linear <- mean(test_data$mpg01 != pred_linear)
cat("Test Error (Linear SVM):", round(test_error_linear, 3))
## Test Error (Linear SVM): 0.093
# Radial kernel: tune cost and gamma jointly.
set.seed(123)
tune_rbf <- tune(
  svm, mpg01 ~ .,
  data = train_data,
  ranges = list(cost = c(0.1, 1, 10), gamma = c(0.01, 0.1, 1)),
  kernel = "radial"
)
best_rbf <- tune_rbf[["best.model"]]
pred_rbf <- predict(best_rbf, test_data)
test_error_rbf <- mean(test_data$mpg01 != pred_rbf)
cat("Test Error (RBF SVM):", round(test_error_rbf, 3))
## Test Error (RBF SVM): 0.102
# Polynomial kernel: tune cost and degree jointly.
set.seed(123)
tune_poly <- tune(
  svm, mpg01 ~ .,
  data = train_data,
  ranges = list(cost = c(0.1, 1, 10), degree = c(2, 3)),
  kernel = "polynomial"
)
best_poly <- tune_poly[["best.model"]]
pred_poly <- predict(best_poly, test_data)
test_error_poly <- mean(test_data$mpg01 != pred_poly)
cat("Test Error (Polynomial SVM):", round(test_error_poly, 3))
## Test Error (Polynomial SVM): 0.508
Since we have more than two predictors, we plot the decision regions using two selected features (e.g., horsepower and weight):
# Keep only the two plotted predictors and the response, fit a radial SVM on
# them, and draw its decision regions in the horsepower-weight plane.
train_reduced <- train_data[c("horsepower", "weight", "mpg01")]
test_reduced <- test_data[c("horsepower", "weight", "mpg01")]
svm_reduced_rbf <- svm(
  mpg01 ~ .,
  data = train_reduced,
  kernel = "radial",
  gamma = 0.1,
  cost = 1
)
plot(svm_reduced_rbf, data = train_reduced, formula = horsepower ~ weight)
A binary variable mpg01 was created to classify cars as having high (above-median mpg) or low mileage.
Using a linear SVM with the cost parameter tuned over five values, the model achieved a test error of 0.093, the lowest of the three kernels.
The RBF kernel (test error 0.102) performed slightly worse than the linear kernel, and the polynomial kernel (degree tuned over 2 and 3; test error 0.508) performed far worse, close to random guessing.
Among all models, the linear kernel had the lowest test error, demonstrating the best generalization for this classification task.
if (!require("ISLR2")) install.packages("ISLR2")
## Loading required package: ISLR2
##
## Attaching package: 'ISLR2'
## The following object is masked _by_ '.GlobalEnv':
##
## Auto
## The following objects are masked from 'package:ISLR':
##
## Auto, Credit
if (!require("e1071")) install.packages("e1071")
if (!require("caret")) install.packages("caret")
library(ISLR2)
library(e1071)
library(caret)
# Load OJ and hold out everything except a random sample of 800 training rows.
data("OJ")
set.seed(123)
train_indices <- sample(seq_len(nrow(OJ)), size = 800)
train_oj <- OJ[train_indices, ]
test_oj <- OJ[-train_indices, ]
# Support vector classifier with a small cost (0.01). The call is kept exactly
# as written because summary() echoes it.
svm_linear_01 <- svm(Purchase ~ ., data = train_oj, kernel = "linear", cost = 0.01)
summary(svm_linear_01)
##
## Call:
## svm(formula = Purchase ~ ., data = train_oj, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 442
##
## ( 220 222 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
# Misclassification rates on the training and test sets.
train_pred_01 <- predict(svm_linear_01, newdata = train_oj)
test_pred_01 <- predict(svm_linear_01, newdata = test_oj)
train_error_01 <- mean(train_oj$Purchase != train_pred_01)
test_error_01 <- mean(test_oj$Purchase != test_pred_01)
cat("Train Error (cost = 0.01):", round(train_error_01, 3), "\n")
## Train Error (cost = 0.01): 0.165
cat("Test Error (cost = 0.01):", round(test_error_01, 3), "\n")
## Test Error (cost = 0.01): 0.178
# Tune the cost of the linear classifier over ten values between 0.01 and 10.
# The call is kept exactly as written because summary() echoes it.
set.seed(123)
tune_linear <- tune(svm, Purchase ~ ., data = train_oj, kernel = "linear",
                    ranges = list(cost = seq(0.01, 10, length.out = 10)))
best_linear_model <- tune_linear[["best.model"]]
summary(best_linear_model)
##
## Call:
## best.tune(METHOD = svm, train.x = Purchase ~ ., data = train_oj,
## ranges = list(cost = seq(0.01, 10, length.out = 10)), kernel = "linear")
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 2.23
##
## Number of Support Vectors: 336
##
## ( 166 170 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
# Misclassification rates of the selected model.
train_pred_best <- predict(best_linear_model, newdata = train_oj)
test_pred_best <- predict(best_linear_model, newdata = test_oj)
train_error_best <- mean(train_oj$Purchase != train_pred_best)
test_error_best <- mean(test_oj$Purchase != test_pred_best)
cat("Train Error (best linear cost):", round(train_error_best, 3), "\n")
## Train Error (best linear cost): 0.159
cat("Test Error (best linear cost):", round(test_error_best, 3), "\n")
## Test Error (best linear cost): 0.156
# Radial kernel: tune cost over the same grid (gamma is left at svm()'s default).
set.seed(123)
tune_rbf <- tune(
  svm, Purchase ~ .,
  data = train_oj,
  ranges = list(cost = seq(0.01, 10, length.out = 10)),
  kernel = "radial"
)
best_rbf_model <- tune_rbf[["best.model"]]
train_pred_rbf <- predict(best_rbf_model, newdata = train_oj)
test_pred_rbf <- predict(best_rbf_model, newdata = test_oj)
train_error_rbf <- mean(train_oj$Purchase != train_pred_rbf)
test_error_rbf <- mean(test_oj$Purchase != test_pred_rbf)
cat("Train Error (RBF):", round(train_error_rbf, 3), "\n")
## Train Error (RBF): 0.139
cat("Test Error (RBF):", round(test_error_rbf, 3), "\n")
## Test Error (RBF): 0.189
# Polynomial kernel with degree fixed at 2: tune cost over the same grid.
set.seed(123)
tune_poly <- tune(
  svm, Purchase ~ .,
  data = train_oj,
  ranges = list(cost = seq(0.01, 10, length.out = 10)),
  kernel = "polynomial",
  degree = 2
)
best_poly_model <- tune_poly[["best.model"]]
train_pred_poly <- predict(best_poly_model, newdata = train_oj)
test_pred_poly <- predict(best_poly_model, newdata = test_oj)
train_error_poly <- mean(train_oj$Purchase != train_pred_poly)
test_error_poly <- mean(test_oj$Purchase != test_pred_poly)
cat("Train Error (Poly deg=2):", round(train_error_poly, 3), "\n")
## Train Error (Poly deg=2): 0.151
cat("Test Error (Poly deg=2):", round(test_error_poly, 3), "\n")
## Test Error (Poly deg=2): 0.2
# Side-by-side summary of the training and test error of each tuned model.
cat(
  "Linear SVM - Best Cost: Train =", round(train_error_best, 3),
  "Test =", round(test_error_best, 3),
  "\n"
)
## Linear SVM - Best Cost: Train = 0.159 Test = 0.156
cat(
  "RBF SVM: Train =", round(train_error_rbf, 3),
  "Test =", round(test_error_rbf, 3),
  "\n"
)
## RBF SVM: Train = 0.139 Test = 0.189
cat(
  "Poly SVM (deg=2): Train =", round(train_error_poly, 3),
  "Test =", round(test_error_poly, 3),
  "\n"
)
## Poly SVM (deg=2): Train = 0.151 Test = 0.2
The SVM with a linear kernel (cost = 0.01) showed
moderate training and test errors. After tuning, the optimal
linear model improved both error rates slightly.
The RBF kernel achieved the lowest training error (0.139) but a higher test
error (0.189) than the tuned linear model (0.156), suggesting some overfitting.
The polynomial kernel (degree = 2) had a fairly low training error (0.151)
but the highest test error (0.200), suggesting overfitting.
Overall, the tuned linear kernel provided the best generalization and prediction accuracy on this dataset.