Lab 10
Question 4
# Load required libraries
library(e1071)
library(ggplot2)
library(gridExtra)
set.seed(123)
# Simulate a non-linearly separable dataset
n <- 100
x1 <- runif(n, -1, 1)
x2 <- runif(n, -1, 1)
y <- ifelse(x1^2 + x2^2 > 0.5, 1, 0)
data <- data.frame(x1 = x1, x2 = x2, y = as.factor(y))
# Split into train and test
train_idx <- sample(1:n, n * 0.7)
train_data <- data[train_idx, ]
test_data <- data[-train_idx, ]
# Fit models
svc_model <- svm(y ~ ., data = train_data, kernel = "linear", cost = 1)
poly_model <- svm(y ~ ., data = train_data, kernel = "polynomial", degree = 3, cost = 1)
radial_model <- svm(y ~ ., data = train_data, kernel = "radial", gamma = 1, cost = 1)
# Predictions and error rates
predict_and_error <- function(model, train_data, test_data) {
  pred_train <- predict(model, train_data)
  pred_test <- predict(model, test_data)
  train_error <- mean(pred_train != train_data$y)
  test_error <- mean(pred_test != test_data$y)
  list(train_error = train_error, test_error = test_error)
}
errors <- list(
  svc = predict_and_error(svc_model, train_data, test_data),
  poly = predict_and_error(poly_model, train_data, test_data),
  radial = predict_and_error(radial_model, train_data, test_data)
)
# Plot decision boundaries
plot_svm <- function(model, data, title) {
  grid <- expand.grid(
    x1 = seq(-1, 1, length = 100),
    x2 = seq(-1, 1, length = 100)
  )
  grid$y <- predict(model, grid)
  ggplot(data, aes(x = x1, y = x2, color = y)) +
    geom_point(size = 2) +
    geom_contour(data = grid, aes(z = as.numeric(y)), breaks = 1.5, color = "black") +
    labs(title = title) +
    theme_minimal()
}
p1 <- plot_svm(svc_model, train_data, "SVC (Linear)")
p2 <- plot_svm(poly_model, train_data, "SVM (Polynomial Kernel)")
p3 <- plot_svm(radial_model, train_data, "SVM (Radial Kernel)")
# Display all plots
grid.arrange(p1, p2, p3, nrow = 1)
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf

# Print error rates
cat("Training and Test Error Rates:\n")
## Training and Test Error Rates:
for (model_name in names(errors)) {
  cat(model_name, "\n")
  cat(" Train Error:", round(errors[[model_name]]$train_error, 3), "\n")
  cat(" Test Error :", round(errors[[model_name]]$test_error, 3), "\n")
}
## svc
## Train Error: 0.414
## Test Error : 0.467
## poly
## Train Error: 0.414
## Test Error : 0.467
## radial
## Train Error: 0.029
## Test Error : 0.067
Insights
- The linear SVC and the degree-3 polynomial SVM produced identical, poor error rates (train ~0.414, test ~0.467), consistent with predicting a single class everywhere. With e1071's default coef0 = 0, the degree-3 polynomial kernel is homogeneous (its expansion contains no quadratic terms), so it cannot represent the circular boundary x1^2 + x2^2 = 0.5 any better than a linear classifier; a sketch with coef0 = 1 follows this list.
- This also explains the "Zero contours" warnings above: both models predict one class over the entire plotting grid, so there is no decision boundary to draw.
- The radial (RBF) kernel captured the curved boundary and improved classification dramatically (train ~0.029, test ~0.067). When there is visible non-linear separation, a radial kernel is much better suited than a linear SVC.
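A minimal check (my addition, not part of the original lab; poly_model_inhom is an illustrative name): e1071's polynomial kernel is (gamma * u'v + coef0)^degree with coef0 = 0 by default, so setting coef0 = 1 makes the kernel inhomogeneous and should let it recover the quadratic boundary.
# Sketch: inhomogeneous degree-3 polynomial kernel (coef0 = 1); the expansion
# of (gamma * <u, v> + 1)^3 contains the squared terms needed for the
# boundary x1^2 + x2^2 = 0.5, so the fit should improve markedly
poly_model_inhom <- svm(y ~ ., data = train_data, kernel = "polynomial",
                        degree = 3, coef0 = 1, cost = 1)
predict_and_error(poly_model_inhom, train_data, test_data)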
Question 7
7a
library(ISLR)
data(Auto)
# Remove missing values just in case
Auto <- na.omit(Auto)
# Create binary variable: mpg01
Auto$mpg01 <- ifelse(Auto$mpg > median(Auto$mpg), 1, 0)
# Check the distribution
table(Auto$mpg01)
##
## 0 1
## 196 196
7b
library(e1071)
# Remove mpg to avoid data leakage
Auto_svm <- Auto[, !(names(Auto) %in% c("mpg"))]
set.seed(1)
# Cross-validation for linear kernel
tune_out_linear <- tune(svm, mpg01 ~ ., data = Auto_svm, kernel = "linear",
                        ranges = list(cost = c(0.01, 0.1, 1, 10, 100)))
summary(tune_out_linear)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.09603609
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 0.10421950 0.03138085
## 2 1e-01 0.10227373 0.03634911
## 3 1e+00 0.09603609 0.03666741
## 4 1e+01 0.10531309 0.03683207
## 5 1e+02 0.12079079 0.03864160
7c: radial kernel
set.seed(1)
tune_out_radial <- tune(svm, mpg01 ~ ., data = Auto_svm, kernel = "radial",
                        ranges = list(cost = c(0.01, 0.1, 1, 10, 100),
                                      gamma = c(0.01, 0.1, 1)))
summary(tune_out_radial)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 10 0.1
##
## - best performance: 0.06878597
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 1e-02 0.01 0.29820372 0.03634917
## 2 1e-01 0.01 0.09907227 0.02542107
## 3 1e+00 0.01 0.08728525 0.02925025
## 4 1e+01 0.01 0.08338639 0.03654471
## 5 1e+02 0.01 0.07622852 0.03078425
## 6 1e-02 0.10 0.15038588 0.02397154
## 7 1e-01 0.10 0.07282379 0.02593818
## 8 1e+00 0.10 0.07186118 0.03017440
## 9 1e+01 0.10 0.06878597 0.03196026
## 10 1e+02 0.10 0.08770589 0.03354051
## 11 1e-02 1.00 0.47238675 0.03941296
## 12 1e-01 1.00 0.27951125 0.03634400
## 13 1e+00 1.00 0.09918732 0.02052348
## 14 1e+01 1.00 0.10442706 0.02069028
## 15 1e+02 1.00 0.10442540 0.02069205
Insights
- The radial kernel clearly outperformed both the linear and polynomial kernels, achieving the lowest cross-validation error (~0.069) at cost = 10 and gamma = 0.1. This points to non-linear structure in the data that the radial kernel modeled successfully.
- Error generally decreased as cost increased up to that point, but rose again at cost = 100 (0.088 at gamma = 0.1), a sign that overly large cost values risk overfitting, so tuning remains crucial.
- One caveat: mpg01 was left numeric, so svm() fit eps-regression here and the error column above is a cross-validated mean squared error rather than a misclassification rate; a classification version is sketched below.
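A minimal sketch (my addition, not run in the original analysis; Auto_cls and tune_radial_cls are illustrative names): recoding mpg01 as a factor makes svm() perform C-classification, so tune() reports a misclassification rate instead of regression MSE.
# Sketch: classification version with mpg01 as a factor
Auto_cls <- Auto_svm
Auto_cls$mpg01 <- as.factor(Auto_cls$mpg01)
set.seed(1)
tune_radial_cls <- tune(svm, mpg01 ~ ., data = Auto_cls, kernel = "radial",
                        ranges = list(cost = c(0.01, 0.1, 1, 10, 100),
                                      gamma = c(0.01, 0.1, 1)))
tune_radial_cls$best.performance  # cross-validated misclassification rate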
7c: polynomial kernel
set.seed(1)
tune_out_poly <- tune(svm, mpg01 ~ ., data = Auto_svm, kernel = "polynomial",
                      ranges = list(cost = c(0.01, 0.1, 1, 10),
                                    degree = c(2, 3, 4)))
summary(tune_out_poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 10 2
##
## - best performance: 0.3375643
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.01 2 0.4982382 0.03934745
## 2 0.10 2 0.4960905 0.03974005
## 3 1.00 2 0.4752293 0.04525479
## 4 10.00 2 0.3375643 0.08311313
## 5 0.01 3 0.4984136 0.03930767
## 6 0.10 3 0.4978768 0.03935396
## 7 1.00 3 0.4924827 0.03986701
## 8 10.00 3 0.4418838 0.04913560
## 9 0.01 4 0.4984719 0.03930290
## 10 0.10 4 0.4984602 0.03930559
## 11 1.00 4 0.4983427 0.03933257
## 12 10.00 4 0.4971505 0.03961089
Insights
- The polynomial kernel performed far worse, with a best cross-validation error of ~0.338 (cost = 10, degree = 2), well behind both the linear and radial kernels.
- Its assumptions about the shape of the decision boundary evidently did not match the data; as in question 4, the default coef0 = 0 makes the kernel homogeneous, which may be part of the problem (see the sketch below).
- Among the three kernels, the radial SVM achieves the lowest cross-validation error and is the best choice for this classification problem.
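A hedged follow-up (my addition; tune_out_poly2 is an illustrative name) that adds the offset term to the tuning grid, on the assumption that an inhomogeneous kernel is a better match for curved boundaries:
# Sketch: tune coef0 alongside cost and degree; coef0 = 1 makes the
# polynomial kernel inhomogeneous
set.seed(1)
tune_out_poly2 <- tune(svm, mpg01 ~ ., data = Auto_svm, kernel = "polynomial",
                       ranges = list(cost = c(0.01, 0.1, 1, 10),
                                     degree = c(2, 3),
                                     coef0 = c(0, 1)))
tune_out_poly2$best.parameters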
7d
# Load required libraries
library(e1071)
library(ISLR)
# Prepare the data
Auto <- na.omit(Auto)
Auto$mpg01 <- ifelse(Auto$mpg > median(Auto$mpg), 1, 0)
Auto_svm <- Auto[, !(names(Auto) %in% c("mpg"))]
# plot.svm draws class regions, so mpg01 must be a factor (classification fit)
Auto_svm$mpg01 <- as.factor(Auto_svm$mpg01)
# Refit the tuned radial SVM (if not done already)
set.seed(1)
tune_out_radial <- tune(svm, mpg01 ~ ., data = Auto_svm, kernel = "radial",
                        ranges = list(cost = c(0.01, 0.1, 1, 10, 100),
                                      gamma = c(0.01, 0.1, 1)))
best_radial <- tune_out_radial$best.model
# --- Plot decision boundary for selected variable pair ---
# Plot 1: horsepower vs. weight
plot(best_radial, Auto_svm, horsepower ~ weight,
     main = "Radial SVM: Horsepower vs. Weight")
Question 8
library(ISLR2)
##
## Attaching package: 'ISLR2'
## The following object is masked _by_ '.GlobalEnv':
##
## Auto
## The following objects are masked from 'package:ISLR':
##
## Auto, Credit
library(e1071)
# Load the data
data(OJ)
set.seed(1) # For reproducibility
8a
train_idx <- sample(1:nrow(OJ), 800)
train_data <- OJ[train_idx, ]
test_data <- OJ[-train_idx, ]
8b
svm_linear <- svm(Purchase ~ ., data = train_data, kernel = "linear", cost = 0.01)
summary(svm_linear)
##
## Call:
## svm(formula = Purchase ~ ., data = train_data, kernel = "linear",
## cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 435
##
## ( 219 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
Results
- The large number of support vectors (435 of 800 training observations) indicates a very soft margin, a direct consequence of the small cost parameter; see the sketch after this list.
- A soft margin can help when the classes overlap, but if the cost is too low it can also raise training error and lead to underfitting.
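A quick sketch of the second point (my addition; svm_firm is an illustrative name): refitting with a larger cost should tighten the margin and use fewer support vectors.
# Sketch: compare support-vector counts at a soft (cost = 0.01) and a
# firmer (cost = 10) margin; tot.nSV is the total count of support vectors
svm_firm <- svm(Purchase ~ ., data = train_data, kernel = "linear", cost = 10)
c(soft = svm_linear$tot.nSV, firm = svm_firm$tot.nSV)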
8c
# Training error
pred_train <- predict(svm_linear, train_data)
train_error <- mean(pred_train != train_data$Purchase)
# Test error
pred_test <- predict(svm_linear, test_data)
test_error <- mean(pred_test != test_data$Purchase)
train_error
## [1] 0.175
test_error
## [1] 0.1777778
- The training and test error rates are very close, which suggests the model is not overfitting. This is a typical outcome with a small cost value in an SVM, which encourages a softer margin and tolerates some misclassifications.
- However, a 17-18% error rate is likely higher than ideal and points toward underfitting; a larger cost might improve performance by producing a firmer decision boundary (this is tuned in 8d below).
8d
set.seed(1)
tune_linear <- tune(svm, Purchase ~ ., data = train_data, kernel = "linear",
                    ranges = list(cost = c(0.01, 0.1, 1, 10)))
summary(tune_linear)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.1
##
## - best performance: 0.1725
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.17625 0.02853482
## 2 0.10 0.17250 0.03162278
## 3 1.00 0.17500 0.02946278
## 4 10.00 0.17375 0.03197764
8e
best_linear <- tune_linear$best.model
# Error rates
train_error_best <- mean(predict(best_linear, train_data) != train_data$Purchase)
test_error_best <- mean(predict(best_linear, test_data) != test_data$Purchase)
train_error_best
## [1] 0.165
test_error_best
## [1] 0.162963
8f
set.seed(1)
tune_radial <- tune(svm, Purchase ~ ., data = train_data, kernel = "radial",
                    ranges = list(cost = c(0.01, 0.1, 1, 10)))
summary(tune_radial)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.17125
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.39375 0.04007372
## 2 0.10 0.18625 0.02853482
## 3 1.00 0.17125 0.02128673
## 4 10.00 0.18625 0.02853482
# Best radial model
best_radial <- tune_radial$best.model
train_error_radial <- mean(predict(best_radial, train_data) != train_data$Purchase)
test_error_radial <- mean(predict(best_radial, test_data) != test_data$Purchase)
train_error_radial
## [1] 0.15125
test_error_radial
## [1] 0.1851852
8g
set.seed(1)
tune_poly <- tune(svm, Purchase ~ ., data = train_data, kernel = "polynomial", degree = 2,
                  ranges = list(cost = c(0.01, 0.1, 1, 10)))
summary(tune_poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 10
##
## - best performance: 0.18125
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.39125 0.04210189
## 2 0.10 0.32125 0.05001736
## 3 1.00 0.20250 0.04116363
## 4 10.00 0.18125 0.02779513
# Best poly model
best_poly <- tune_poly$best.model
train_error_poly <- mean(predict(best_poly, train_data) != train_data$Purchase)
test_error_poly <- mean(predict(best_poly, test_data) != test_data$Purchase)
train_error_poly
## [1] 0.15
test_error_poly
## [1] 0.1888889
8h
- After tuning all three kernels (linear, radial, and polynomial), the radial SVM achieved the lowest cross-validation error (0.171, vs. 0.173 for linear and 0.181 for polynomial) and, along with the polynomial model, the lowest training error.
- On the held-out test set, however, the tuned linear SVM generalized best (test error ~16.3%), while the radial (~18.5%) and polynomial (~18.9%) models did slightly worse, suggesting their extra flexibility led to mild overfitting on this split.
- Overall, the linear SVM (cost = 0.1) offers the best trade-off between simplicity and generalization on the OJ data; the non-linear kernels add little here, which hints that a linear boundary already captures most of the class structure.
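To make the comparison concrete, a short sketch (my addition) collecting the error rates computed above into one table:
# Summary table of the tuned linear, radial, and polynomial SVMs
data.frame(kernel = c("linear", "radial", "polynomial"),
           train = c(train_error_best, train_error_radial, train_error_poly),
           test = c(test_error_best, test_error_radial, test_error_poly))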