library(ggplot2)
library(e1071)
library(caret)
## Loading required package: lattice
library(ISLR2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Class1 if \(x_1^2 + x_2^2 > 1.5^2\),
else Class2
# Fix the RNG seed so the simulated points and the train/test split are
# reproducible (the call must be set.seed(); a leading dot names a different,
# non-existent function)
set.seed(96)
# Step 1: Simulate data (non-linear boundary)
# n points drawn uniformly on the square [-2, 2] x [-2, 2]
n <- 100
x1 <- runif(n, -2, 2)
x2 <- runif(n, -2, 2)
# Create a non-linear decision boundary (circle of radius 1.5):
# points strictly outside the circle are "Class1", all others are "Class2"
y <- ifelse(x1^2 + x2^2 > 1.5^2, "Class1", "Class2")
data <- data.frame(x1 = x1, x2 = x2, y = as.factor(y))
# Split into train/test: 70 rows sampled without replacement for training,
# the remaining rows are held out for testing
train_idx <- sample(seq_len(n), size = 70)
train <- data[train_idx, ]
test <- data[-train_idx, ]
# Three classifiers fitted on the same training split, differing only in kernel.
# Linear SVM (Support Vector Classifier)
svm_linear <- svm(
  y ~ .,
  data = train,
  kernel = "linear",
  cost = 1
)
# Polynomial Kernel (degree = 3)
svm_poly <- svm(
  y ~ .,
  data = train,
  kernel = "polynomial",
  degree = 3,
  cost = 1
)
# Radial Basis Function Kernel (gamma fixed at 1)
svm_rbf <- svm(
  y ~ .,
  data = train,
  kernel = "radial",
  gamma = 1,
  cost = 1
)
# Predictions: one column of predicted labels per kernel. They are computed
# once here and reused for every error rate below.
train_preds <- data.frame(
  linear = predict(svm_linear, train),
  poly = predict(svm_poly, train),
  rbf = predict(svm_rbf, train)
)
test_preds <- data.frame(
  linear = predict(svm_linear, test),
  poly = predict(svm_poly, test),
  rbf = predict(svm_rbf, test)
)
# Compute training error for each model
# (fraction of training rows whose predicted label differs from the true label)
train_error_linear <- mean(train_preds$linear != train$y)
train_error_poly <- mean(train_preds$poly != train$y)
train_error_rbf <- mean(train_preds$rbf != train$y)
# Compute test error for each model (same measure on the held-out rows)
test_error_linear <- mean(test_preds$linear != test$y)
test_error_poly <- mean(test_preds$poly != test$y)
test_error_rbf <- mean(test_preds$rbf != test$y)
# Side-by-side summary table, one row per kernel
error_df <- data.frame(
  Model = c("Linear", "Polynomial", "RBF"),
  Training_Error = c(train_error_linear, train_error_poly, train_error_rbf),
  Test_Error = c(test_error_linear, test_error_poly, test_error_rbf)
)
print(format(error_df, digits = 3), row.names = FALSE)
## Model Training_Error Test_Error
## Linear 0.4714 0.233
## Polynomial 0.3143 0.500
## RBF 0.0286 0.000
Note: About the Radial Kernel Test Error
It is slightly surprising to see a test error of 0.000; it means the radial-kernel SVM made no mistakes on the test data.
This could be because the test set is small (only 30 points) and may be more clearly separated than a fair test of our model would require.
k-fold CV would have been a better method to accurately estimate its performance on unseen data.
Interpretation of the results:
The radial kernel outperforms both the linear kernel and the degree-3 polynomial kernel in both training error and test error.
Interestingly, the linear SVC has a better test error than the polynomial kernel. Then again, k-fold CV would help us understand each model more accurately.
Let's plot the decision regions of all three models over a grid of new points:
# Create grid for plotting: a 200 x 200 lattice over [-2, 2] x [-2, 2]
x1_seq <- seq(-2, 2, length.out = 200)
x2_seq <- seq(-2, 2, length.out = 200)
grid <- expand.grid(x1 = x1_seq, x2 = x2_seq)
# Label every lattice point with the linear model's predicted class
grid[["pred"]] <- predict(svm_linear, newdata = grid)
# Plot decision boundary: shaded tiles show the predicted class,
# points show the training observations coloured by their true class
ggplot() +
  geom_tile(
    aes(x = x1, y = x2, fill = pred),
    data = grid,
    alpha = 0.4
  ) +
  geom_point(
    aes(x = x1, y = x2, color = y),
    data = train,
    size = 2
  ) +
  scale_fill_manual(values = c("Class1" = "#99ccff", "Class2" = "#ffcccc")) +
  scale_color_manual(values = c("Class1" = "blue", "Class2" = "red")) +
  labs(title = "SVM Linear - Decision Boundary", x = "x1", y = "x2") +
  theme_minimal()
Observations:
The linear model predicts every point as Class1.
It is the worst performer.
# Predict on grid: overwrite the lattice labels with the RBF model's classes
grid[["pred"]] <- predict(svm_rbf, newdata = grid)
# Plot decision boundary: shaded tiles show the predicted class,
# points show the training observations coloured by their true class
ggplot() +
  geom_tile(
    aes(x = x1, y = x2, fill = pred),
    data = grid,
    alpha = 0.4
  ) +
  geom_point(
    aes(x = x1, y = x2, color = y),
    data = train,
    size = 2
  ) +
  scale_fill_manual(values = c("Class1" = "#99ccff", "Class2" = "#ffcccc")) +
  scale_color_manual(values = c("Class1" = "blue", "Class2" = "red")) +
  labs(title = "SVM with RBF Kernel - Decision Boundary", x = "x1", y = "x2") +
  theme_minimal()
Observations:
Except for 2 of the 70 training points (training error 0.0286), the svm_rbf model classifies the data well.
So far this has the best classification error rate.
# Predict on grid: overwrite the lattice labels with the polynomial model's classes
grid[["pred"]] <- predict(svm_poly, newdata = grid)
# Plot decision boundary: shaded tiles show the predicted class,
# points show the training observations coloured by their true class
ggplot() +
  geom_tile(
    aes(x = x1, y = x2, fill = pred),
    data = grid,
    alpha = 0.4
  ) +
  geom_point(
    aes(x = x1, y = x2, color = y),
    data = train,
    size = 2
  ) +
  scale_fill_manual(values = c("Class1" = "#99ccff", "Class2" = "#ffcccc")) +
  scale_color_manual(values = c("Class1" = "blue", "Class2" = "red")) +
  labs(
    title = "SVM with Polynomial(degree =3) Kernel - Decision Boundary",
    x = "x1", y = "x2"
  ) +
  theme_minimal()
Observations:
It misclassifies a lot of Class1 points as Class2.
This is better than linear but not as good as the radial kernel.
Conclusion:
As expected, the radial kernel outperforms the other SVM kernels, since the classes were generated from a circular decision boundary: \(X_1^2 + X_2^2 = 2.25\).
Even from the errors reported above, radial is the clear winner!
# Reset the RNG seed before the Auto analysis
set.seed(96)
data(Auto)
# (a) Binary response: "1" when mpg is above the median mpg, "0" otherwise,
# stored as a factor so the response is treated as a class label.
# mpg itself is dropped because it defines the target.
# NOTE(review): every remaining column stays in as a predictor for
# `highMPG ~ .`; if Auto carries an identifier-like column (e.g. the car
# name), confirm that including it is intended.
Auto <- Auto %>%
  mutate(highMPG = factor(as.integer(mpg > median(mpg)))) %>%
  select(-mpg)
# Cross-validated search over the cost parameter of a linear-kernel SVM.
# kernel and scale are forwarded to svm(); ranges lists the candidate costs.
tune_out_linear <- tune(
  svm,
  highMPG ~ .,
  data = Auto,
  ranges = list(cost = c(0.01, 0.1, 1, 10, 100)),
  kernel = "linear",
  scale = TRUE
)
# Best parameters and the per-candidate CV errors
summary(tune_out_linear)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.01
##
## - best performance: 0.09198718
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 0.09198718 0.03884572
## 2 1e-01 0.10224359 0.03456688
## 3 1e+00 0.09955128 0.03728256
## 4 1e+01 0.10980769 0.04862607
## 5 1e+02 0.10974359 0.04699767
The best cost is 0.01, the smallest value in the grid (note that the default in e1071's svm() is cost = 1, not 0.01).
As cost increases, the CV error tends to increase slightly, meaning a higher penalty for misclassifications doesn't improve generalization.
This makes sense: a lower cost in a linear SVM allows a wider margin that tolerates some misclassified points in exchange for better generalization.
Radial
# Cross-validated search over every (cost, gamma) pair for a radial-kernel SVM
tune_out_radial <- tune(
  svm,
  highMPG ~ .,
  data = Auto,
  ranges = list(
    cost = c(0.1, 1, 10),
    gamma = c(0.5, 1, 2)
  ),
  kernel = "radial"
)
# Best parameters and the per-candidate CV errors
summary(tune_out_radial)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 1 0.5
##
## - best performance: 0.07916667
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 0.1 0.5 0.08948718 0.03046532
## 2 1.0 0.5 0.07916667 0.03085406
## 3 10.0 0.5 0.07916667 0.03527292
## 4 0.1 1.0 0.57384615 0.03126062
## 5 1.0 1.0 0.08173077 0.03986661
## 6 10.0 1.0 0.07916667 0.03085406
## 7 0.1 2.0 0.57384615 0.03126062
## 8 1.0 2.0 0.12256410 0.04509489
## 9 10.0 2.0 0.12000000 0.04382978
Observations & Interpretations
Cost = 1.0 and Gamma = 0.5 yield the best result, with a CV error of 0.0792; Cost = 10 with Gamma = 0.5 and Cost = 10 with Gamma = 1.0 tie at the same error.
Lower values of gamma (0.5) tend to give more stable and better performance with low error rates.
The dispersion (standard deviation) of the cross-validation errors is fairly low for most settings (typically between 0.03 and 0.05), which means the model’s performance is quite stable across different folds.
The best radial kernel results (CV error ≈ 7.9%) are better than the best linear kernel results from part (b), which had a CV error of ~9.2%
Polynomial Kernel
# Cross-validated search over every (cost, degree) pair for a polynomial-kernel SVM
tune_out_poly <- tune(
  svm,
  highMPG ~ .,
  data = Auto,
  ranges = list(
    cost = c(0.1, 1, 10),
    degree = c(2, 3, 4)
  ),
  kernel = "polynomial"
)
# Best parameters and the per-candidate CV errors
summary(tune_out_poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 10 2
##
## - best performance: 0.5383974
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.1 2 0.5611538 0.03806443
## 2 1.0 2 0.5611538 0.03806443
## 3 10.0 2 0.5383974 0.05626799
## 4 0.1 3 0.5611538 0.03806443
## 5 1.0 3 0.5611538 0.03806443
## 6 10.0 3 0.5611538 0.03806443
## 7 0.1 4 0.5611538 0.03806443
## 8 1.0 4 0.5611538 0.03806443
## 9 10.0 4 0.5611538 0.03806443
Observations & Interpretations
No improvement in performance between degree 2, 3, and 4: the CV error stays at about 0.5612 for almost every setting.
A CV error above 0.5 is no better than guessing on a median split, so this does not show that degree 2 is sufficient; rather, the polynomial kernel with these settings fails to fit the data at all (it is worth checking the kernel's gamma and coef0 settings and the predictors passed to the model).
Across all values of cost (0.1, 1.0, 10.0), the CV error remains nearly identical, with only slight variations.
Cost = 10 with degree 2 gives a slightly lower error (0.5384), but the difference is minimal.
The dispersion is the same (about 0.0381) for every combination except cost = 10 with degree 2 (0.0563).
Conclusion : The radial kernel outperforms the polynomial kernel and linear kernel by a noticeable margin on this data, showing that non-linear separability is best captured by the radial kernel for this problem.
# Pull the winning fitted model out of each tuning result
best_linear <- tune_out_linear[["best.model"]]
best_radial <- tune_out_radial[["best.model"]]
best_poly <- tune_out_poly[["best.model"]]
# Training error
# Misclassification rate of a fitted classifier on a labelled data set.
#   model    - fitted model that has a predict() method
#   data     - data frame to predict on; defaults to the global `Auto`
#   response - name of the true-label column in `data`; defaults to "highMPG"
# Returns the fraction of rows whose predicted label differs from the true one.
# Calling train_err(model) with one argument behaves exactly as before.
train_err <- function(model, data = Auto, response = "highMPG") {
  mean(predict(model, data) != data[[response]])
}
# Training error of each tuned model, collected into one table
training_errors_df <- data.frame(
  Model = c("Linear", "Radial", "Polynomial"),
  Training_Error = vapply(
    list(best_linear, best_radial, best_poly),
    train_err,
    numeric(1)
  )
)
# Show the table rounded to 3 significant digits, without row numbers
print(format(training_errors_df, digits = 3), row.names = FALSE)
## Model Training_Error
## Linear 0.0867
## Radial 0.0179
## Polynomial 0.4311
# Each call draws the fitted classifier on a two-variable slice of Auto;
# the formula picks the y-axis ~ x-axis pair.
# Linear SVM plot
plot(best_linear, data = Auto, formula = horsepower ~ weight)
# Radial SVM plot
plot(best_radial, data = Auto, formula = displacement ~ acceleration)
# Polynomial SVM plot
plot(best_poly, data = Auto, formula = cylinders ~ horsepower)
Conclusion from Training Errors: The radial kernel SVM is clearly the best performer in terms of training error, which aligns with the conclusion from the cross-validation error rates as well.
From the Linear SVM Plot (weight and horsepower): Even though the exact decision boundary is not clearly visible, a lot of support vectors lie in the region where weight is between 2500 and 3700 and horsepower is between 60 and 130.
From the Radial SVM Plot (acceleration and displacement): There is no pattern visible in the plot, but we can see a lot of X marks, which implies that there are more support vectors than in the linear model.
From the Best Polynomial SVM Plot (horsepower and cylinders): We can see a lot of X marks as well, but no clear boundary, as was the case in all the plots above. There is a lot of overlap among the support vectors: light-red marks for horsepower from 50 to 100, and a much wider range of black X marks at 8 cylinders. It's hard to tell from the plots alone whether the polynomial SVM is better than the radial one, so we have to rely on the cross-validation errors.
# Reset the RNG seed so the OJ train/test split is reproducible
set.seed(96)
data(OJ)
# 800 randomly chosen rows form the training set; all remaining rows are the
# test set. seq_len() is used instead of 1:nrow() so an empty data frame
# cannot produce the index vector c(1, 0).
train_index <- sample(seq_len(nrow(OJ)), 800)
train_data <- OJ[train_index, ]
test_data <- OJ[-train_index, ]
# Linear-kernel SVM predicting Purchase from all other columns at cost = 0.01.
# Note: this reassigns `svm_linear`, which earlier held the simulated-data model.
svm_linear <- svm(Purchase ~ ., data = train_data, kernel = "linear", cost = 0.01, scale = TRUE)
summary(svm_linear)
##
## Call:
## svm(formula = Purchase ~ ., data = train_data, kernel = "linear",
## cost = 0.01, scale = TRUE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 433
##
## ( 217 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
Conclusion from summary
A total of 433 support vectors (out of 800 training observations) define the linear decision boundary: 217 for CH and 216 for MM.
Pretty even split among the two classes.
# Predicted labels of the cost = 0.01 linear SVM on both splits
train_pred_linear <- predict(svm_linear, newdata = train_data)
test_pred_linear <- predict(svm_linear, newdata = test_data)
# Misclassification rates: share of rows whose prediction differs from Purchase
train_error_linear <- mean(train_data$Purchase != train_pred_linear)
test_error_linear <- mean(test_data$Purchase != test_pred_linear)
cat("Training Error (Linear):", train_error_linear, "\n")
## Training Error (Linear): 0.1625
cat("Test Error (Linear):", test_error_linear, "\n")
## Test Error (Linear): 0.1851852
Interpretations
About 16% of the training data is misclassified, while about 18.5% of the test data is misclassified.
The training error and test error are pretty close, indicating low variance; the model is not overfitting.
A more flexible model can be used.
# Cross-validated search over cost for the linear-kernel SVM on the OJ training set
tune_linear <- tune(
  svm,
  Purchase ~ .,
  data = train_data,
  ranges = list(cost = c(0.01, 0.1, 1, 5, 10)),
  kernel = "linear"
)
# Best cost and the per-candidate CV errors
summary(tune_linear)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 5
##
## - best performance: 0.16375
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.17250 0.03525699
## 2 0.10 0.16500 0.03899786
## 3 1.00 0.16875 0.04093101
## 4 5.00 0.16375 0.03606033
## 5 10.00 0.16375 0.03508422
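Observations
Cost = 5 and cost = 10 tie for the lowest CV error (0.16375); tune() reports cost = 5 as the best parameter. The differences across the whole grid are small compared with the dispersion (about 0.035 to 0.041).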
# Refit the linear SVM on the training set at cost = 10.
# NOTE(review): the tuning summary reports cost = 5 as the best parameter;
# cost = 5 and cost = 10 tie at a CV error of 0.16375, and this refit uses 10.
best_linear_model <- svm(
  Purchase ~ .,
  data = train_data,
  kernel = "linear",
  cost = 10,
  scale = TRUE
)
# Predicted labels on both splits
train_pred_best_linear <- predict(best_linear_model, newdata = train_data)
test_pred_best_linear <- predict(best_linear_model, newdata = test_data)
# Misclassification rates
train_error_best_linear <- mean(train_data$Purchase != train_pred_best_linear)
test_error_best_linear <- mean(test_data$Purchase != test_pred_best_linear)
cat("Best Training Error (Linear):", train_error_best_linear, "\n")
## Best Training Error (Linear): 0.15625
cat("Best Test Error (Linear):", test_error_best_linear, "\n")
## Best Test Error (Linear): 0.1962963
Conclusions:
Linear SVM at cost = 10 slightly improved training accuracy.
However, the generalization to the test set is not better, which shows that linear decision boundaries might not be sufficient for this problem.
# (b) Radial-kernel SVM on the OJ training set; gamma is left at svm()'s
# default and cost is set to 0.01
svm_radial <- svm(
  Purchase ~ .,
  data = train_data,
  kernel = "radial",
  cost = 0.01
)
# Kernel, cost and support-vector counts of the fit
summary(svm_radial)
##
## Call:
## svm(formula = Purchase ~ ., data = train_data, kernel = "radial",
## cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 0.01
##
## Number of Support Vectors: 627
##
## ( 315 312 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
# (c) Errors: misclassification rates of the cost = 0.01 radial SVM on both splits
train_error_radial <- mean(predict(svm_radial, train_data) != train_data$Purchase)
test_error_radial <- mean(predict(svm_radial, test_data) != test_data$Purchase)
# Print the rates so they appear in the output, as the linear section does
cat("Training Error (Radial):", train_error_radial, "\n")
cat("Test Error (Radial):", test_error_radial, "\n")
# (d) Tune cost: cross-validated search over cost, with the RNG seed reset
# immediately beforehand
set.seed(123)
tune_radial <- tune(svm, Purchase ~ ., data = train_data,
                    kernel = "radial",
                    ranges = list(cost = c(0.01, 0.1, 1, 5, 10)))
summary(tune_radial)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.17
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.39000 0.06174545
## 2 0.10 0.18625 0.04730589
## 3 1.00 0.17000 0.05407043
## 4 5.00 0.17500 0.04208127
## 5 10.00 0.17875 0.04825065
At cost = 0.01 the radial SVM uses 627 support vectors (315 for CH and 312 for MM), evenly split between the two classes.
The best cost is 1.00, since it has the lowest CV error (0.17) and a comparable dispersion.
# (e) Refit the radial SVM at cost = 1 and report its error on both splits
best_radial_model <- svm(
  Purchase ~ .,
  data = train_data,
  kernel = "radial",
  cost = 1
)
# Misclassification rates: share of rows whose prediction differs from Purchase
train_error_best_radial <- mean(
  train_data$Purchase != predict(best_radial_model, newdata = train_data)
)
test_error_best_radial <- mean(
  test_data$Purchase != predict(best_radial_model, newdata = test_data)
)
cat("Best Training Error (Radial):", train_error_best_radial, "\n")
## Best Training Error (Radial): 0.14625
cat("Best Test Error (Radial):", test_error_best_radial, "\n")
## Best Test Error (Radial): 0.1925926
Radial SVM performs slightly better than the linear SVM on both training and test sets.
The improvement isn’t huge, but the radial kernel captures non-linear boundaries a bit better, which is expected if the data is not linearly separable.
# (b) Polynomial-kernel SVM (degree 2, cost = 0.01) on the OJ training set.
# Note: this reassigns `svm_poly`, which earlier held the simulated-data model.
svm_poly <- svm(
  Purchase ~ .,
  data = train_data,
  kernel = "polynomial",
  degree = 2,
  cost = 0.01
)
# Kernel, cost, degree and support-vector counts of the fit
summary(svm_poly)
##
## Call:
## svm(formula = Purchase ~ ., data = train_data, kernel = "polynomial",
## degree = 2, cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 0.01
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 627
##
## ( 315 312 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
# (c) Errors: misclassification rates of the cost = 0.01 polynomial SVM on both splits
train_error_poly <- mean(predict(svm_poly, train_data) != train_data$Purchase)
test_error_poly <- mean(predict(svm_poly, test_data) != test_data$Purchase)
# Print the rates so they appear in the output, as the linear section does
cat("Training Error (Poly):", train_error_poly, "\n")
cat("Test Error (Poly):", test_error_poly, "\n")
# (d) Tune cost: cross-validated search over cost with the degree held at 2
tune_poly <- tune(svm, Purchase ~ ., data = train_data,
                  kernel = "polynomial", degree = 2,
                  ranges = list(cost = c(0.01, 0.1, 1, 5, 10)))
summary(tune_poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 10
##
## - best performance: 0.17875
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.38250 0.04866267
## 2 0.10 0.32500 0.06508541
## 3 1.00 0.20000 0.06692658
## 4 5.00 0.18375 0.04860913
## 5 10.00 0.17875 0.05138701
Very high number of support vectors (627 out of 800) at cost = 0.01, the same as for the radial SVM.
Best cost = 10, with the lowest cross-validation error (0.17875).
As cost increases, the CV error decreases steadily; for the radial SVM it also dropped sharply at first but was lowest at cost = 1.
# (e) Refit the degree-2 polynomial SVM at cost = 10 and report its error on both splits
best_poly_model <- svm(
  Purchase ~ .,
  data = train_data,
  kernel = "polynomial",
  degree = 2,
  cost = 10
)
# Misclassification rates: share of rows whose prediction differs from Purchase
train_error_best_poly <- mean(
  train_data$Purchase != predict(best_poly_model, newdata = train_data)
)
test_error_best_poly <- mean(
  test_data$Purchase != predict(best_poly_model, newdata = test_data)
)
cat("Best Training Error (Poly):", train_error_best_poly, "\n")
## Best Training Error (Poly): 0.145
cat("Best Test Error (Poly):", test_error_best_poly, "\n")
## Best Test Error (Poly): 0.2037037
# Training and test error of the three refitted models, one row per kernel
error_summary <- data.frame(
  Model = c(
    "Linear (Tuned)",
    "Radial (Tuned)",
    "Poly (Tuned)"
  ),
  Training_Error = c(
    train_error_best_linear,
    train_error_best_radial,
    train_error_best_poly
  ),
  Test_Error = c(
    test_error_best_linear,
    test_error_best_radial,
    test_error_best_poly
  )
)
print(error_summary)
## Model Training_Error Test_Error
## 1 Linear (Tuned) 0.15625 0.1962963
## 2 Radial (Tuned) 0.14625 0.1925926
## 3 Poly (Tuned) 0.14500 0.2037037
Conclusions:
Polynomial SVM had the lowest training error, very slightly better than radial.
All models are fairly close, so none are drastically over- or underfitting.
Radial kernel performs best overall, with the lowest test error (19.26%).
Linear is a close second (19.63%).
Polynomial SVM had the worst test error (20.37%), even though its training error was low — suggesting some overfitting to training data.
Therefore, Best model for this dataset: Radial SVM (cost = 1).