Lab 10
Question 4
# Load required libraries
library(e1071)
library(ggplot2)
library(gridExtra)
set.seed(123)
# Simulate a non-linearly separable dataset
n <- 100
x1 <- runif(n, -1, 1)
x2 <- runif(n, -1, 1)
y <- ifelse(x1^2 + x2^2 > 0.5, 1, 0)
data <- data.frame(x1 = x1, x2 = x2, y = as.factor(y))
# Split into train and test
train_idx <- sample(1:n, n * 0.7)
train_data <- data[train_idx, ]
test_data <- data[-train_idx, ]
# Fit models
svc_model <- svm(y ~ ., data = train_data, kernel = "linear", cost = 1)
poly_model <- svm(y ~ ., data = train_data, kernel = "polynomial", degree = 3, cost = 1)
radial_model <- svm(y ~ ., data = train_data, kernel = "radial", gamma = 1, cost = 1)
# Predictions and error rates
predict_and_error <- function(model, train_data, test_data) {
  pred_train <- predict(model, train_data)
  pred_test <- predict(model, test_data)
  train_error <- mean(pred_train != train_data$y)
  test_error <- mean(pred_test != test_data$y)
  list(train_error = train_error, test_error = test_error)
}
errors <- list(
  svc = predict_and_error(svc_model, train_data, test_data),
  poly = predict_and_error(poly_model, train_data, test_data),
  radial = predict_and_error(radial_model, train_data, test_data)
)
# Plot decision boundaries
plot_svm <- function(model, data, title) {
  grid <- expand.grid(
    x1 = seq(-1, 1, length = 100),
    x2 = seq(-1, 1, length = 100)
  )
  grid$y <- predict(model, grid)
  ggplot(data, aes(x = x1, y = x2, color = y)) +
    geom_point(size = 2) +
    geom_contour(data = grid, aes(z = as.numeric(y)), breaks = 1.5, color = "black") +
    labs(title = title) +
    theme_minimal()
}
p1 <- plot_svm(svc_model, train_data, "SVC (Linear)")
p2 <- plot_svm(poly_model, train_data, "SVM (Polynomial Kernel)")
p3 <- plot_svm(radial_model, train_data, "SVM (Radial Kernel)")
# Display all plots
grid.arrange(p1, p2, p3, nrow = 1)
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf

# Print error rates
cat("Training and Test Error Rates:\n")
## Training and Test Error Rates:
for (model_name in names(errors)) {
  cat(model_name, "\n")
  cat(" Train Error:", round(errors[[model_name]]$train_error, 3), "\n")
  cat(" Test Error :", round(errors[[model_name]]$test_error, 3), "\n")
}
## svc
## Train Error: 0.414
## Test Error : 0.467
## poly
## Train Error: 0.414
## Test Error : 0.467
## radial
## Train Error: 0.029
## Test Error : 0.067
Insights
- The linear SVC and the degree-3 polynomial SVM produced identical, poor error rates (train ~0.414, test ~0.467), consistent with predicting a single class everywhere. With e1071's default coef0 = 0, the degree-3 polynomial kernel is homogeneous (its expansion contains no quadratic terms), so it cannot represent the circular boundary x1^2 + x2^2 = 0.5 any better than a linear classifier; a sketch with coef0 = 1 follows this list.
- This also explains the "Zero contours" warnings above: both models predict one class over the entire plotting grid, so there is no decision boundary to draw.
- The radial (RBF) kernel captured the curved boundary and improved classification dramatically (train ~0.029, test ~0.067). When there is visible non-linear separation, a radial kernel is much better suited than a linear SVC.
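A minimal check (my addition, not part of the original lab; poly_model_inhom is an illustrative name): e1071's polynomial kernel is (gamma * u'v + coef0)^degree with coef0 = 0 by default, so setting coef0 = 1 makes the kernel inhomogeneous and should let it recover the quadratic boundary.
# Sketch: inhomogeneous degree-3 polynomial kernel (coef0 = 1); the expansion
# of (gamma * <u, v> + 1)^3 contains the squared terms needed for the
# boundary x1^2 + x2^2 = 0.5, so the fit should improve markedly
poly_model_inhom <- svm(y ~ ., data = train_data, kernel = "polynomial",
                        degree = 3, coef0 = 1, cost = 1)
predict_and_error(poly_model_inhom, train_data, test_data)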
Question 7
7a
library(ISLR)
data(Auto)
# Remove missing values just in case
Auto <- na.omit(Auto)
# Create binary variable: mpg01
Auto$mpg01 <- ifelse(Auto$mpg > median(Auto$mpg), 1, 0)
# Check the distribution
table(Auto$mpg01)
##
## 0 1
## 196 196
7b
library(e1071)
# Remove mpg to avoid data leakage
Auto_svm <- Auto[, !(names(Auto) %in% c("mpg"))]
set.seed(1)
# Cross-validation for linear kernel
tune_out_linear <- tune(svm, mpg01 ~ ., data = Auto_svm, kernel = "linear",
                        ranges = list(cost = c(0.01, 0.1, 1, 10, 100)))
summary(tune_out_linear)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.09603609
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 0.10421950 0.03138085
## 2 1e-01 0.10227373 0.03634911
## 3 1e+00 0.09603609 0.03666741
## 4 1e+01 0.10531309 0.03683207
## 5 1e+02 0.12079079 0.03864160
7c: radial kernel
set.seed(1)
tune_out_radial <- tune(svm, mpg01 ~ ., data = Auto_svm, kernel = "radial",
                        ranges = list(cost = c(0.01, 0.1, 1, 10, 100),
                                      gamma = c(0.01, 0.1, 1)))
summary(tune_out_radial)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 10 0.1
##
## - best performance: 0.06878597
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 1e-02 0.01 0.29820372 0.03634917
## 2 1e-01 0.01 0.09907227 0.02542107
## 3 1e+00 0.01 0.08728525 0.02925025
## 4 1e+01 0.01 0.08338639 0.03654471
## 5 1e+02 0.01 0.07622852 0.03078425
## 6 1e-02 0.10 0.15038588 0.02397154
## 7 1e-01 0.10 0.07282379 0.02593818
## 8 1e+00 0.10 0.07186118 0.03017440
## 9 1e+01 0.10 0.06878597 0.03196026
## 10 1e+02 0.10 0.08770589 0.03354051
## 11 1e-02 1.00 0.47238675 0.03941296
## 12 1e-01 1.00 0.27951125 0.03634400
## 13 1e+00 1.00 0.09918732 0.02052348
## 14 1e+01 1.00 0.10442706 0.02069028
## 15 1e+02 1.00 0.10442540 0.02069205
Insights
- The radial kernel clearly outperformed both the linear and polynomial kernels, achieving the lowest cross-validation error (~0.069) at cost = 10 and gamma = 0.1. This points to non-linear structure in the data that the radial kernel modeled successfully.
- Error generally decreased as cost increased up to that point, but rose again at cost = 100 (0.088 at gamma = 0.1), a sign that overly large cost values risk overfitting, so tuning remains crucial.
- One caveat: mpg01 was left numeric, so svm() fit eps-regression here and the error column above is a cross-validated mean squared error rather than a misclassification rate; a classification version is sketched below.
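A minimal sketch (my addition, not run in the original analysis; Auto_cls and tune_radial_cls are illustrative names): recoding mpg01 as a factor makes svm() perform C-classification, so tune() reports a misclassification rate instead of regression MSE.
# Sketch: classification version with mpg01 as a factor
Auto_cls <- Auto_svm
Auto_cls$mpg01 <- as.factor(Auto_cls$mpg01)
set.seed(1)
tune_radial_cls <- tune(svm, mpg01 ~ ., data = Auto_cls, kernel = "radial",
                        ranges = list(cost = c(0.01, 0.1, 1, 10, 100),
                                      gamma = c(0.01, 0.1, 1)))
tune_radial_cls$best.performance  # cross-validated misclassification rate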
7c: polynomial kernel
set.seed(1)
tune_out_poly <- tune(svm, mpg01 ~ ., data = Auto_svm, kernel = "polynomial",
                      ranges = list(cost = c(0.01, 0.1, 1, 10),
                                    degree = c(2, 3, 4)))
summary(tune_out_poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 10 2
##
## - best performance: 0.3375643
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.01 2 0.4982382 0.03934745
## 2 0.10 2 0.4960905 0.03974005
## 3 1.00 2 0.4752293 0.04525479
## 4 10.00 2 0.3375643 0.08311313
## 5 0.01 3 0.4984136 0.03930767
## 6 0.10 3 0.4978768 0.03935396
## 7 1.00 3 0.4924827 0.03986701
## 8 10.00 3 0.4418838 0.04913560
## 9 0.01 4 0.4984719 0.03930290
## 10 0.10 4 0.4984602 0.03930559
## 11 1.00 4 0.4983427 0.03933257
## 12 10.00 4 0.4971505 0.03961089
Insights
- The polynomial kernel performed far worse, with a best cross-validation error of ~0.338 (cost = 10, degree = 2), well behind both the linear and radial kernels.
- Its assumptions about the shape of the decision boundary evidently did not match the data; as in question 4, the default coef0 = 0 makes the kernel homogeneous, which may be part of the problem (see the sketch below).
- Among the three kernels, the radial SVM achieves the lowest cross-validation error and is the best choice for this classification problem.
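A hedged follow-up (my addition; tune_out_poly2 is an illustrative name) that adds the offset term to the tuning grid, on the assumption that an inhomogeneous kernel is a better match for curved boundaries:
# Sketch: tune coef0 alongside cost and degree; coef0 = 1 makes the
# polynomial kernel inhomogeneous
set.seed(1)
tune_out_poly2 <- tune(svm, mpg01 ~ ., data = Auto_svm, kernel = "polynomial",
                       ranges = list(cost = c(0.01, 0.1, 1, 10),
                                     degree = c(2, 3),
                                     coef0 = c(0, 1)))
tune_out_poly2$best.parameters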
7d
# Load required libraries
library(e1071)
library(ISLR)
# Prepare the data
Auto <- na.omit(Auto)
Auto$mpg01 <- ifelse(Auto$mpg > median(Auto$mpg), 1, 0)
Auto_svm <- Auto[, !(names(Auto) %in% c("mpg"))]
# plot.svm draws class regions, so mpg01 must be a factor (classification fit)
Auto_svm$mpg01 <- as.factor(Auto_svm$mpg01)
# Refit the tuned radial SVM (if not done already)
set.seed(1)
tune_out_radial <- tune(svm, mpg01 ~ ., data = Auto_svm, kernel = "radial",
                        ranges = list(cost = c(0.01, 0.1, 1, 10, 100),
                                      gamma = c(0.01, 0.1, 1)))
best_radial <- tune_out_radial$best.model
# --- Plot decision boundary for selected variable pair ---
# Plot 1: horsepower vs. weight
plot(best_radial, Auto_svm, horsepower ~ weight,
     main = "Radial SVM: Horsepower vs. Weight")
Question 8
library(ISLR2)
##
## Attaching package: 'ISLR2'
## The following object is masked _by_ '.GlobalEnv':
##
## Auto
## The following objects are masked from 'package:ISLR':
##
## Auto, Credit
library(e1071)
# Load the data
data(OJ)
set.seed(1) # For reproducibility
8a
train_idx <- sample(1:nrow(OJ), 800)
train_data <- OJ[train_idx, ]
test_data <- OJ[-train_idx, ]
8b
svm_linear <- svm(Purchase ~ ., data = train_data, kernel = "linear", cost = 0.01)
summary(svm_linear)
##
## Call:
## svm(formula = Purchase ~ ., data = train_data, kernel = "linear",
## cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 435
##
## ( 219 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
Results
- The large number of support vectors (435 of 800 training observations) indicates a very soft margin, a direct consequence of the small cost parameter; see the sketch after this list.
- A soft margin can help when the classes overlap, but if the cost is too low it can also raise training error and lead to underfitting.
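A quick sketch of the second point (my addition; svm_firm is an illustrative name): refitting with a larger cost should tighten the margin and use fewer support vectors.
# Sketch: compare support-vector counts at a soft (cost = 0.01) and a
# firmer (cost = 10) margin; tot.nSV is the total count of support vectors
svm_firm <- svm(Purchase ~ ., data = train_data, kernel = "linear", cost = 10)
c(soft = svm_linear$tot.nSV, firm = svm_firm$tot.nSV)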
8c
# Training error
pred_train <- predict(svm_linear, train_data)
train_error <- mean(pred_train != train_data$Purchase)
# Test error
pred_test <- predict(svm_linear, test_data)
test_error <- mean(pred_test != test_data$Purchase)
train_error
## [1] 0.175
test_error
## [1] 0.1777778
- The training and test error rates are very close, which suggests the model is not overfitting. This is a typical outcome with a small cost value in an SVM, which encourages a softer margin and tolerates some misclassifications.
- However, a 17-18% error rate is likely higher than ideal and points toward underfitting; a larger cost might improve performance by producing a firmer decision boundary (this is tuned in 8d below).
8d
set.seed(1)
tune_linear <- tune(svm, Purchase ~ ., data = train_data, kernel = "linear",
                    ranges = list(cost = c(0.01, 0.1, 1, 10)))
summary(tune_linear)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.1
##
## - best performance: 0.1725
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.17625 0.02853482
## 2 0.10 0.17250 0.03162278
## 3 1.00 0.17500 0.02946278
## 4 10.00 0.17375 0.03197764
8e
best_linear <- tune_linear$best.model
# Error rates
train_error_best <- mean(predict(best_linear, train_data) != train_data$Purchase)
test_error_best <- mean(predict(best_linear, test_data) != test_data$Purchase)
train_error_best
## [1] 0.165
test_error_best
## [1] 0.162963
8f
set.seed(1)
tune_radial <- tune(svm, Purchase ~ ., data = train_data, kernel = "radial",
                    ranges = list(cost = c(0.01, 0.1, 1, 10)))
summary(tune_radial)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.17125
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.39375 0.04007372
## 2 0.10 0.18625 0.02853482
## 3 1.00 0.17125 0.02128673
## 4 10.00 0.18625 0.02853482
# Best radial model
best_radial <- tune_radial$best.model
train_error_radial <- mean(predict(best_radial, train_data) != train_data$Purchase)
test_error_radial <- mean(predict(best_radial, test_data) != test_data$Purchase)
train_error_radial
## [1] 0.15125
test_error_radial
## [1] 0.1851852
8g
set.seed(1)
tune_poly <- tune(svm, Purchase ~ ., data = train_data, kernel = "polynomial", degree = 2,
                  ranges = list(cost = c(0.01, 0.1, 1, 10)))
summary(tune_poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 10
##
## - best performance: 0.18125
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.39125 0.04210189
## 2 0.10 0.32125 0.05001736
## 3 1.00 0.20250 0.04116363
## 4 10.00 0.18125 0.02779513
# Best poly model
best_poly <- tune_poly$best.model
train_error_poly <- mean(predict(best_poly, train_data) != train_data$Purchase)
test_error_poly <- mean(predict(best_poly, test_data) != test_data$Purchase)
train_error_poly
## [1] 0.15
test_error_poly
## [1] 0.1888889
8h
- After tuning all three kernels (linear, radial, and polynomial), the radial SVM achieved the lowest cross-validation error (0.171, vs. 0.173 for linear and 0.181 for polynomial) and, along with the polynomial model, the lowest training error.
- On the held-out test set, however, the tuned linear SVM generalized best (test error ~16.3%), while the radial (~18.5%) and polynomial (~18.9%) models did slightly worse, suggesting their extra flexibility led to mild overfitting on this split.
- Overall, the linear SVM (cost = 0.1) offers the best trade-off between simplicity and generalization on the OJ data; the non-linear kernels add little here, which hints that a linear boundary already captures most of the class structure.
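To make the comparison concrete, a short sketch (my addition) collecting the error rates computed above into one table:
# Summary table of the tuned linear, radial, and polynomial SVMs
data.frame(kernel = c("linear", "radial", "polynomial"),
           train = c(train_error_best, train_error_radial, train_error_poly),
           test = c(test_error_best, test_error_radial, test_error_poly))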