Problem 5.

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Generate random data
set.seed(123)  # For reproducibility
x1 <- runif(500) - 0.5
x2 <- runif(500) - 0.5

# Assign class labels using the quadratic boundary x1^2 - x2^2 = 0
y <- 1 * (x1^2 - x2^2 > 0)

# Combine into a data frame
data <- data.frame(x1, x2, y)
# Visualize the data

ggplot(data, aes(x = x1, y = x2, color = factor(y))) +
  geom_point() +
  labs(color = "Class") +
  theme_minimal()

# Fit a logistic regression model
logistic_model <- glm(y ~ x1 + x2, data = data, family = binomial)

# Summarize the model
summary(logistic_model)
## 
## Call:
## glm(formula = y ~ x1 + x2, family = binomial, data = data)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.04792    0.08949   0.535    0.592
## x1          -0.03999    0.31516  -0.127    0.899
## x2           0.11509    0.30829   0.373    0.709
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 692.86  on 499  degrees of freedom
## Residual deviance: 692.71  on 497  degrees of freedom
## AIC: 698.71
## 
## Number of Fisher Scoring iterations: 3
# Predict probabilities
data$predicted_prob <- predict(logistic_model, type = "response")

# Convert probabilities to class labels
data$predicted_class <- ifelse(data$predicted_prob > 0.5, 1, 0)
ggplot(data, aes(x = x1, y = x2, color = factor(predicted_class))) +
  geom_point() +
  labs(color = "Predicted Class") +
  theme_minimal()

# Overlay the fitted linear decision boundary (where predicted probability = 0.5)
ggplot(data, aes(x = x1, y = x2)) +
  geom_point(aes(color = factor(predicted_class))) +
  geom_abline(intercept = -coef(logistic_model)[1] / coef(logistic_model)[3], 
              slope = -coef(logistic_model)[2] / coef(logistic_model)[3], 
              color = "black", linetype = "dashed") +
  labs(color = "Predicted Class") +
  theme_minimal()

# Fit logistic regression with non-linear transformations
logistic_model_nl <- glm(y ~ x1 + x2 + I(x1^2) + I(x2^2) + I(x1 * x2) + I(log(abs(x2) + 1)), 
                         data = data, family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

These warnings signal (near-)perfect separation: with the quadratic terms included, the classes can be split exactly, so the maximum-likelihood coefficients diverge (note the enormous estimates and standard errors below).

# Summarize the model
summary(logistic_model_nl)
## 
## Call:
## glm(formula = y ~ x1 + x2 + I(x1^2) + I(x2^2) + I(x1 * x2) + 
##     I(log(abs(x2) + 1)), family = binomial, data = data)
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)
## (Intercept)            -12.45    2874.86  -0.004    0.997
## x1                    -153.75   12534.59  -0.012    0.990
## x2                      39.77    8752.60   0.005    0.996
## I(x1^2)              11838.63  473405.01   0.025    0.980
## I(x2^2)             -12314.37  493862.51  -0.025    0.980
## I(x1 * x2)             604.61   49471.52   0.012    0.990
## I(log(abs(x2) + 1))    192.57   99405.36   0.002    0.998
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6.9286e+02  on 499  degrees of freedom
## Residual deviance: 2.1780e-06  on 493  degrees of freedom
## AIC: 14
## 
## Number of Fisher Scoring iterations: 25
# Predict probabilities
data$predicted_prob_nl <- predict(logistic_model_nl, type = "response")

# Convert probabilities to class labels
data$predicted_class_nl <- ifelse(data$predicted_prob_nl > 0.5, 1, 0)

# Plot the observations with predicted classes
ggplot(data, aes(x = x1, y = x2, color = factor(predicted_class_nl))) +
  geom_point() +
  labs(color = "Predicted Class") +
  theme_minimal()

# Generate a fine grid of x1 and x2 values
grid_x1 <- seq(min(data$x1), max(data$x1), length.out = 100)
grid_x2 <- seq(min(data$x2), max(data$x2), length.out = 100)
grid <- expand.grid(x1 = grid_x1, x2 = grid_x2)

# Predict probabilities across the grid
grid$predicted_prob_nl <- predict(logistic_model_nl, newdata = grid, type = "response")

# Plot observations with predicted classes and overlay decision boundary
ggplot(data, aes(x = x1, y = x2, color = factor(predicted_class_nl))) +
  geom_point() +
  geom_contour(data = grid, aes(x = x1, y = x2, z = predicted_prob_nl), 
               breaks = 0.5, color = "black", linetype = "dashed") +
  labs(color = "Predicted Class") +
  theme_minimal()
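
As a quick sanity check on the two fits (a minimal sketch; both predicted-class columns were computed above, and y is still numeric 0/1 at this point):

# Training accuracy of each logistic fit
mean(data$predicted_class == data$y)     # linear terms only: expect ~0.5, matching the flat deviance above
mean(data$predicted_class_nl == data$y)  # with quadratic terms: expect ~1, matching the near-zero residual deviance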

library(e1071)
## Warning: package 'e1071' was built under R version 4.4.2
# Convert y to a factor (SVM requires categorical labels)
data$y <- factor(data$y)

# Fit an SVM model with a linear kernel
svm_model <- svm(y ~ x1 + x2, data = data, kernel = "linear", cost = 100)

# Predict class labels for training data
data$predicted_class_svm <- predict(svm_model, newdata = data)
# Print model summary
summary(svm_model)
## 
## Call:
## svm(formula = y ~ x1 + x2, data = data, kernel = "linear", cost = 100)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  100 
## 
## Number of Support Vectors:  491
## 
##  ( 247 244 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  0 1
ggplot(data, aes(x = x1, y = x2, color = predicted_class_svm)) +
  geom_point() +
  labs(color = "Predicted Class") +
  theme_minimal()

# Create a grid of points
grid_x1 <- seq(min(data$x1), max(data$x1), length.out = 100)
grid_x2 <- seq(min(data$x2), max(data$x2), length.out = 100)
grid <- expand.grid(x1 = grid_x1, x2 = grid_x2)

# Predict class labels for the grid
grid$predicted_class <- predict(svm_model, newdata = grid)

# Plot decision boundary (as.numeric() maps the factor levels to 1/2, so a
# single contour at breaks = 1.5 traces the boundary; bins = 1 draws nothing)
ggplot(data, aes(x = x1, y = x2, color = predicted_class_svm)) +
  geom_point() +
  geom_contour(data = grid, aes(z = as.numeric(predicted_class)), 
               breaks = 1.5, color = "black") +
  labs(color = "Predicted Class") +
  theme_minimal()

# Fit an SVM model with a radial kernel (y is already a factor)
svm_model_radial <- svm(y ~ x1 + x2, data = data, kernel = "radial", cost = 1, gamma = 1)
data$predicted_class_svm <- predict(svm_model_radial, newdata = data)
# Print model summary
summary(svm_model_radial)
## 
## Call:
## svm(formula = y ~ x1 + x2, data = data, kernel = "radial", cost = 1, 
##     gamma = 1)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
## 
## Number of Support Vectors:  151
## 
##  ( 76 75 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  0 1
ggplot(data, aes(x = x1, y = x2, color = predicted_class_svm)) +
  geom_point() +
  labs(color = "Predicted Class") +
  theme_minimal()

# Create a grid of points
grid_x1 <- seq(min(data$x1), max(data$x1), length.out = 100)
grid_x2 <- seq(min(data$x2), max(data$x2), length.out = 100)
grid <- expand.grid(x1 = grid_x1, x2 = grid_x2)

# Predict class labels for the grid
grid$predicted_class <- predict(svm_model_radial, newdata = grid)

# Plot decision boundary (single contour at the midpoint between the class codes)
ggplot(data, aes(x = x1, y = x2, color = predicted_class_svm)) +
  geom_point() +
  geom_contour(data = grid, aes(z = as.numeric(predicted_class)), 
               breaks = 1.5, color = "black") +
  labs(color = "Predicted Class") +
  theme_minimal()

Comments: The linear SVM cannot separate these classes: the true boundary x1^2 = x2^2 is non-linear, so a single hyperplane does little better than chance (491 of the 500 points end up as support vectors).

The radial SVM recovers a visibly better, non-linear decision boundary.
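
As a numeric check of that comparison, a short sketch computing training accuracy for both fits (y was converted to a factor above):

# Training accuracy of the linear vs. radial SVM
mean(predict(svm_model, newdata = data) == data$y)         # linear kernel
mean(predict(svm_model_radial, newdata = data) == data$y)  # radial kernel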

Problem 7.

library(ISLR2)
## Warning: package 'ISLR2' was built under R version 4.4.2
# Load Auto dataset
data(Auto)

# Create a binary variable based on median mpg
Auto$mpg_binary <- ifelse(Auto$mpg > median(Auto$mpg), 1, 0)

# Convert to factor for classification tasks
Auto$mpg_binary <- factor(Auto$mpg_binary, labels = c("Low MPG", "High MPG"))

# Check the distribution of the new variable
table(Auto$mpg_binary)
## 
##  Low MPG High MPG 
##      196      196
library(caret) 
## Warning: package 'caret' was built under R version 4.4.2
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
# Select predictor variables (excluding mpg itself)
Auto_filtered <- Auto %>%
  select(-mpg)

# Split data into training and testing sets
set.seed(123)
train_idx <- sample(1:nrow(Auto_filtered), size = 0.7 * nrow(Auto_filtered))
train_data <- Auto_filtered[train_idx, ]
test_data  <- Auto_filtered[-train_idx, ]
# Define cost values to test
cost_values <- c(0.1, 1, 10, 100)

# Store cross-validation errors
cv_errors <- data.frame(Cost = numeric(), CV_Error = numeric())

# 10-fold cross-validation setup (reused for every cost value)
train_control <- trainControl(method = "cv", number = 10)

for (c in cost_values) {
  cv_model <- train(mpg_binary ~ cylinders + displacement + horsepower + weight + acceleration, 
                    data = train_data, method = "svmLinear", 
                    trControl = train_control, tuneGrid = data.frame(C = c))
  
  # Store the cross-validation error for this cost
  cv_errors <- rbind(cv_errors, data.frame(Cost = c, CV_Error = 1 - max(cv_model$results$Accuracy)))
}

# Print cross-validation errors
print(cv_errors)
##    Cost   CV_Error
## 1   0.1 0.09882987
## 2   1.0 0.09816850
## 3  10.0 0.09870777
## 4 100.0 0.09485144

Comments:

The lowest CV_Error (0.09485) occurs at cost = 100, meaning a stricter margin (higher cost) helped improve classification accuracy slightly.

The CV_Error fluctuates only slightly across different cost values, meaning the SVM model is relatively stable.

While cost = 100 has the lowest error, using cost = 10 or even cost = 1 might generalize better and reduce the risk of overfitting.
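
One way to check that intuition is to score each cost on the held-out test_data created earlier (a sketch; the split has not been used up to this point):

# Test error for each cost value on the held-out split
for (c in cost_values) {
  m <- svm(mpg_binary ~ cylinders + displacement + horsepower + weight + acceleration, 
           data = train_data, kernel = "linear", cost = c)
  test_err <- mean(predict(m, newdata = test_data) != test_data$mpg_binary)
  print(paste("Cost:", c, "Test Error:", round(test_err, 4)))
}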

cost_values <- c(0.1, 1, 10, 100)
gamma_values <- c(0.1, 1, 10)
degree_values <- c(2, 3, 4)
# Radial kernel (fitted on the full Auto data; after the loop, svm_model_radial
# holds the last combination tried, cost = 100 and gamma = 10)
cv_errors_radial <- data.frame(Cost = numeric(), Gamma = numeric(), CV_Error = numeric())

train_control <- trainControl(method = "cv", number = 10)

for (c in cost_values) {
  for (g in gamma_values) {
    svm_model_radial <- svm(mpg_binary ~ cylinders + displacement + horsepower + weight + acceleration, 
                            data = Auto, kernel = "radial", cost = c, gamma = g)

    # 10-fold cross-validation
    cv_model <- train(mpg_binary ~ cylinders + displacement + horsepower + weight + acceleration, 
                      data = Auto, method = "svmRadial", trControl = train_control, 
                      tuneGrid = data.frame(C = c, sigma = g))
    
    # Store the error for this (cost, gamma) pair
    cv_errors_radial <- rbind(cv_errors_radial, 
                              data.frame(Cost = c, Gamma = g, CV_Error = 1 - max(cv_model$results$Accuracy)))
  }
}
print(cv_errors_radial)
##     Cost Gamma   CV_Error
## 1    0.1   0.1 0.09937584
## 2    0.1   1.0 0.10186910
## 3    0.1  10.0 0.20392375
## 4    1.0   0.1 0.09708165
## 5    1.0   1.0 0.08464575
## 6    1.0  10.0 0.09967949
## 7   10.0   0.1 0.09938259
## 8   10.0   1.0 0.06899460
## 9   10.0  10.0 0.11229420
## 10 100.0   0.1 0.09192308
## 11 100.0   1.0 0.09458165
## 12 100.0  10.0 0.12483131

Comments: For the radial kernel SVM

The cross-validation results show that Cost = 10 and Gamma = 1 achieved the lowest error rate (CV_Error ≈ 0.0690), suggesting this combination provides the best balance between flexibility and generalization. Low cost values (e.g., Cost = 0.1) led to higher error rates, meaning the decision boundary was likely too soft, failing to separate the classes effectively. Meanwhile, extreme gamma values (e.g., Gamma = 10) increased the error, indicating the model overfit the training data with an overly complex decision boundary.
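
The 4 x 3 grid is easier to scan as a heatmap (a small ggplot sketch over cv_errors_radial):

# Heatmap of CV error across the (cost, gamma) grid
ggplot(cv_errors_radial, aes(x = factor(Cost), y = factor(Gamma), fill = CV_Error)) +
  geom_tile() +
  labs(x = "Cost", y = "Gamma", fill = "CV Error",
       title = "Cross-Validation Error (Radial SVM)") +
  theme_minimal()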

# Polynomial kernel (after the loop, svm_model_poly holds the last combination
# tried, cost = 100 and degree = 4)
cv_errors_poly <- data.frame(Cost = numeric(), Degree = numeric(), CV_Error = numeric())

train_control <- trainControl(method = "cv", number = 10)

for (c in cost_values) {
  for (d in degree_values) {
    # scale is a logical in e1071::svm; passing 1 triggers coercion warnings
    svm_model_poly <- svm(mpg_binary ~ cylinders + displacement + horsepower + weight + acceleration, 
                          data = Auto, kernel = "polynomial", cost = c, degree = d, scale = TRUE)

    # Perform 10-fold cross-validation (kernlab's svmPoly takes a numeric scale)
    cv_model <- train(mpg_binary ~ cylinders + displacement + horsepower + weight + acceleration, 
                      data = Auto, method = "svmPoly", trControl = train_control, 
                      tuneGrid = expand.grid(C = c, degree = d, scale = 1))
    
    # Store the error for this (cost, degree) pair
    cv_errors_poly <- rbind(cv_errors_poly, 
                            data.frame(Cost = c, Degree = d, CV_Error = 1 - max(cv_model$results$Accuracy)))
  }
}
print(cv_errors_poly)
##     Cost Degree   CV_Error
## 1    0.1      2 0.09654184
## 2    0.1      3 0.08175101
## 3    0.1      4 0.08701754
## 4    1.0      2 0.10232119
## 5    1.0      3 0.07650472
## 6    1.0      4 0.09706478
## 7   10.0      2 0.09680837
## 8   10.0      3 0.07878880
## 9   10.0      4 0.10505398
## 10 100.0      2 0.09475709
## 11 100.0      3 0.10470985
## 12 100.0      4 0.11768219

Comments: For the polynomial kernel

The cross-validation results for the polynomial SVM show that Degree = 3 with Cost = 1 achieved the lowest error rate (CV_Error ≈ 0.0765), suggesting that a moderately complex cubic boundary is effective for this classification task. For degrees 3 and 4 the error rose sharply at the highest cost (e.g., Degree = 4 with Cost = 100 gave CV_Error ≈ 0.1177), indicating potential overfitting when strict margin enforcement is combined with a flexible kernel.
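
The degree effect is easier to see plotted (a sketch over cv_errors_poly):

# CV error by cost, one line per polynomial degree
ggplot(cv_errors_poly, aes(x = Cost, y = CV_Error, color = factor(Degree))) +
  geom_line() +
  geom_point() +
  scale_x_log10() +
  labs(color = "Degree", title = "Cross-Validation Error (Polynomial SVM)") +
  theme_minimal()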

For part (b).

Linear SVM

# Plot CV Errors from Linear SVM
ggplot(cv_errors, aes(x = Cost, y = CV_Error)) +
  geom_line() +
  geom_point() +
  labs(title = "Cross-Validation Error vs. Cost (Linear SVM)",
       x = "Cost",
       y = "Cross-Validation Error") +
  theme_minimal()

For part (c).

Radial SVM

plot(svm_model_radial, Auto, horsepower ~ weight)

plot(svm_model_radial, Auto, cylinders ~ displacement)

Polynomial SVM

plot(svm_model_poly, Auto, horsepower ~ weight)

plot(svm_model_poly, Auto, displacement ~ acceleration)
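
Note that svm_model_radial and svm_model_poly above hold the last combinations fitted in their loops (cost = 100 with gamma = 10, and cost = 100 with degree = 4), not the best ones. A sketch refitting at the best cross-validated settings before plotting:

# Refit at the best CV combinations found above, then plot
best_radial <- svm(mpg_binary ~ cylinders + displacement + horsepower + weight + acceleration, 
                   data = Auto, kernel = "radial", cost = 10, gamma = 1)
best_poly   <- svm(mpg_binary ~ cylinders + displacement + horsepower + weight + acceleration, 
                   data = Auto, kernel = "polynomial", cost = 1, degree = 3)
plot(best_radial, Auto, horsepower ~ weight)
plot(best_poly, Auto, horsepower ~ weight)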

Problem 8.

# Load OJ dataset
data(OJ)
set.seed(123)  # Ensure reproducibility

# Randomly sample 800 observations for training
train_indices <- sample(1:nrow(OJ), 800)

# Create clearly named training and test sets
OJ_train_set <- OJ[train_indices, ]  # Training data
OJ_test_set  <- OJ[-train_indices, ] # Test data
svm_oj <- svm(Purchase ~ ., data = OJ_train_set, kernel = "linear", cost = 0.01)

# View model summary
summary(svm_oj)
## 
## Call:
## svm(formula = Purchase ~ ., data = OJ_train_set, kernel = "linear", 
##     cost = 0.01)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  0.01 
## 
## Number of Support Vectors:  442
## 
##  ( 220 222 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM
# Predictions on training data
train_pred <- predict(svm_oj, OJ_train_set)

# Predictions on test data
test_pred <- predict(svm_oj, OJ_test_set)
# Training error rate
train_error <- mean(train_pred != OJ_train_set$Purchase)

# Test error rate
test_error <- mean(test_pred != OJ_test_set$Purchase)

# Print results
print(paste("Training Error Rate:", round(train_error, 4)))
## [1] "Training Error Rate: 0.165"
print(paste("Test Error Rate:", round(test_error, 4)))
## [1] "Test Error Rate: 0.1778"
# Define cost values to test
cost_values <- seq(0.01, 10, length.out = 10)  # Generates 10 values from 0.01 to 10

# Tune the SVM model
tuned_svm <- tune(svm, Purchase ~ ., data = OJ_train_set, kernel = "linear",
                  ranges = list(cost = cost_values))

# View tuning results
summary(tuned_svm)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##  5.56
## 
## - best performance: 0.16375 
## 
## - Detailed performance results:
##     cost   error dispersion
## 1   0.01 0.17625 0.03143004
## 2   1.12 0.16875 0.03596391
## 3   2.23 0.16625 0.03537988
## 4   3.34 0.16500 0.02934469
## 5   4.45 0.16750 0.02898755
## 6   5.56 0.16375 0.02972676
## 7   6.67 0.16625 0.02949223
## 8   7.78 0.17000 0.02776389
## 9   8.89 0.17125 0.02829041
## 10 10.00 0.17250 0.02751262
# Extract the best cost value
best_cost <- tuned_svm$best.parameters$cost
print(paste("Optimal Cost Value:", best_cost))
## [1] "Optimal Cost Value: 5.56"
# Use the best cost value from tuning
optimized_svm <- svm(Purchase ~ ., data = OJ_train_set, kernel = "linear", cost = best_cost)

# Make predictions
train_pred_opt <- predict(optimized_svm, OJ_train_set)
test_pred_opt <- predict(optimized_svm, OJ_test_set)
# Training error rate
train_error_opt <- mean(train_pred_opt != OJ_train_set$Purchase)

# Test error rate
test_error_opt <- mean(test_pred_opt != OJ_test_set$Purchase)

# Print results
print(paste("Optimized Training Error Rate:", round(train_error_opt, 4)))
## [1] "Optimized Training Error Rate: 0.1625"
print(paste("Optimized Test Error Rate:", round(test_error_opt, 4)))
## [1] "Optimized Test Error Rate: 0.1667"
# (b) Fit SVM with a radial kernel using default gamma
svm_oj_radial <- svm(Purchase ~ ., data = OJ_train_set, kernel = "radial", cost = 0.01)

# View model summary
summary(svm_oj_radial)
## 
## Call:
## svm(formula = Purchase ~ ., data = OJ_train_set, kernel = "radial", 
##     cost = 0.01)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  0.01 
## 
## Number of Support Vectors:  629
## 
##  ( 313 316 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM
# (c) Compute training and test error rates
train_pred_radial <- predict(svm_oj_radial, OJ_train_set)
test_pred_radial  <- predict(svm_oj_radial, OJ_test_set)

train_error_radial <- mean(train_pred_radial != OJ_train_set$Purchase)
test_error_radial  <- mean(test_pred_radial != OJ_test_set$Purchase)

print(paste("Radial SVM - Training Error Rate:", round(train_error_radial, 4)))
## [1] "Radial SVM - Training Error Rate: 0.3912"
print(paste("Radial SVM - Test Error Rate:", round(test_error_radial, 4)))
## [1] "Radial SVM - Test Error Rate: 0.3852"
# (d) Tune SVM to find the optimal cost
cost_values <- seq(0.01, 10, length.out = 10)  # Test cost values from 0.01 to 10
tuned_svm_radial <- tune(svm, Purchase ~ ., data = OJ_train_set, kernel = "radial",
                         ranges = list(cost = cost_values))

# Extract the best cost value
best_cost_radial <- tuned_svm_radial$best.parameters$cost
print(paste("Optimal Cost Value for Radial SVM:", best_cost_radial))
## [1] "Optimal Cost Value for Radial SVM: 1.12"
# (e) Fit a new SVM model using the best cost value
optimized_svm_radial <- svm(Purchase ~ ., data = OJ_train_set, kernel = "radial", cost = best_cost_radial)

# Compute new training and test error rates
train_pred_opt_radial <- predict(optimized_svm_radial, OJ_train_set)
test_pred_opt_radial  <- predict(optimized_svm_radial, OJ_test_set)

train_error_opt_radial <- mean(train_pred_opt_radial != OJ_train_set$Purchase)
test_error_opt_radial  <- mean(test_pred_opt_radial != OJ_test_set$Purchase)

print(paste("Optimized Radial SVM - Training Error Rate:", round(train_error_opt_radial, 4)))
## [1] "Optimized Radial SVM - Training Error Rate: 0.1375"
print(paste("Optimized Radial SVM - Test Error Rate:", round(test_error_opt_radial, 4)))
## [1] "Optimized Radial SVM - Test Error Rate: 0.1852"
# (b) Fit SVM with a polynomial kernel using degree = 2 and cost = 0.01
svm_oj_poly <- svm(Purchase ~ ., data = OJ_train_set, kernel = "polynomial", cost = 0.01, degree = 2)

# View model summary
summary(svm_oj_poly)
## 
## Call:
## svm(formula = Purchase ~ ., data = OJ_train_set, kernel = "polynomial", 
##     cost = 0.01, degree = 2)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  polynomial 
##        cost:  0.01 
##      degree:  2 
##      coef.0:  0 
## 
## Number of Support Vectors:  631
## 
##  ( 313 318 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM
# (c) Compute training and test error rates
train_pred_poly <- predict(svm_oj_poly, OJ_train_set)
test_pred_poly  <- predict(svm_oj_poly, OJ_test_set)

train_error_poly <- mean(train_pred_poly != OJ_train_set$Purchase)
test_error_poly  <- mean(test_pred_poly != OJ_test_set$Purchase)

print(paste("Polynomial SVM (Degree 2) - Training Error Rate:", round(train_error_poly, 4)))
## [1] "Polynomial SVM (Degree 2) - Training Error Rate: 0.3725"
print(paste("Polynomial SVM (Degree 2) - Test Error Rate:", round(test_error_poly, 4)))
## [1] "Polynomial SVM (Degree 2) - Test Error Rate: 0.3741"
# (d) Tune SVM to find the optimal cost
cost_values <- seq(0.01, 10, length.out = 10)  # Test cost values from 0.01 to 10
tuned_svm_poly <- tune(svm, Purchase ~ ., data = OJ_train_set, kernel = "polynomial",
                       ranges = list(cost = cost_values, degree = 2))

# Extract the best cost value
best_cost_poly <- tuned_svm_poly$best.parameters$cost
print(paste("Optimal Cost Value for Polynomial SVM:", best_cost_poly))
## [1] "Optimal Cost Value for Polynomial SVM: 6.67"
# (e) Fit a new SVM model using the best cost value
optimized_svm_poly <- svm(Purchase ~ ., data = OJ_train_set, kernel = "polynomial", cost = best_cost_poly, degree = 2)

# Compute new training and test error rates
train_pred_opt_poly <- predict(optimized_svm_poly, OJ_train_set)
test_pred_opt_poly  <- predict(optimized_svm_poly, OJ_test_set)

train_error_opt_poly <- mean(train_pred_opt_poly != OJ_train_set$Purchase)
test_error_opt_poly  <- mean(test_pred_opt_poly != OJ_test_set$Purchase)

print(paste("Optimized Polynomial SVM - Training Error Rate:", round(train_error_opt_poly, 4)))
## [1] "Optimized Polynomial SVM - Training Error Rate: 0.1425"
print(paste("Optimized Polynomial SVM - Test Error Rate:", round(test_error_opt_poly, 4)))
## [1] "Optimized Polynomial SVM - Test Error Rate: 0.1963"

Overall Comparison Across Models:

After tuning, the linear SVM achieved the lowest test error (0.1667), so it generalized best on this split even though it gained the least from tuning.

The radial kernel came second (test error 0.1852) but improved the most, dropping from 0.3852 at cost = 0.01.

The polynomial kernel was close behind (test error 0.1963), also improving dramatically from its untuned error of 0.3741.
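
The tuned results are easier to compare side by side (a sketch assembling the error rates computed above):

# Collect the tuned error rates into one table
results <- data.frame(
  Kernel      = c("Linear", "Radial", "Polynomial"),
  Train_Error = c(train_error_opt, train_error_opt_radial, train_error_opt_poly),
  Test_Error  = c(test_error_opt, test_error_opt_radial, test_error_opt_poly)
)
print(results)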