library(tidyverse)
library(openintro)
library(e1071)
library(caret)
library(ISLR)
library(ISLR2)
library(kernlab)
library(knitr)
# Collect the two predictors and the class label in one data frame; the label
# is stored as a factor so ggplot maps it to a discrete colour scale.
df <- data.frame(
  x1 = x1,
  x2 = x2,
  y = factor(y)
)

# Scatter plot of the observations, coloured by their true class.
ggplot(df) +
  geom_point(aes(x = x1, y = x2, color = y), alpha = 0.6) +
  coord_fixed() +
  theme_minimal() +
  labs(x = "X1", y = "X2", color = "Class")
# Score every observation with the linear-term logistic model (`log_model` is
# defined outside this section) and label it 1 when the predicted response
# exceeds 0.5, otherwise 0.
glm_prob <- predict(log_model, newdata = df, type = "response")
glm_pred <- ifelse(glm_prob > 0.5, 1, 0)
df[["predicted"]] <- factor(glm_pred)

# Same scatter as the raw data, now coloured by the predicted label.
ggplot(df) +
  geom_point(aes(x = x1, y = x2, color = predicted), alpha = 0.6) +
  coord_fixed() +
  theme_minimal() +
  labs(
    title = "Logistic Regression with Linear Terms: Predicted Classes",
    x = "X1",
    y = "X2",
    color = "Predicted Class"
  )
# Add the squared and interaction terms as explicit columns so the logistic
# model can represent a quadratic decision boundary.
df <- transform(
  df,
  x1_sq = x1^2,
  x2_sq = x2^2,
  x1_x2 = x1 * x2
)

# Logistic regression on the linear, squared and interaction terms.
log_model_2 <- glm(
  y ~ x1 + x2 + x1_sq + x2_sq + x1_x2,
  family = binomial,
  data = df
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

# Threshold the fitted response at 0.5 to obtain 0/1 labels.
glm_prob_2 <- predict(log_model_2, newdata = df, type = "response")
glm_pred_2 <- ifelse(glm_prob_2 > 0.5, 1, 0)
df[["predicted_2"]] <- factor(glm_pred_2)

# Observations coloured by the label predicted from the quadratic model.
ggplot(df) +
  geom_point(aes(x = x1, y = x2, color = predicted_2), alpha = 0.6) +
  coord_fixed() +
  theme_minimal() +
  labs(
    title = "Logistic Regression with Non-linear Terms: Predicted Classes",
    x = "X1",
    y = "X2",
    color = "Predicted Class"
  )
# Support vector classifier (linear kernel, cost = 1) on the two raw
# predictors. The fitted model gets its own name: assigning it to `svm` would
# mask the e1071::svm() function with a model object in the workspace.
svm_linear <- svm(y ~ x1 + x2, data = df, kernel = "linear", cost = 1)

# predict() without newdata returns the fitted classes for the training rows.
df$svm_pred <- predict(svm_linear)

# Observations coloured by the class predicted by the linear SVC.
ggplot(df, aes(x = x1, y = x2, color = svm_pred)) +
  geom_point(alpha = 0.6) +
  labs(
    title = "Support Vector Classifier (Linear Kernel): Predicted Classes",
    x = "X1", y = "X2", color = "Predicted Class"
  ) +
  theme_minimal() +
  coord_fixed()
# SVM with a radial kernel (cost = 1, gamma = 1) on the two raw predictors.
svm_2 <- svm(
  y ~ x1 + x2,
  data = df,
  kernel = "radial",
  gamma = 1,
  cost = 1
)

# Fitted classes for the training rows (no newdata supplied).
df[["svm_2_pred"]] <- predict(svm_2)

# Observations coloured by the class predicted by the radial SVM.
ggplot(df) +
  geom_point(aes(x = x1, y = x2, color = svm_2_pred), alpha = 0.6) +
  coord_fixed() +
  theme_minimal() +
  labs(
    title = "SVM with Radial Kernel: Predicted Classes",
    x = "X1",
    y = "X2",
    color = "Predicted Class"
  )
# Confusion matrices of each model's in-sample predictions against the true
# class labels.
#
# Predictions are re-levelled to the reference's level set before being handed
# to caret. Wrapping them in a bare factor() drops any class a model never
# predicted, which appears to be what made confusionMatrix() emit "Levels are
# not in the same order for reference and data" for the linear SVM. caret
# recovers by refactoring the data in exactly this way, so the resulting
# tables are unchanged; only the warning goes away.
truth <- factor(df$y)
align_to_truth <- function(pred) {
  factor(as.character(pred), levels = levels(truth))
}

cm_log <- confusionMatrix(align_to_truth(df$predicted), truth)
cm_log_2 <- confusionMatrix(align_to_truth(df$predicted_2), truth)
cm_svm <- confusionMatrix(align_to_truth(df$svm_pred), truth)
cm_svm_2 <- confusionMatrix(align_to_truth(df$svm_2_pred), truth)
# One row per model with its overall in-sample accuracy, read from the caret
# confusion matrices computed above.
fitted_cms <- list(
  "Logistic Regression (Linear)" = cm_log,
  "Logistic Regression (Non-linear)" = cm_log_2,
  "SVM (Linear Kernel)" = cm_svm,
  "SVM (RBF Kernel)" = cm_svm_2
)

accuracy_table <- data.frame(
  Model = names(fitted_cms),
  Accuracy = vapply(
    fitted_cms,
    function(cm) cm$overall[["Accuracy"]],
    numeric(1),
    USE.NAMES = FALSE
  )
)
print(accuracy_table)## Model Accuracy
## 1 Logistic Regression (Linear) 0.570
## 2 Logistic Regression (Non-linear) 1.000
## 3 SVM (Linear Kernel) 0.522
## 4 SVM (RBF Kernel) 0.972
Both the non-linear SVM (RBF kernel) and the logistic regression with non-linear terms performed much better than their linear counterparts. The non-linear models were able to capture the quadratic class boundary in the data and therefore had much better prediction accuracy, whereas the linear models could not represent that boundary and largely fell back on predicting the majority class (0).
Auto data set.
## The following object is masked from package:lubridate:
##
## origin
## The following object is masked from package:ggplot2:
##
## mpg
cost, in
order to predict whether a car gets high or low gas mileage. Report the
cross-validation errors associated with different values of this
parameter. Comment on your results. Note you will need to fit the
classifier without the gas mileage variable to produce sensible
results.
# Drop the raw mpg column: the exercise asks for the classifier to be fit
# without the gas mileage variable.
Auto_svm <- Auto[, setdiff(names(Auto), "mpg")]

# Cross-validated search over cost for a linear-kernel SVM; the seed fixes the
# fold assignment.
set.seed(1)
auto_svm_model <- tune(
  e1071::svm, mpglevel ~ .,
  data = Auto_svm,
  kernel = "linear",
  scale = TRUE,
  ranges = list(cost = c(0.01, 0.1, 1, 10, 100))
)
summary(auto_svm_model)##
## Parameter tuning of 'e1071::svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.09603609
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 0.10421950 0.03138085
## 2 1e-01 0.10227373 0.03634911
## 3 1e+00 0.09603609 0.03666741
## 4 1e+01 0.10531309 0.03683207
## 5 1e+02 0.12079079 0.03864160
When using the linear SVM model, a cost of 1 results in
the lowest error rate and highest accuracy (90.4%). Increasing cost
doesn’t seem to improve this accuracy and could potentially lead to
overfitting.
gamma and degree and
cost. Comment on your results.
# Cross-validated grid search over cost and gamma for a radial-kernel SVM.
set.seed(1)
radial_grid <- list(
  cost = c(0.01, 0.1, 1, 10),
  gamma = c(0.01, 0.1, 1)
)
auto_svm_model_2 <- tune(
  e1071::svm, mpglevel ~ .,
  data = Auto_svm,
  kernel = "radial",
  ranges = radial_grid
)
print(auto_svm_model_2)##
## Parameter tuning of 'e1071::svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 10 0.1
##
## - best performance: 0.06878595
# Cross-validated grid search for a polynomial-kernel SVM over cost, degree
# and `scale`.
# NOTE(review): in e1071::svm() `scale` is the logical "which variables to
# standardise" argument, not a polynomial-kernel hyper-parameter. The values 1
# and 2 are presumably both coerced to TRUE, so this grid dimension would only
# refit identical models. If the intent was to tune the kernel, `gamma` or
# `coef0` are the likely candidates; confirm and re-run before relying on the
# reported polynomial results.
auto_svm_model_3 <- tune(e1071::svm, mpglevel ~ ., data = Auto_svm, kernel = "polynomial",
ranges = list(cost = c(0.01, 0.1, 1, 10), degree = c(2, 3, 4),
scale = c(1, 2)))
print(auto_svm_model_3)##
## Parameter tuning of 'e1071::svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree scale
## 10 2 1
##
## - best performance: 0.3982798
When using the radial SVM model, a cost of 10 and
gamma of 0.1 results in the lowest cross-validation error
and best accuracy (93.1%). The polynomial SVM model has a peak
performance when cost is 10, degree is 2, and
scale is 1. However, the accuracy is much worse at 60.2%.
The radial model substantially outperforms the polynomial model in this case.
The radial model also slightly outperforms the linear model.
set.seed(1)

# Make sure the response is a factor so svm() performs classification.
# NOTE(review): the tune() calls earlier in the file ran before this
# conversion. If mpglevel was not already a factor at that point, svm() would
# have fit regressions there and the reported "errors" would not be
# misclassification rates; confirm how mpglevel was created.
Auto_svm$mpglevel <- as.factor(Auto_svm$mpglevel)

# Predictor pairs for the 2-D decision plots; each pair is turned into the
# formula `first ~ second` by the plotting loop.
var_pairs <- list(
  c("weight", "horsepower"),
  c("displacement", "weight"),
  c("acceleration", "cylinders")
)

# Refit each kernel on the full data with the hyper-parameters selected by
# cross-validation above.
best_auto_model_linear <- svm(mpglevel ~ ., data = Auto_svm,
                              kernel = "linear", cost = 1)
best_auto_model_radial <- svm(mpglevel ~ ., data = Auto_svm,
                              kernel = "radial", cost = 10, gamma = 0.1)
# `scale` is svm()'s logical standardisation switch. The previous `scale = 1`
# was only coerced to TRUE (with a "coercing argument of type 'double' to
# logical" warning), so passing TRUE fits the same model without the warning.
best_auto_model_poly <- svm(mpglevel ~ ., data = Auto_svm,
                            kernel = "polynomial", cost = 10, degree = 2,
                            scale = TRUE)
# For every predictor pair, draw the decision plot of each final model and
# put a title above it naming the kernel and the plotted pair.
kernel_models <- list(
  best_auto_model_linear,
  best_auto_model_radial,
  best_auto_model_poly
)
kernel_labels <- c("Linear SVM: ", "Radial SVM: ", "Poly SVM: ")

for (pair in var_pairs) {
  pair_formula <- as.formula(paste(pair[1], "~", pair[2]))

  # Horizontal position of the title: the middle of the x variable's range,
  # shifted left by 13% of that range.
  x_range <- range(Auto_svm[[pair[2]]])
  title_at <- mean(x_range) - diff(x_range) * 0.13

  for (k in seq_along(kernel_models)) {
    plot(kernel_models[[k]], Auto_svm, pair_formula,
         cex.lab = 1.2, cex.axis = 1.1)
    mtext(paste(kernel_labels[k], pair[1], "~", pair[2]),
          side = 3, line = 0.5, at = title_at, cex = 1, adj = 0.5)
  }
}
OJ data set which is part of
the ISLR2 package.
cost = 0.01, with
Purchase as the response and the other variables as
predictors. Use the summary() function to produce summary
statistics, and describe the results obtained.
##
## Call:
## svm(formula = Purchase ~ ., data = oj_train, kernel = "linear", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 435
##
## ( 219 216 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
435 support vectors have been created. 219 vectors belong to level CH and 216 vectors belong to level MM.
# Predict with the cost = 0.01 linear SVC on the training data and on the
# held-out test data ...
train_pred <- predict(oj_svm, newdata = oj_train)
test_pred <- predict(oj_svm, newdata = oj_test)

# ... and take the share of misclassified purchases in each set.
train_error <- mean(train_pred != oj_train[["Purchase"]])
test_error <- mean(test_pred != oj_test[["Purchase"]])
cat("Training Error Rate:", round(train_error, 4), "\n")## Training Error Rate: 0.175
## Test Error Rate: 0.1778
tune()
function to select an optimal cost. Consider values in the range 0.01 to
10.
# Cross-validated search for the linear-kernel cost over 0.01 to 10; the seed
# fixes the fold assignment.
set.seed(1)
linear_cost_grid <- c(0.01, 0.1, 0.5, 1, 5, 10)
tune_oj <- tune(
  e1071::svm, Purchase ~ .,
  data = oj_train,
  kernel = "linear",
  ranges = list(cost = linear_cost_grid)
)
summary(tune_oj)##
## Parameter tuning of 'e1071::svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 0.5
##
## - best performance: 0.16875
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.17625 0.02853482
## 2 0.10 0.17250 0.03162278
## 3 0.50 0.16875 0.02651650
## 4 1.00 0.17500 0.02946278
## 5 5.00 0.17250 0.03162278
## 6 10.00 0.17375 0.03197764
The smallest error is found when the cost is 0.5. The accuracy is around 83.12%.
cost.
# Model refit by tune() at the best cross-validated cost.
best_oj <- tune_oj[["best.model"]]

# Predictions on both data sets, then the share of misclassified purchases.
train_pred_best <- predict(best_oj, newdata = oj_train)
test_pred_best <- predict(best_oj, newdata = oj_test)
train_error_best <- mean(train_pred_best != oj_train[["Purchase"]])
test_error_best <- mean(test_pred_best != oj_test[["Purchase"]])
cat("Training Error Rate (Best Cost):", round(train_error_best, 4), "\n")## Training Error Rate (Best Cost): 0.165
## Test Error Rate (Best Cost): 0.1556
gamma.
# Radial-kernel SVM at cost = 0.01; gamma is left at svm()'s default.
set.seed(1)
oj_svm_radial <- svm(
  Purchase ~ .,
  data = oj_train,
  cost = 0.01,
  kernel = "radial"
)
summary(oj_svm_radial)##
## Call:
## svm(formula = Purchase ~ ., data = oj_train, kernel = "radial", cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 0.01
##
## Number of Support Vectors: 634
##
## ( 319 315 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
# Training and test predictions of the cost = 0.01 radial SVM, followed by the
# share of misclassified purchases in each set.
train_pred_radial <- predict(oj_svm_radial, newdata = oj_train)
test_pred_radial <- predict(oj_svm_radial, newdata = oj_test)
train_error_radial <- mean(train_pred_radial != oj_train[["Purchase"]])
test_error_radial <- mean(test_pred_radial != oj_test[["Purchase"]])
cat("Training Error Rate (Radial, cost=0.01):", round(train_error_radial, 4), "\n")## Training Error Rate (Radial, cost=0.01): 0.3938
## Test Error Rate (Radial, cost=0.01): 0.3778
# Cross-validated search for the radial-kernel cost (gamma at its default).
radial_cost_grid <- c(0.01, 0.1, 1, 10)
tune_radial_oj <- tune(
  e1071::svm, Purchase ~ .,
  data = oj_train,
  kernel = "radial",
  ranges = list(cost = radial_cost_grid)
)
summary(tune_radial_oj)##
## Parameter tuning of 'e1071::svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.17125
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.39375 0.04007372
## 2 0.10 0.18625 0.02853482
## 3 1.00 0.17125 0.02128673
## 4 10.00 0.18625 0.02853482
# Radial model refit by tune() at the best cross-validated cost.
best_radial_svm <- tune_radial_oj[["best.model"]]

# Predictions on both data sets, then the share of misclassified purchases.
train_pred_best_radial <- predict(best_radial_svm, newdata = oj_train)
test_pred_best_radial <- predict(best_radial_svm, newdata = oj_test)
train_error_best_radial <-
  mean(train_pred_best_radial != oj_train[["Purchase"]])
test_error_best_radial <-
  mean(test_pred_best_radial != oj_test[["Purchase"]])
cat("Training Error Rate (Radial, Best Cost):", round(train_error_best_radial, 4), "\n")## Training Error Rate (Radial, Best Cost): 0.1512
## Test Error Rate (Radial, Best Cost): 0.1852
A total of 634 support vectors are made with 319 belonging to CH and 315 belonging to MM. This model has a training error of 39.38% and testing error of 37.78%. The lowest error can be found when cost is 1 with an accuracy of 82.87%. The training error of this best model is 15.12% and the testing error is 18.52%.
degree = 2.
# Degree-2 polynomial-kernel SVM at cost = 0.01.
set.seed(1)
oj_svm_poly <- svm(
  Purchase ~ .,
  data = oj_train,
  cost = 0.01,
  kernel = "polynomial",
  degree = 2
)
summary(oj_svm_poly)##
## Call:
## svm(formula = Purchase ~ ., data = oj_train, kernel = "polynomial",
## degree = 2, cost = 0.01)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: polynomial
## cost: 0.01
## degree: 2
## coef.0: 0
##
## Number of Support Vectors: 636
##
## ( 321 315 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
# Training and test predictions of the cost = 0.01 polynomial SVM, followed by
# the share of misclassified purchases in each set.
train_pred_poly <- predict(oj_svm_poly, newdata = oj_train)
test_pred_poly <- predict(oj_svm_poly, newdata = oj_test)
train_error_poly <- mean(train_pred_poly != oj_train[["Purchase"]])
test_error_poly <- mean(test_pred_poly != oj_test[["Purchase"]])
cat("Training Error Rate (Poly, cost=0.01):", round(train_error_poly, 4), "\n")## Training Error Rate (Poly, cost=0.01): 0.3725
## Test Error Rate (Poly, cost=0.01): 0.3667
# Cross-validated search for the cost of the degree-2 polynomial SVM.
poly_cost_grid <- c(0.01, 0.1, 1, 10)
tune_poly_oj <- tune(
  e1071::svm, Purchase ~ .,
  data = oj_train,
  kernel = "polynomial",
  degree = 2,
  ranges = list(cost = poly_cost_grid)
)
summary(tune_poly_oj)##
## Parameter tuning of 'e1071::svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 10
##
## - best performance: 0.18125
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.39125 0.04210189
## 2 0.10 0.32125 0.05001736
## 3 1.00 0.20250 0.04116363
## 4 10.00 0.18125 0.02779513
# Polynomial model refit by tune() at the best cross-validated cost.
best_poly_svm <- tune_poly_oj[["best.model"]]

# Predictions on both data sets, then the share of misclassified purchases.
train_pred_best_poly <- predict(best_poly_svm, newdata = oj_train)
test_pred_best_poly <- predict(best_poly_svm, newdata = oj_test)
train_error_best_poly <- mean(train_pred_best_poly != oj_train[["Purchase"]])
test_error_best_poly <- mean(test_pred_best_poly != oj_test[["Purchase"]])
cat("Training Error Rate (Poly, Best Cost):", round(train_error_best_poly, 4), "\n")## Training Error Rate (Poly, Best Cost): 0.15
## Test Error Rate (Poly, Best Cost): 0.1889
A total of 636 support vectors are made with 321 belonging to CH and 315 belonging to MM. This model has a training error of 37.25% and testing error of 36.67%. The lowest error can be found when cost is 10 with an accuracy of 81.87%. The training error of this best model is 15% and the testing error is 18.89%.
# Training and test accuracy (1 - error rate) of the tuned linear, radial and
# polynomial SVMs on the OJ data, one row per kernel.
error_comparison <- data.frame(
  Kernel = c("Linear", "Radial", "Polynomial"),
  Training_Accuracy = 1 - c(train_error_best, train_error_best_radial, train_error_best_poly),
  Test_Accuracy = 1 - c(test_error_best, test_error_best_radial, test_error_best_poly)
)

# Format both accuracy columns as percentages and render the table. The
# caption now says "Accuracy": the columns hold accuracies, not error rates.
error_comparison %>%
  mutate(
    Training_Accuracy = sprintf("%.2f%%", Training_Accuracy * 100),
    Test_Accuracy = sprintf("%.2f%%", Test_Accuracy * 100)
  ) %>%
  kable(caption = "Comparison of Training and Test Accuracy Across Kernels")
| Kernel | Training_Accuracy | Test_Accuracy |
|---|---|---|
| Linear | 83.50% | 84.44% |
| Radial | 84.88% | 81.48% |
| Polynomial | 85.00% | 81.11% |
The linear kernel has the highest testing accuracy and seems to give the best results on this data.