library(tidyverse)
library(caret)
library(randomForest)
library(rpart)
library(adabag)
library(pROC)
library(ada)
library(mice)
library(e1071)
# Seed R's random number generator before any randomised step below.
set.seed(02180)

# Read the semicolon-separated bank marketing file.
# NOTE(review): the path is absolute and machine-specific; it must exist on
# whichever host runs this script.
bank <- read.csv(
  file = "C:\\Users\\jashb\\OneDrive\\Documents\\Masters Data Science\\Spring 2025\\DATA 622\\Assignment 1\\DATA\\bank-additional\\bank-additional-full.csv",
  sep = ";"
)

# Recode the literal value "unknown" as NA in every column ...
bank <- mutate(
  bank,
  across(everything(), function(col) ifelse(col == "unknown", NA, col))
)

# ... and keep only the rows that have no missing value in any column.
bank_comp <- na.omit(bank)
The data are split 75% / 25% into training and test sets for the first (default) experiment.
# Draw the training-row indices from the outcome column (75% of the rows);
# list = FALSE makes createDataPartition() return an index matrix, not a list.
trainPart <- createDataPartition(y = bank_comp$y, p = 0.75, list = FALSE)

# Rows selected by the partition form the training set, the rest the test set.
trainDat <- bank_comp[trainPart, ]
testDat <- bank_comp[-trainPart, ]

# Store the outcome as a factor in both sets so that the modelling and
# confusionMatrix() calls further down receive a class label.
trainDat[["y"]] <- as.factor(trainDat[["y"]])
testDat[["y"]] <- as.factor(testDat[["y"]])
Perform an analysis of the dataset used in Homework #2 using the SVM algorithm.
# Baseline linear-kernel SVM at cost = 1, with probability estimates enabled.
# The kernel has to be named explicitly: e1071::svm() defaults to "radial",
# so leaving it out fits a radial model under the "linear" label.
svm_lin <- svm(
  formula = y ~ .,
  data = trainDat,
  kernel = "linear",
  cost = 1,
  probability = TRUE,
  scale = TRUE
)

# Predicted classes, and predictions carrying the "probabilities" attribute,
# for the held-out test set.
svm_lin_pred <- predict(svm_lin, testDat)
svm_lin_prob <- predict(svm_lin, testDat, probability = TRUE)

# Confusion-matrix metrics with "yes" as the positive class.
svm_confusion <- confusionMatrix(svm_lin_pred, testDat$y, positive = "yes")
svm_lin_accuracy <- svm_confusion$overall["Accuracy"]
svm_lin_sensitivity <- svm_confusion$byClass["Sensitivity"]
svm_lin_specificity <- svm_confusion$byClass["Specificity"]

# ROC curve built from the predicted probability of the "yes" class.
svm_ROC_lin <- roc(
  testDat$y,
  as.numeric(attr(svm_lin_prob, "probabilities")[, "yes"])
)
## Setting levels: control = no, case = yes
## Setting direction: controls < cases
# Area under the ROC curve for the baseline linear model.
svm_lin_auc <- auc(svm_ROC_lin)

# One-row summary of the baseline linear model.
# The metric vectors are named (e.g. "Accuracy"); unname() keeps that name
# from being picked up as the data-frame row name, which would otherwise
# label the whole row "Accuracy".
svm_lin_results <- data.frame(
  Method = "SVM Linear Algorithm",
  AUC = as.numeric(svm_lin_auc),
  Accuracy = unname(svm_lin_accuracy),
  Sensitivity = unname(svm_lin_sensitivity),
  Specificity = unname(svm_lin_specificity)
)
print(svm_lin_results)
## Method AUC Accuracy Sensitivity Specificity
## Accuracy SVM Linear Algorithm 0.9197253 0.8981761 0.3578838 0.9764158
# Candidate cost values for the linear-kernel search.
tune_lin_grid <- expand.grid(cost = c(0.001, 0.01, 0.1, 1, 5, 10))

# Grid search over cost for a linear-kernel SVM on the training set,
# scored with 2-fold cross-validation.
tune_lin_control <- tune.svm(
  y ~ .,
  data = trainDat,
  kernel = "linear",
  cost = tune_lin_grid[["cost"]],
  tunecontrol = tune.control(cross = 2)
)
print(tune_lin_control)
##
## Parameter tuning of 'svm':
##
## - sampling method: 2-fold cross validation
##
## - best parameters:
## cost
## 10
##
## - best performance: 0.1059169
Pull best cost for the linear kernel and train the model with it
# Best cost found by the linear-kernel grid search.
tuned_lin_cost <- tune_lin_control$best.parameters$cost

# Refit with the tuned cost. The kernel is set to "linear" to match the
# search above: e1071::svm() defaults to "radial", so omitting it would apply
# a cost tuned for one kernel to a different one.
svm_lin_tune <- svm(
  y ~ .,
  data = trainDat,
  kernel = "linear",
  cost = tuned_lin_cost,
  probability = TRUE
)

# Predicted classes, and predictions carrying the "probabilities" attribute,
# for the held-out test set.
svm_tune_pred <- predict(svm_lin_tune, testDat)
svm_tune_prob <- predict(svm_lin_tune, testDat, probability = TRUE)

# Confusion-matrix metrics with "yes" as the positive class.
svm_tune_confusion <- confusionMatrix(
  svm_tune_pred,
  testDat$y,
  positive = "yes"
)
svm_tune_accuracy <- svm_tune_confusion$overall["Accuracy"]
svm_tune_sensitivity <- svm_tune_confusion$byClass["Sensitivity"]
svm_tune_specificity <- svm_tune_confusion$byClass["Specificity"]

# ROC curve built from the predicted probability of the "yes" class.
svm_ROC_tune <- roc(
  testDat$y,
  as.numeric(attr(svm_tune_prob, "probabilities")[, "yes"])
)
## Setting levels: control = no, case = yes
## Setting direction: controls < cases
# Area under the ROC curve for the tuned linear model.
svm_tune_auc <- auc(svm_ROC_tune)

# One-row summary of the tuned linear model.
# The metric vectors are named (e.g. "Accuracy"); unname() keeps that name
# from being picked up as the data-frame row name, which would otherwise
# label the whole row "Accuracy".
svm_tune_results <- data.frame(
  Method = "SVM Linear Algorithm (Tuned)",
  AUC = as.numeric(svm_tune_auc),
  Accuracy = unname(svm_tune_accuracy),
  Sensitivity = unname(svm_tune_sensitivity),
  Specificity = unname(svm_tune_specificity)
)
print(svm_tune_results)
## Method AUC Accuracy Sensitivity
## Accuracy SVM Linear Algorithm (Tuned) 0.9215606 0.9006692 0.4045643
## Specificity
## Accuracy 0.9725101
# Radial-kernel SVM with the remaining settings left at svm()'s defaults,
# with probability estimates enabled.
svm_rad <- svm(
  formula = y ~ .,
  data = trainDat,
  kernel = "radial",
  probability = TRUE,
  scale = TRUE
)

# Predicted classes for the test set. The former `type = "class"` argument was
# dropped: predict() for svm objects has no `type` parameter, so it was
# silently ignored and only suggested an option that does not exist.
svm_rad_pred <- predict(svm_rad, testDat)

# Predictions carrying the "probabilities" attribute.
svm_rad_prob <- predict(svm_rad, testDat, probability = TRUE)

# Confusion-matrix metrics with "yes" as the positive class.
svm_rad_confusion <- confusionMatrix(svm_rad_pred, testDat$y, positive = "yes")
svm_rad_accuracy <- svm_rad_confusion$overall["Accuracy"]
svm_rad_sensitivity <- svm_rad_confusion$byClass["Sensitivity"]
svm_rad_specificity <- svm_rad_confusion$byClass["Specificity"]

# ROC curve built from the predicted probability of the "yes" class.
svm_rad_ROC <- roc(
  testDat$y,
  as.numeric(attr(svm_rad_prob, "probabilities")[, "yes"])
)
## Setting levels: control = no, case = yes
## Setting direction: controls < cases
# Area under the ROC curve for the radial model.
svm_rad_auc <- auc(svm_rad_ROC)

# One-row summary of the radial model, with columns in the same order as the
# linear summaries. unname() keeps the metric vectors' names (e.g.
# "Sensitivity") from becoming the data-frame row name.
svm_rad_results <- data.frame(
  Method = "SVM Radial Algorithm (Standard)",
  AUC = as.numeric(svm_rad_auc),
  Accuracy = unname(svm_rad_accuracy),
  Sensitivity = unname(svm_rad_sensitivity),
  Specificity = unname(svm_rad_specificity)
)

# Stack the three summaries into one comparison table. Row names are reset so
# the rows are numbered instead of inheriting metric names such as
# "Accuracy" / "Accuracy1" from the individual frames.
combined_metrics <- rbind(svm_lin_results, svm_tune_results, svm_rad_results)
rownames(combined_metrics) <- NULL
print(combined_metrics)
## Method AUC Accuracy Sensitivity
## Accuracy SVM Linear Algorithm 0.9197253 0.8981761 0.3578838
## Accuracy1 SVM Linear Algorithm (Tuned) 0.9215606 0.9006692 0.4045643
## Sensitivity SVM Radial Algorithm (Standard) 0.9197253 0.8981761 0.3578838
## Specificity
## Accuracy 0.9764158
## Accuracy1 0.9725101
## Sensitivity 0.9764158