# Load Libraries
# Install packages if not already installed
# install.packages(c("tidyverse", "MASS", "pROC", "caTools"))
library(tidyverse)
library(MASS)
library(pROC)
library(caTools)
# 1. Load Data
# Read the raw data from the working directory, then recode the two
# categorical columns (`default`, `student`) as factors in one pass.
Default <- read.csv("Default.csv")
factor_cols <- c("default", "student")
Default[factor_cols] <- lapply(Default[factor_cols], as.factor)

# Structure check; the lines below are the recorded output of this call.
str(Default)
## 'data.frame': 10000 obs. of 5 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ default: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ student: Factor w/ 2 levels "No","Yes": 1 2 1 1 1 2 1 2 1 1 ...
## $ balance: num 730 817 1074 529 786 ...
## $ income : num 44362 12106 31767 35704 38463 ...
# 2. Train-Test Split
# Seed the RNG so the random partition below is reproducible.
set.seed(123)

# `split` is a logical row mask produced from the outcome labels
# (TRUE rows go to training, SplitRatio = 0.7).
split <- sample.split(Default[["default"]], SplitRatio = 0.7)

# which() keeps only rows where the mask is definitely TRUE (or FALSE),
# so any NA in the mask would be dropped rather than yielding NA rows.
train <- Default[which(split), ]
test <- Default[which(!split), ]

# Report the size of each partition; `##` lines are the recorded output.
cat("Train size:", nrow(train), "\n")
## Train size: 7000
cat("Test size: ", nrow(test), "\n")
## Test size: 3000
# 3. Logistic Regression
# Binomial GLM of `default` on income and balance, fitted on the
# training rows only.
glm_model <- glm(
  formula = default ~ income + balance,
  family = binomial,
  data = train
)

# type = "response" returns predictions on the probability scale
# (rather than the linear-predictor scale) for the held-out rows.
glm_probs <- predict(glm_model, newdata = test, type = "response")
# 4. LDA
# Linear discriminant analysis with the same two predictors, fitted on
# the training rows.
lda_model <- lda(default ~ income + balance, data = train)

# Predict on the held-out rows and keep the second column of the
# posterior matrix, which this script treats as the positive class.
lda_pred <- predict(lda_model, newdata = test)
lda_probs <- lda_pred[["posterior"]][, 2]
# 5. QDA
# Quadratic discriminant analysis with the same two predictors, fitted
# on the training rows.
qda_model <- qda(default ~ income + balance, data = train)

# Predict on the held-out rows and keep the second column of the
# posterior matrix, which this script treats as the positive class.
qda_pred <- predict(qda_model, newdata = test)
qda_probs <- qda_pred[["posterior"]][, 2]
# 6. ROC Curves
# Build one ROC object per model from the held-out labels and each
# model's predicted score for the positive class.
#
# Both `levels` and `direction` are passed explicitly so pROC does not
# have to guess them: `levels` is ordered c(negative, positive), and
# direction = "<" states that lower scores belong to the negative class.
# Leaving them implicit risks an auto-detected flip of the curve.
class_levels <- levels(test$default)
neg_class <- class_levels[1]
pos_class <- class_levels[2] # second factor level is the positive class

roc_glm <- roc(
  test$default, glm_probs,
  levels = c(neg_class, pos_class),
  direction = "<"
)
roc_lda <- roc(
  test$default, lda_probs,
  levels = c(neg_class, pos_class),
  direction = "<"
)
roc_qda <- roc(
  test$default, qda_probs,
  levels = c(neg_class, pos_class),
  direction = "<"
)
# 7. Plot ROC Curves
# Legend text: one entry per model, each carrying its AUC rounded to
# three decimals.
auc_labels <- c(
  paste("Logistic AUC =", round(auc(roc_glm), 3)),
  paste("LDA AUC =", round(auc(roc_lda), 3)),
  paste("QDA AUC =", round(auc(roc_qda), 3))
)

# The logistic curve opens the plot; the other two are overlaid on the
# same axes via add = TRUE.
plot(roc_glm, main = "ROC Curves — Default Prediction", col = "blue", lwd = 2)
plot(roc_lda, add = TRUE, col = "red", lwd = 2)
plot(roc_qda, add = TRUE, col = "green", lwd = 2)

# Colours are listed in the same order as the labels; bty = "n" draws
# the legend without a surrounding box.
legend(
  "bottomright",
  legend = auc_labels,
  col = c("blue", "red", "green"),
  lwd = 2,
  bty = "n"
)

# 8. AUC Summary
# Print each model's AUC rounded to four decimals, one line per model,
# in the order logistic -> LDA -> QDA.
roc_by_label <- list(
  "Logistic Regression AUC:" = roc_glm,
  "LDA AUC:" = roc_lda,
  "QDA AUC:" = roc_qda
)
for (label in names(roc_by_label)) {
  cat(label, round(auc(roc_by_label[[label]]), 4), "\n")
}
## Logistic Regression AUC: 0.9494
## LDA AUC: 0.9493
## QDA AUC: 0.9489