Load Libraries

# Install packages if not already installed
# install.packages(c("tidyverse", "MASS", "pROC", "caTools"))

library(tidyverse)
library(MASS)
library(pROC)
library(caTools)

1. Load Data

# Read the credit-default data set from the working directory.
Default <- read.csv("Default.csv")

# Convert default and student to factor
Default$default <- as.factor(Default$default)
Default$student <- as.factor(Default$student)

# as.factor() derives the level order from the sorted values in the file,
# while the later steps select the positive class by position (second
# posterior column, second factor level). Fail fast if the response is not
# the expected No/Yes factor, so a differently coded file cannot silently
# change which class is treated as positive.
stopifnot(identical(levels(Default$default), c("No", "Yes")))

# Print the structure as a quick sanity check of column types.
str(Default)
## 'data.frame':    10000 obs. of  5 variables:
##  $ X      : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ default: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ student: Factor w/ 2 levels "No","Yes": 1 2 1 1 1 2 1 2 1 1 ...
##  $ balance: num  730 817 1074 529 786 ...
##  $ income : num  44362 12106 31767 35704 38463 ...

2. Train-Test Split

# Fix the RNG seed so the partition below is reproducible.
set.seed(123)

# Logical mask over the rows of Default: TRUE rows form the training set,
# FALSE rows form the test set (70/30 split requested via SplitRatio).
split <- sample.split(Default$default, SplitRatio = 0.7)
train <- Default[which(split), ]
test  <- Default[which(!split), ]

# Report the size of each partition.
cat("Train size:", nrow(train), "\n")
## Train size: 7000
cat("Test size: ", nrow(test), "\n")
## Test size:  3000

3. Logistic Regression

# Binomial GLM (logistic regression) of the default indicator on income
# and balance, fitted on the training rows only.
glm_model <- glm(
  formula = default ~ income + balance,
  family  = binomial,
  data    = train
)

# Predicted probabilities for the held-out rows; type = "response" returns
# values on the probability scale rather than the linear-predictor scale.
glm_probs <- predict(glm_model, newdata = test, type = "response")

4. LDA

# Linear discriminant analysis on the same two predictors, training rows only.
lda_model <- lda(default ~ income + balance, data = train)

# Score the test rows, then keep the second column of the posterior matrix,
# i.e. the posterior probability of the positive class.
lda_pred  <- predict(lda_model, newdata = test)
lda_probs <- lda_pred[["posterior"]][, 2L]

5. QDA

# Quadratic discriminant analysis on the same two predictors, training rows
# only.
qda_model <- qda(default ~ income + balance, data = train)

# Score the test rows, then keep the second column of the posterior matrix,
# i.e. the posterior probability of the positive class.
qda_pred  <- predict(qda_model, newdata = test)
qda_probs <- qda_pred[["posterior"]][, 2L]

6. ROC Curves

# Both the class order and the comparison direction are passed to roc()
# explicitly, so the positive class is never guessed and the curve cannot
# come out flipped (AUC < 0.5).
# levels is c(negative, positive); direction = "<" means a lower predictor
# score points to the negative class.
roc_levels <- levels(test$default)[1:2]
pos_class  <- roc_levels[2]   # second level is the positive class ("Yes")

# One ROC object per model, all scored against the same test labels.
roc_glm <- roc(test$default, glm_probs, levels = roc_levels, direction = "<")
roc_lda <- roc(test$default, lda_probs, levels = roc_levels, direction = "<")
roc_qda <- roc(test$default, qda_probs, levels = roc_levels, direction = "<")

7. Plot ROC Curves

# One colour per model, in the order logistic / LDA / QDA; the same vector
# drives both the curves and the legend so they cannot drift apart.
curve_cols <- c("blue", "red", "green")

# Legend entries carry each model's AUC rounded to three decimals.
auc_labels <- c(
  paste("Logistic  AUC =", round(auc(roc_glm), 3)),
  paste("LDA       AUC =", round(auc(roc_lda), 3)),
  paste("QDA       AUC =", round(auc(roc_qda), 3))
)

# The first call opens the plot; the other two curves are overlaid on it.
plot(roc_glm, col = curve_cols[1], lwd = 2,
     main = "ROC Curves — Default Prediction")
plot(roc_lda, col = curve_cols[2], lwd = 2, add = TRUE)
plot(roc_qda, col = curve_cols[3], lwd = 2, add = TRUE)

# bty = "n" draws the legend without a surrounding box.
legend("bottomright",
       legend = auc_labels,
       col    = curve_cols,
       lwd    = 2,
       bty    = "n")

8. AUC Summary

# Area under each ROC curve, rounded to four decimals for the report.
auc_glm <- round(auc(roc_glm), 4)
auc_lda <- round(auc(roc_lda), 4)
auc_qda <- round(auc(roc_qda), 4)

# Print one line per model; labels are padded so the values line up.
cat("Logistic Regression AUC:", auc_glm, "\n")
## Logistic Regression AUC: 0.9494
cat("LDA                 AUC:", auc_lda, "\n")
## LDA                 AUC: 0.9493
cat("QDA                 AUC:", auc_qda, "\n")
## QDA                 AUC: 0.9489