library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
# Load libraries
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the credit-default data set from its CSV file
data <- read.csv(file = "downloads/Default.csv")
# Check column types first, then the per-column summaries
str(data)
## 'data.frame':    10000 obs. of  5 variables:
##  $ X      : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ default: chr  "No" "No" "No" "No" ...
##  $ student: chr  "No" "Yes" "No" "No" ...
##  $ balance: num  730 817 1074 529 786 ...
##  $ income : num  44362 12106 31767 35704 38463 ...
summary(data)
##        X           default            student             balance      
##  Min.   :    1   Length:10000       Length:10000       Min.   :   0.0  
##  1st Qu.: 2501   Class :character   Class :character   1st Qu.: 481.7  
##  Median : 5000   Mode  :character   Mode  :character   Median : 823.6  
##  Mean   : 5000                                         Mean   : 835.4  
##  3rd Qu.: 7500                                         3rd Qu.:1166.3  
##  Max.   :10000                                         Max.   :2654.3  
##      income     
##  Min.   :  772  
##  1st Qu.:21340  
##  Median :34553  
##  Mean   :33517  
##  3rd Qu.:43808  
##  Max.   :73554
# Recode the outcome as a numeric indicator: "Yes" -> 1, anything else -> 0
# (a logical comparison coerced to numeric gives exactly that mapping)
data[["default"]] <- as.numeric(data[["default"]] == "Yes")
# Three logistic regressions of increasing scope, all fitted on the full
# data set with a binomial family and logit link.

# Model 1: income as the only predictor
model1 <- glm(
  formula = default ~ income,
  family = binomial(link = "logit"),
  data = data
)

# Model 2: balance as the only predictor
model2 <- glm(
  formula = default ~ balance,
  family = binomial(link = "logit"),
  data = data
)

# Model 3: income, balance and student status together
model3 <- glm(
  formula = default ~ income + balance + student,
  family = binomial(link = "logit"),
  data = data
)

# In-sample predicted probabilities of default, stored next to the
# observations. For a glm, fitted() returns the fitted values on the
# response (probability) scale.
data[["prob1"]] <- fitted(model1)
data[["prob2"]] <- fitted(model2)
data[["prob3"]] <- fitted(model3)
# 1. ROC Curve Calculation
# Build one ROC object per model from the observed outcome (data$default,
# coded 0/1) and that model's in-sample predicted probability.
#
# levels and direction are fixed explicitly instead of being inferred from
# the data by roc():
#   - levels = c(0, 1): 0 is the control group, 1 is the case group.
#   - direction = "<":  controls are expected to have lower predicted
#                       probabilities than cases.
# Pinning them keeps the three curves comparable (a weak predictor cannot
# have its curve silently flipped by auto-detection) and avoids the
# "Setting levels" / "Setting direction" messages on every call.
roc1 <- roc(data$default, data$prob1, levels = c(0, 1), direction = "<")
roc2 <- roc(data$default, data$prob2, levels = c(0, 1), direction = "<")
roc3 <- roc(data$default, data$prob3, levels = c(0, 1), direction = "<")
# 2. Plot ROC Curves
# Overlay the three ROC curves on one set of axes. The first call opens the
# plot and sets the title; the next two draw onto it via add = TRUE.
curve_colours <- c("blue", "red", "green")
curve_labels <- c(
  "Model 1 (Income)",
  "Model 2 (Balance)",
  "Model 3 (Full Model)"
)

plot(roc1, main = "ROC Curves Comparison", col = curve_colours[1], lwd = 2)
plot(roc2, add = TRUE, col = curve_colours[2], lwd = 2)
plot(roc3, add = TRUE, col = curve_colours[3], lwd = 2)

# Key in the lower-right corner; labels and colours are in the same order
# as the curves were drawn.
legend(
  x = "bottomright",
  legend = curve_labels,
  col = curve_colours,
  lwd = 2
)

# 3. AUC Comparison
# Area under each ROC curve, in model order (1 = income, 2 = balance,
# 3 = full model). Each value is auto-printed at top level.
pROC::auc(roc1)
## Area under the curve: 0.5327
pROC::auc(roc2)
## Area under the curve: 0.948
pROC::auc(roc3)
## Area under the curve: 0.9496