library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
# Load libraries
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the credit-default data from a CSV file (path is relative to the
# working directory)
data <- read.csv(file = "downloads/Default.csv")
# Structure overview: column names, storage types, and leading values
str(object = data)
## 'data.frame': 10000 obs. of 5 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ default: chr "No" "No" "No" "No" ...
## $ student: chr "No" "Yes" "No" "No" ...
## $ balance: num 730 817 1074 529 786 ...
## $ income : num 44362 12106 31767 35704 38463 ...
# Per-column summary statistics
summary(object = data)
## X default student balance
## Min. : 1 Length:10000 Length:10000 Min. : 0.0
## 1st Qu.: 2501 Class :character Class :character 1st Qu.: 481.7
## Median : 5000 Mode :character Mode :character Median : 823.6
## Mean : 5000 Mean : 835.4
## 3rd Qu.: 7500 3rd Qu.:1166.3
## Max. :10000 Max. :2654.3
## income
## Min. : 772
## 1st Qu.:21340
## Median :34553
## Mean :33517
## 3rd Qu.:43808
## Max. :73554
# Recode the response as numeric: "Yes" becomes 1, any other non-missing
# value becomes 0 (a missing value stays NA)
data$default <- as.numeric(data$default == "Yes")
# Three logistic regressions of the 0/1 default indicator, all fitted on the
# same data frame.
# Model 1: income as the only predictor
model1 <- glm(formula = default ~ income, family = binomial(), data = data)
# Model 2: balance as the only predictor
model2 <- glm(formula = default ~ balance, family = binomial(), data = data)
# Model 3: income, balance, and student status together
model3 <- glm(
  formula = default ~ income + balance + student,
  family = binomial(),
  data = data
)
# Fitted probabilities on the response scale for the rows used in fitting
# (in-sample; no separate hold-out data is involved here)
data$prob1 <- fitted(model1)
data$prob2 <- fitted(model2)
data$prob3 <- fitted(model3)
# ROC Curve Calculation ----
# Build one ROC curve per model from the observed 0/1 outcome and that
# model's fitted probabilities.
# Levels (control = 0, case = 1) and direction (controls < cases) are pinned
# explicitly rather than auto-detected: this guarantees all three curves use
# the same orientation, so their AUCs are directly comparable, and it avoids
# the "Setting levels" / "Setting direction" messages.
roc1 <- roc(data$default, data$prob1, levels = c(0, 1), direction = "<")
roc2 <- roc(data$default, data$prob2, levels = c(0, 1), direction = "<")
roc3 <- roc(data$default, data$prob3, levels = c(0, 1), direction = "<")
# Plot ROC Curves ----
# Overlay the three ROC curves on one set of axes, one color per model.
# Colors and labels are kept in parallel vectors so the legend cannot drift
# out of sync with the curves.
curve_colors <- c("blue", "red", "green")
curve_labels <- c(
  "Model 1 (Income)",
  "Model 2 (Balance)",
  "Model 3 (Full Model)"
)
# The first call opens the plot; the next two draw onto it via add = TRUE
plot(roc1, main = "ROC Curves Comparison", lwd = 2, col = curve_colors[1])
plot(roc2, add = TRUE, lwd = 2, col = curve_colors[2])
plot(roc3, add = TRUE, lwd = 2, col = curve_colors[3])
# Legend entries are listed in the same order as the curves were drawn
legend(x = "bottomright", legend = curve_labels, col = curve_colors, lwd = 2)

# AUC Comparison ----
# Area under each ROC curve, printed in model order (income only, balance
# only, full model)
pROC::auc(roc1)
## Area under the curve: 0.5327
pROC::auc(roc2)
## Area under the curve: 0.948
pROC::auc(roc3)
## Area under the curve: 0.9496