Load Packages
library(tidyverse)
library(ISLR2)
library(pROC)
Load & Prepare Data
data(Default)
Default <- Default %>%
mutate(
default_num = if_else(default == "Yes", 1L, 0L),
student_num = if_else(student == "Yes", 1L, 0L)
)
glimpse(Default)
## Rows: 10,000
## Columns: 6
## $ default <fct> No, No, No, No, No, No, No, No, No, No, No, No, No, No, No…
## $ student <fct> No, Yes, No, No, No, Yes, No, Yes, No, No, Yes, Yes, No, N…
## $ balance <dbl> 729.5265, 817.1804, 1073.5492, 529.2506, 785.6559, 919.588…
## $ income <dbl> 44361.625, 12106.135, 31767.139, 35704.494, 38463.496, 749…
## $ default_num <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ student_num <int> 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0…
Fit Logistic Regression Models
# Table 4.1 – balance as sole predictor
fit1 <- glm(default_num ~ balance, data = Default, family = binomial)
# Table 4.2 – student status as sole predictor
fit2 <- glm(default_num ~ student_num, data = Default, family = binomial)
# Table 4.3 – balance, income, and student together
fit3 <- glm(default_num ~ balance + income + student_num,
data = Default, family = binomial)
Model Summaries
summary(fit1)$coefficients
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.651330614 0.3611573721 -29.49221 3.623124e-191
## balance 0.005498917 0.0002203702 24.95309 1.976602e-137
summary(fit2)$coefficients
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.5041278 0.07071301 -49.554219 0.0000000000
## student_num 0.4048871 0.11501883 3.520181 0.0004312529
summary(fit3)$coefficients
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.086905e+01 4.922555e-01 -22.080088 4.911280e-108
## balance 5.736505e-03 2.318945e-04 24.737563 4.219578e-135
## income 3.033450e-06 8.202615e-06 0.369815 7.115203e-01
## student_num -6.467758e-01 2.362525e-01 -2.737646 6.188063e-03
Compute ROC Objects
roc1 <- roc(Default$default_num, fitted(fit1), quiet = TRUE)
roc2 <- roc(Default$default_num, fitted(fit2), quiet = TRUE)
roc3 <- roc(Default$default_num, fitted(fit3), quiet = TRUE)
Individual ROC Plots
Table 4.1 – Balance Only
plot(roc1,
col = "steelblue",
lwd = 2,
main = "ROC Curve – Table 4.1: Balance Only",
xlab = "False Positive Rate",
ylab = "True Positive Rate")
abline(a = 0, b = 1, lty = 2, col = "gray60")
legend("bottomright",
legend = paste0("AUC = ", round(auc(roc1), 4)),
col = "steelblue", lwd = 2, bty = "n")

Table 4.2 – Student Only
plot(roc2,
col = "darkorange",
lwd = 2,
main = "ROC Curve – Table 4.2: Student Only",
xlab = "False Positive Rate",
ylab = "True Positive Rate")
abline(a = 0, b = 1, lty = 2, col = "gray60")
legend("bottomright",
legend = paste0("AUC = ", round(auc(roc2), 4)),
col = "darkorange", lwd = 2, bty = "n")

Table 4.3 – Balance + Income + Student
plot(roc3,
col = "forestgreen",
lwd = 2,
main = "ROC Curve – Table 4.3: Balance + Income + Student",
xlab = "False Positive Rate",
ylab = "True Positive Rate")
abline(a = 0, b = 1, lty = 2, col = "gray60")
legend("bottomright",
legend = paste0("AUC = ", round(auc(roc3), 4)),
col = "forestgreen", lwd = 2, bty = "n")

Combined ROC Plot
plot(roc1,
col = "steelblue",
lwd = 2,
main = "ROC Curves – Default Dataset (Tables 4.1 / 4.2 / 4.3)",
xlab = "False Positive Rate",
ylab = "True Positive Rate")
plot(roc2, col = "darkorange", lwd = 2, add = TRUE)
plot(roc3, col = "forestgreen", lwd = 2, add = TRUE)
abline(a = 0, b = 1, lty = 2, col = "gray60")
legend("bottomright",
legend = c(
paste0("Table 4.1: balance only (AUC = ", round(auc(roc1), 4), ")"),
paste0("Table 4.2: student only (AUC = ", round(auc(roc2), 4), ")"),
paste0("Table 4.3: balance+income+student (AUC = ", round(auc(roc3), 4), ")")
),
col = c("steelblue", "darkorange", "forestgreen"),
lwd = 2, bty = "n", cex = 0.85)

AUC Comparison
auc_table <- tibble(
Model = c("Table 4.1: balance only",
"Table 4.2: student only",
"Table 4.3: balance + income + student"),
Predictors = c("balance", "student", "balance, income, student"),
AUC = round(c(as.numeric(auc(roc1)),
as.numeric(auc(roc2)),
as.numeric(auc(roc3))), 4)
) %>%
arrange(desc(AUC))
knitr::kable(auc_table, caption = "AUC comparison across the three models")
AUC comparison across the three models
| Table 4.3: balance + income + student |
balance, income, student |
0.9496 |
| Table 4.1: balance only |
balance |
0.9480 |
| Table 4.2: student only |
student |
0.5450 |