Load Packages

library(tidyverse)
library(ISLR2)
library(pROC)

Load & Prepare Data

data(Default)

Default <- Default %>%
  mutate(
    default_num = if_else(default == "Yes", 1L, 0L),
    student_num = if_else(student == "Yes", 1L, 0L)
  )

glimpse(Default)
## Rows: 10,000
## Columns: 6
## $ default     <fct> No, No, No, No, No, No, No, No, No, No, No, No, No, No, No…
## $ student     <fct> No, Yes, No, No, No, Yes, No, Yes, No, No, Yes, Yes, No, N…
## $ balance     <dbl> 729.5265, 817.1804, 1073.5492, 529.2506, 785.6559, 919.588…
## $ income      <dbl> 44361.625, 12106.135, 31767.139, 35704.494, 38463.496, 749…
## $ default_num <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ student_num <int> 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0…

Fit Logistic Regression Models

# Table 4.1 – balance as sole predictor
fit1 <- glm(default_num ~ balance, data = Default, family = binomial)

# Table 4.2 – student status as sole predictor
fit2 <- glm(default_num ~ student_num, data = Default, family = binomial)

# Table 4.3 – balance, income, and student together
fit3 <- glm(default_num ~ balance + income + student_num,
            data = Default, family = binomial)

Model Summaries

summary(fit1)$coefficients
##                  Estimate   Std. Error   z value      Pr(>|z|)
## (Intercept) -10.651330614 0.3611573721 -29.49221 3.623124e-191
## balance       0.005498917 0.0002203702  24.95309 1.976602e-137
summary(fit2)$coefficients
##               Estimate Std. Error    z value     Pr(>|z|)
## (Intercept) -3.5041278 0.07071301 -49.554219 0.0000000000
## student_num  0.4048871 0.11501883   3.520181 0.0004312529
summary(fit3)$coefficients
##                  Estimate   Std. Error    z value      Pr(>|z|)
## (Intercept) -1.086905e+01 4.922555e-01 -22.080088 4.911280e-108
## balance      5.736505e-03 2.318945e-04  24.737563 4.219578e-135
## income       3.033450e-06 8.202615e-06   0.369815  7.115203e-01
## student_num -6.467758e-01 2.362525e-01  -2.737646  6.188063e-03

Compute ROC Objects

roc1 <- roc(Default$default_num, fitted(fit1), quiet = TRUE)
roc2 <- roc(Default$default_num, fitted(fit2), quiet = TRUE)
roc3 <- roc(Default$default_num, fitted(fit3), quiet = TRUE)

Individual ROC Plots

Table 4.1 – Balance Only

plot(roc1,
     col       = "steelblue",
     lwd       = 2,
     main      = "ROC Curve – Table 4.1: Balance Only",
     xlab      = "False Positive Rate",
     ylab      = "True Positive Rate")
abline(a = 0, b = 1, lty = 2, col = "gray60")
legend("bottomright",
       legend = paste0("AUC = ", round(auc(roc1), 4)),
       col = "steelblue", lwd = 2, bty = "n")

Table 4.2 – Student Only

plot(roc2,
     col       = "darkorange",
     lwd       = 2,
     main      = "ROC Curve – Table 4.2: Student Only",
     xlab      = "False Positive Rate",
     ylab      = "True Positive Rate")
abline(a = 0, b = 1, lty = 2, col = "gray60")
legend("bottomright",
       legend = paste0("AUC = ", round(auc(roc2), 4)),
       col = "darkorange", lwd = 2, bty = "n")

Table 4.3 – Balance + Income + Student

plot(roc3,
     col       = "forestgreen",
     lwd       = 2,
     main      = "ROC Curve – Table 4.3: Balance + Income + Student",
     xlab      = "False Positive Rate",
     ylab      = "True Positive Rate")
abline(a = 0, b = 1, lty = 2, col = "gray60")
legend("bottomright",
       legend = paste0("AUC = ", round(auc(roc3), 4)),
       col = "forestgreen", lwd = 2, bty = "n")

Combined ROC Plot

plot(roc1,
     col  = "steelblue",
     lwd  = 2,
     main = "ROC Curves – Default Dataset (Tables 4.1 / 4.2 / 4.3)",
     xlab = "False Positive Rate",
     ylab = "True Positive Rate")
plot(roc2, col = "darkorange",  lwd = 2, add = TRUE)
plot(roc3, col = "forestgreen", lwd = 2, add = TRUE)
abline(a = 0, b = 1, lty = 2, col = "gray60")

legend("bottomright",
       legend = c(
         paste0("Table 4.1: balance only (AUC = ",        round(auc(roc1), 4), ")"),
         paste0("Table 4.2: student only (AUC = ",        round(auc(roc2), 4), ")"),
         paste0("Table 4.3: balance+income+student (AUC = ", round(auc(roc3), 4), ")")
       ),
       col = c("steelblue", "darkorange", "forestgreen"),
       lwd = 2, bty = "n", cex = 0.85)

AUC Comparison

auc_table <- tibble(
  Model      = c("Table 4.1: balance only",
                 "Table 4.2: student only",
                 "Table 4.3: balance + income + student"),
  Predictors = c("balance", "student", "balance, income, student"),
  AUC        = round(c(as.numeric(auc(roc1)),
                       as.numeric(auc(roc2)),
                       as.numeric(auc(roc3))), 4)
) %>%
  arrange(desc(AUC))

knitr::kable(auc_table, caption = "AUC comparison across the three models")
AUC comparison across the three models
Model Predictors AUC
Table 4.3: balance + income + student balance, income, student 0.9496
Table 4.1: balance only balance 0.9480
Table 4.2: student only student 0.5450