Overview

This report fits three logistic regression models based on Tables 4.1, 4.2, and 4.3 from An Introduction to Statistical Learning (ISLR) using the Default dataset, and plots their ROC curves.

Table Predictors
4.1 balance only
4.2 student only
4.3 balance + income + student

Load Libraries and Data

library(ISLR2)       # Default dataset
library(pROC)        # ROC curves
library(ggplot2)     # Plotting

data(Default)
head(Default)
##   default student   balance    income
## 1      No      No  729.5265 44361.625
## 2      No     Yes  817.1804 12106.135
## 3      No      No 1073.5492 31767.139
## 4      No      No  529.2506 35704.494
## 5      No      No  785.6559 38463.496
## 6      No     Yes  919.5885  7491.559

Fit Logistic Regression Models

# Table 4.1: balance only
model_4_1 <- glm(default ~ balance, data = Default, family = binomial)

# Table 4.2: student only
model_4_2 <- glm(default ~ student, data = Default, family = binomial)

# Table 4.3: balance + income + student
model_4_3 <- glm(default ~ balance + income + student, data = Default, family = binomial)

summary(model_4_3)
## 
## Call:
## glm(formula = default ~ balance + income + student, family = binomial, 
##     data = Default)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.087e+01  4.923e-01 -22.080  < 2e-16 ***
## balance      5.737e-03  2.319e-04  24.738  < 2e-16 ***
## income       3.033e-06  8.203e-06   0.370  0.71152    
## studentYes  -6.468e-01  2.363e-01  -2.738  0.00619 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2920.6  on 9999  degrees of freedom
## Residual deviance: 1571.5  on 9996  degrees of freedom
## AIC: 1579.5
## 
## Number of Fisher Scoring iterations: 8

Compute ROC Curves

# Predicted probabilities
prob_4_1 <- predict(model_4_1, type = "response")
prob_4_2 <- predict(model_4_2, type = "response")
prob_4_3 <- predict(model_4_3, type = "response")

# ROC objects
roc_4_1 <- roc(Default$default, prob_4_1, quiet = TRUE)
roc_4_2 <- roc(Default$default, prob_4_2, quiet = TRUE)
roc_4_3 <- roc(Default$default, prob_4_3, quiet = TRUE)

# AUC values
cat("AUC - Table 4.1 (balance):              ", round(auc(roc_4_1), 4), "\n")
## AUC - Table 4.1 (balance):               0.948
cat("AUC - Table 4.2 (student):              ", round(auc(roc_4_2), 4), "\n")
## AUC - Table 4.2 (student):               0.545
cat("AUC - Table 4.3 (balance+income+student):", round(auc(roc_4_3), 4), "\n")
## AUC - Table 4.3 (balance+income+student): 0.9496

Plot ROC Curves

# Build data frames for ggplot
make_roc_df <- function(roc_obj, label) {
  data.frame(
    FPR   = 1 - roc_obj$specificities,
    TPR   = roc_obj$sensitivities,
    Model = label
  )
}

df_roc <- rbind(
  make_roc_df(roc_4_1, paste0("Table 4.1 – balance only (AUC = ",        round(auc(roc_4_1), 3), ")")),
  make_roc_df(roc_4_2, paste0("Table 4.2 – student only (AUC = ",        round(auc(roc_4_2), 3), ")")),
  make_roc_df(roc_4_3, paste0("Table 4.3 – balance + income + student (AUC = ", round(auc(roc_4_3), 3), ")"))
)

ggplot(df_roc, aes(x = FPR, y = TPR, color = Model, linetype = Model)) +
  geom_line(linewidth = 1.1) +
  geom_abline(slope = 1, intercept = 0, linetype = "dotted", color = "gray50") +
  scale_color_manual(values = c("#2196F3", "#FF9800", "#4CAF50")) +
  scale_linetype_manual(values = c("solid", "dashed", "dotdash")) +
  labs(
    title    = "ROC Curves – ISLR Default Dataset",
    subtitle = "Logistic Regression Models from Tables 4.1, 4.2, and 4.3",
    x        = "False Positive Rate (1 – Specificity)",
    y        = "True Positive Rate (Sensitivity)",
    color    = "Model",
    linetype = "Model"
  ) +
  theme_bw(base_size = 13) +
  theme(
    legend.position  = "bottom",
    legend.direction = "vertical",
    plot.title       = element_text(face = "bold")
  )


Interpretation