ROC Curves for Default Dataset – Tables 4.1, 4.2, 4.3

Overview

This document fits the three logistic regression models described in ISLR Tables 4.1, 4.2, and 4.3 on the Default dataset and plots their ROC curves together for comparison.

Table	Model	Predictors
4.1	Model 1	`balance` only
4.2	Model 2	`income` only
4.3	Model 3	`balance + income + student`

1. Load Libraries and Data

library(tidyverse)
library(pROC)

# Load data
default_df <- read.csv("Default.csv")

# Convert outcome to binary factor
default_df$default <- factor(default_df$default, levels = c("No", "Yes"))
default_df$student <- factor(default_df$student, levels = c("No", "Yes"))

glimpse(default_df)

## Rows: 10,000
## Columns: 5
## $ X       <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,…
## $ default <fct> No, No, No, No, No, No, No, No, No, No, No, No, No, No, No, No…
## $ student <fct> No, Yes, No, No, No, Yes, No, Yes, No, No, Yes, Yes, No, No, N…
## $ balance <dbl> 729.5265, 817.1804, 1073.5492, 529.2506, 785.6559, 919.5885, 8…
## $ income  <dbl> 44361.625, 12106.135, 31767.139, 35704.494, 38463.496, 7491.55…

2. Fit the Three Logistic Regression Models

# Table 4.1: balance only
model_41 <- glm(default ~ balance, data = default_df, family = binomial)

# Table 4.2: income only
model_42 <- glm(default ~ income, data = default_df, family = binomial)

# Table 4.3: balance + income + student
model_43 <- glm(default ~ balance + income + student, data = default_df, family = binomial)

summary(model_41)

## 
## Call:
## glm(formula = default ~ balance, family = binomial, data = default_df)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.065e+01  3.612e-01  -29.49   <2e-16 ***
## balance      5.499e-03  2.204e-04   24.95   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2920.6  on 9999  degrees of freedom
## Residual deviance: 1596.5  on 9998  degrees of freedom
## AIC: 1600.5
## 
## Number of Fisher Scoring iterations: 8

summary(model_42)

## 
## Call:
## glm(formula = default ~ income, family = binomial, data = default_df)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.094e+00  1.463e-01 -21.156   <2e-16 ***
## income      -8.353e-06  4.207e-06  -1.985   0.0471 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2920.6  on 9999  degrees of freedom
## Residual deviance: 2916.7  on 9998  degrees of freedom
## AIC: 2920.7
## 
## Number of Fisher Scoring iterations: 6

summary(model_43)

## 
## Call:
## glm(formula = default ~ balance + income + student, family = binomial, 
##     data = default_df)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.087e+01  4.923e-01 -22.080  < 2e-16 ***
## balance      5.737e-03  2.319e-04  24.738  < 2e-16 ***
## income       3.033e-06  8.203e-06   0.370  0.71152    
## studentYes  -6.468e-01  2.363e-01  -2.738  0.00619 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2920.6  on 9999  degrees of freedom
## Residual deviance: 1571.5  on 9996  degrees of freedom
## AIC: 1579.5
## 
## Number of Fisher Scoring iterations: 8

3. Generate Predicted Probabilities

pred_41 <- predict(model_41, type = "response")
pred_42 <- predict(model_42, type = "response")
pred_43 <- predict(model_43, type = "response")

4. Compute ROC Objects

roc_41 <- roc(default_df$default, pred_41, levels = c("No", "Yes"), direction = "<")
roc_42 <- roc(default_df$default, pred_42, levels = c("No", "Yes"), direction = "<")
roc_43 <- roc(default_df$default, pred_43, levels = c("No", "Yes"), direction = "<")

cat("AUC – Model 4.1 (balance):                  ", auc(roc_41), "\n")

## AUC – Model 4.1 (balance):                   0.9479785

cat("AUC – Model 4.2 (income):                   ", auc(roc_42), "\n")

## AUC – Model 4.2 (income):                    0.5326533

cat("AUC – Model 4.3 (balance+income+student):   ", auc(roc_43), "\n")

## AUC – Model 4.3 (balance+income+student):    0.9495581

5. Plot ROC Curves

# Build a combined data frame for ggplot
roc_to_df <- function(roc_obj, label) {
  data.frame(
    FPR   = 1 - roc_obj$specificities,
    TPR   = roc_obj$sensitivities,
    Model = label
  )
}

df_roc <- bind_rows(
  roc_to_df(roc_41, paste0("Table 4.1: balance  (AUC = ", round(auc(roc_41), 3), ")")),
  roc_to_df(roc_42, paste0("Table 4.2: income   (AUC = ", round(auc(roc_42), 3), ")")),
  roc_to_df(roc_43, paste0("Table 4.3: balance+income+student  (AUC = ", round(auc(roc_43), 3), ")"))
)

ggplot(df_roc, aes(x = FPR, y = TPR, color = Model)) +
  geom_line(linewidth = 1.1) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "grey50") +
  scale_color_manual(values = c("#E74C3C", "#3498DB", "#2ECC71")) +
  labs(
    title    = "ROC Curves – Default Dataset",
    subtitle = "Logistic Regression Models from ISLR Tables 4.1, 4.2 & 4.3",
    x        = "False Positive Rate (1 – Specificity)",
    y        = "True Positive Rate (Sensitivity)",
    color    = "Model"
  ) +
  theme_bw(base_size = 13) +
  theme(
    legend.position  = c(0.62, 0.18),
    legend.background = element_rect(fill = "white", color = "grey80"),
    legend.text       = element_text(size = 10),
    plot.title        = element_text(face = "bold", size = 15),
    plot.subtitle     = element_text(size = 11, color = "grey40")
  ) +
  coord_equal()

6. AUC Summary Table

auc_table <- data.frame(
  Table   = c("Table 4.1", "Table 4.2", "Table 4.3"),
  Model   = c("balance only", "income only", "balance + income + student"),
  AUC     = round(c(auc(roc_41), auc(roc_42), auc(roc_43)), 4)
)

knitr::kable(auc_table, caption = "AUC for Each Model")

AUC for Each Model
Table	Model	AUC
Table 4.1	balance only	0.9480
Table 4.2	income only	0.5327
Table 4.3	balance + income + student	0.9496

7. Interpretation

Table 4.1 (balance only) achieves the highest AUC among single-predictor models, confirming that balance is by far the strongest predictor of default.
Table 4.2 (income only) yields an AUC near 0.5, indicating almost no discriminative power — income alone is a poor predictor.
Table 4.3 (balance + income + student) produces the best overall AUC, showing that combining all three predictors slightly improves discrimination over balance alone.

The ROC plot visually confirms these findings: the curves for Tables 4.1 and 4.3 hug the upper-left corner, while Table 4.2 lies close to the diagonal (random classifier).