Overview
This document fits the three logistic regression models described in
Tables 4.1, 4.2, and 4.3 of An Introduction to Statistical
Learning (ISLR) using the Default dataset, then plots
their ROC curves for comparison.
| 4.1 |
Simple logistic regression |
balance only |
| 4.2 |
Simple logistic regression |
student only |
| 4.3 |
Multiple logistic regression |
balance + income + student |
1. Load Packages & Data
# Install packages if needed:
# install.packages(c("tidyverse", "pROC", "ISLR"))
library(tidyverse)
library(pROC)
# Option A: load from ISLR package
# library(ISLR)
# data(Default)
# Option B: load from CSV (used here)
Default <- read.csv("Default.csv", row.names = 1)
# Convert to factors
Default$default <- factor(Default$default, levels = c("No", "Yes"))
Default$student <- factor(Default$student, levels = c("No", "Yes"))
glimpse(Default)
## Rows: 10,000
## Columns: 4
## $ default <fct> No, No, No, No, No, No, No, No, No, No, No, No, No, No, No, No…
## $ student <fct> No, Yes, No, No, No, Yes, No, Yes, No, No, Yes, Yes, No, No, N…
## $ balance <dbl> 729.5265, 817.1804, 1073.5492, 529.2506, 785.6559, 919.5885, 8…
## $ income <dbl> 44361.625, 12106.135, 31767.139, 35704.494, 38463.496, 7491.55…
2. Fit the Three Models
Table 4.1 — Balance Only
# Table 4.1: Simple logistic regression with balance
model_4_1 <- glm(default ~ balance,
data = Default,
family = binomial)
summary(model_4_1)
##
## Call:
## glm(formula = default ~ balance, family = binomial, data = Default)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.065e+01 3.612e-01 -29.49 <2e-16 ***
## balance 5.499e-03 2.204e-04 24.95 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2920.6 on 9999 degrees of freedom
## Residual deviance: 1596.5 on 9998 degrees of freedom
## AIC: 1600.5
##
## Number of Fisher Scoring iterations: 8
Table 4.2 — Student Status Only
# Table 4.2: Simple logistic regression with student
model_4_2 <- glm(default ~ student,
data = Default,
family = binomial)
summary(model_4_2)
##
## Call:
## glm(formula = default ~ student, family = binomial, data = Default)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.50413 0.07071 -49.55 < 2e-16 ***
## studentYes 0.40489 0.11502 3.52 0.000431 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2920.6 on 9999 degrees of freedom
## Residual deviance: 2908.7 on 9998 degrees of freedom
## AIC: 2912.7
##
## Number of Fisher Scoring iterations: 6
Table 4.3 — Balance + Income + Student
# Table 4.3: Multiple logistic regression
model_4_3 <- glm(default ~ balance + income + student,
data = Default,
family = binomial)
summary(model_4_3)
##
## Call:
## glm(formula = default ~ balance + income + student, family = binomial,
## data = Default)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.087e+01 4.923e-01 -22.080 < 2e-16 ***
## balance 5.737e-03 2.319e-04 24.738 < 2e-16 ***
## income 3.033e-06 8.203e-06 0.370 0.71152
## studentYes -6.468e-01 2.363e-01 -2.738 0.00619 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2920.6 on 9999 degrees of freedom
## Residual deviance: 1571.5 on 9996 degrees of freedom
## AIC: 1579.5
##
## Number of Fisher Scoring iterations: 8
3. Compute Predicted Probabilities
Default$prob_4_1 <- predict(model_4_1, type = "response")
Default$prob_4_2 <- predict(model_4_2, type = "response")
Default$prob_4_3 <- predict(model_4_3, type = "response")
4. Build ROC Objects
roc_4_1 <- roc(Default$default, Default$prob_4_1, levels = c("No", "Yes"))
roc_4_2 <- roc(Default$default, Default$prob_4_2, levels = c("No", "Yes"))
roc_4_3 <- roc(Default$default, Default$prob_4_3, levels = c("No", "Yes"))
# Print AUC values
cat(sprintf("AUC — Table 4.1 (balance): %.4f\n", auc(roc_4_1)))
## AUC — Table 4.1 (balance): 0.9480
cat(sprintf("AUC — Table 4.2 (student): %.4f\n", auc(roc_4_2)))
## AUC — Table 4.2 (student): 0.5450
cat(sprintf("AUC — Table 4.3 (balance + income + student): %.4f\n", auc(roc_4_3)))
## AUC — Table 4.3 (balance + income + student): 0.9496
5. Plot ROC Curves
5a. Base R Plot
plot(roc_4_1,
col = "steelblue",
lwd = 2,
main = "ROC Curves — Default Dataset (ISLR Tables 4.1–4.3)",
xlab = "False Positive Rate (1 – Specificity)",
ylab = "True Positive Rate (Sensitivity)")
plot(roc_4_2, col = "tomato", lwd = 2, add = TRUE)
plot(roc_4_3, col = "darkgreen", lwd = 2, add = TRUE)
abline(a = 0, b = 1, lty = 2, col = "grey50") # random-classifier line
legend("bottomright",
legend = c(
sprintf("Table 4.1: balance only (AUC = %.3f)", auc(roc_4_1)),
sprintf("Table 4.2: student only (AUC = %.3f)", auc(roc_4_2)),
sprintf("Table 4.3: balance+income+student (AUC = %.3f)", auc(roc_4_3))
),
col = c("steelblue", "tomato", "darkgreen"),
lwd = 2,
bty = "n",
cex = 0.85)

5b. ggplot2 Plot
# Helper to convert a pROC roc object to a tidy data frame
roc_to_df <- function(roc_obj, label) {
data.frame(
FPR = 1 - roc_obj$specificities,
TPR = roc_obj$sensitivities,
Model = label
)
}
roc_df <- bind_rows(
roc_to_df(roc_4_1,
sprintf("Table 4.1: balance only (AUC = %.3f)", auc(roc_4_1))),
roc_to_df(roc_4_2,
sprintf("Table 4.2: student only (AUC = %.3f)", auc(roc_4_2))),
roc_to_df(roc_4_3,
sprintf("Table 4.3: balance+income+student (AUC = %.3f)", auc(roc_4_3)))
)
ggplot(roc_df, aes(x = FPR, y = TPR, colour = Model)) +
geom_line(linewidth = 1) +
geom_abline(slope = 1, intercept = 0,
linetype = "dashed", colour = "grey50") +
scale_colour_manual(values = c("steelblue", "tomato", "darkgreen")) +
labs(
title = "ROC Curves — Default Dataset (ISLR Tables 4.1–4.3)",
x = "False Positive Rate (1 – Specificity)",
y = "True Positive Rate (Sensitivity)",
colour = NULL
) +
theme_bw(base_size = 13) +
theme(
legend.position = "bottom",
legend.direction = "vertical",
plot.title = element_text(face = "bold", hjust = 0.5)
)

6. Summary Table
summary_tbl <- data.frame(
Table = c("4.1", "4.2", "4.3"),
Model = c("balance only",
"student only",
"balance + income + student"),
AUC = round(c(auc(roc_4_1), auc(roc_4_2), auc(roc_4_3)), 4)
)
knitr::kable(summary_tbl,
caption = "AUC Comparison Across the Three ISLR Models",
align = c("c", "l", "c"))
AUC Comparison Across the Three ISLR Models
| 4.1 |
balance only |
0.9480 |
| 4.2 |
student only |
0.5450 |
| 4.3 |
balance + income + student |
0.9496 |
7. Interpretation
- Table 4.1 (balance) achieves the highest
single-predictor AUC. Balance is a strong continuous predictor of
default.
- Table 4.2 (student) has a much lower AUC — the
binary student indicator alone provides little discriminative
power.
- Table 4.3 (all three predictors) matches or
slightly exceeds Table 4.1, confirming that once balance is included,
income and student status add only marginal improvement in
discrimination (though they remain statistically and practically
meaningful).