hm6

ROC Curves for Default Dataset

We fit logistic regression models based on Tables 4.1, 4.2, and 4.3 from ISLR and plot ROC curves.

Model 1 (Table 4.1): balance only
Model 2 (Table 4.2): balance + income + student
Model 3 (Table 4.3): student only

# Load required libraries
library(ISLR2)
library(pROC)
library(ggplot2)

# Use built-in Default dataset (same as Default.csv)
data(Default)
str(Default)

## 'data.frame':    10000 obs. of  4 variables:
##  $ default: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ student: Factor w/ 2 levels "No","Yes": 1 2 1 1 1 2 1 2 1 1 ...
##  $ balance: num  730 817 1074 529 786 ...
##  $ income : num  44362 12106 31767 35704 38463 ...

# Model 1 (Table 4.1): balance only
model1 <- glm(default ~ balance, data = Default, family = binomial)

# Model 2 (Table 4.2): balance + income + student
model2 <- glm(default ~ balance + income + student, data = Default, family = binomial)

# Model 3 (Table 4.3): student only
model3 <- glm(default ~ student, data = Default, family = binomial)

# Predicted probabilities
prob1 <- predict(model1, type = "response")
prob2 <- predict(model2, type = "response")
prob3 <- predict(model3, type = "response")

# Compute ROC objects
roc1 <- roc(Default$default, prob1, levels = c("No", "Yes"), direction = "<")
roc2 <- roc(Default$default, prob2, levels = c("No", "Yes"), direction = "<")
roc3 <- roc(Default$default, prob3, levels = c("No", "Yes"), direction = "<")

# AUC values
auc1 <- round(auc(roc1), 4)
auc2 <- round(auc(roc2), 4)
auc3 <- round(auc(roc3), 4)

# Build data frames for ggplot
roc_df <- function(roc_obj, model_name, auc_val) {
  data.frame(
    FPR  = 1 - roc_obj$specificities,
    TPR  = roc_obj$sensitivities,
    Model = paste0(model_name, " (AUC = ", auc_val, ")")
  )
}

df1 <- roc_df(roc1, "Model 1: balance", auc1)
df2 <- roc_df(roc2, "Model 2: balance+income+student", auc2)
df3 <- roc_df(roc3, "Model 3: student", auc3)

roc_data <- rbind(df1, df2, df3)

# Plot
ggplot(roc_data, aes(x = FPR, y = TPR, color = Model)) +
  geom_line(size = 1.1) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray50") +
  scale_color_manual(values = c("#E41A1C", "#377EB8", "#4DAF4A")) +
  labs(
    title = "ROC Curves for Default Dataset (Tables 4.1, 4.2, 4.3)",
    x     = "False Positive Rate (1 - Specificity)",
    y     = "True Positive Rate (Sensitivity)",
    color = "Model"
  ) +
  theme_bw(base_size = 13) +
  theme(legend.position = "bottom",
        legend.text = element_text(size = 10))

cat("AUC Summary:\n")

## AUC Summary:

cat(sprintf("  Model 1 (balance only):              %.4f\n", auc1))

##   Model 1 (balance only):              0.9480

cat(sprintf("  Model 2 (balance + income + student): %.4f\n", auc2))

##   Model 2 (balance + income + student): 0.9496

cat(sprintf("  Model 3 (student only):              %.4f\n", auc3))

##   Model 3 (student only):              0.5450

hm6

114035122

2026-03-31

ROC Curves for Default Dataset

Summary