# ── Step 1: Set CRAN mirror & install packages if not already installed ──
options(repos = c(CRAN = "https://cran.rstudio.com/"))
# requireNamespace() only checks whether a package can be loaded; unlike
# require() it does not attach it, so installation and loading stay separate.
required_pkgs <- c("pROC", "ggplot2")
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1), quietly = TRUE
)
if (any(!is_installed)) {
  install.packages(required_pkgs[!is_installed])
}
# ── Step 2: Load packages ──
# library() stops with an error if a package is still unavailable.
# Attaching pROC masks cov, smooth and var from package:stats.
library(pROC)
library(ggplot2)
# ── Step 3: Load data — put your actual file path here ──
# Example Windows path: "C:/Users/alex/Downloads/Default.csv"
# Example Mac/Linux: "/Users/alex/Downloads/Default.csv"
data_path <- "C:/Users/alex/Downloads/Default.csv"
if (!file.exists(data_path)) {
  stop("Data file not found: ", data_path, call. = FALSE)
}
df <- read.csv(data_path)
# Fail early, with a readable message, if a column used by the models below
# is absent; otherwise the first symptom is an obscure assignment error.
required_cols <- c("default", "student", "balance", "income")
missing_cols <- setdiff(required_cols, names(df))
if (length(missing_cols) > 0) {
  stop(
    "Data file is missing required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}
# ── Step 4: Prepare variables ──
# Numeric 0/1 indicators: 1 where the source column equals "Yes", 0 otherwise
# (NA stays NA).
df$default_bin <- as.numeric(df$default == "Yes")
df$student_bin <- as.numeric(df$student == "Yes")
# ── Step 5: Fit 3 Logistic Regression Models (Tables 4.1, 4.2, 4.3) ──
model1 <- glm(default_bin ~ balance, data = df, family = binomial)
model2 <- glm(default_bin ~ student_bin, data = df, family = binomial)
model3 <- glm(
  default_bin ~ balance + income + student_bin,
  data = df,
  family = binomial
)
# ── Step 6: Compute ROC curves ──
# Build the ROC curve for one fitted glm.
# The response and the fitted probabilities are both taken from the model
# object, so they always have the same length even when glm() dropped rows
# containing NA. levels/direction are fixed (0 = control, 1 = case, cases
# score higher) instead of letting roc() pick the direction automatically.
compute_roc <- function(model) {
  roc(
    response = model[["y"]],
    predictor = model[["fitted.values"]],
    levels = c(0, 1),
    direction = "<",
    quiet = TRUE
  )
}
roc1 <- compute_roc(model1)
roc2 <- compute_roc(model2)
roc3 <- compute_roc(model3)
# ── Step 7: Print AUC Summary ──
# Output recorded from an earlier run of this script:
#   === AUC Summary ===
#   Table 4.1 - Balance only: AUC = 0.9480
#   Table 4.2 - Student only: AUC = 0.5450
#   Table 4.3 - Balance + Income + Student: AUC = 0.9496
cat("\n=== AUC Summary ===\n")
auc_rows <- list(
  list(fmt = "Table 4.1 - Balance only: AUC = %.4f\n", curve = roc1),
  list(fmt = "Table 4.2 - Student only: AUC = %.4f\n", curve = roc2),
  list(
    fmt = "Table 4.3 - Balance + Income + Student: AUC = %.4f\n",
    curve = roc3
  )
)
# One formatted line per model, in table order.
for (auc_row in auc_rows) {
  cat(sprintf(auc_row[["fmt"]], as.numeric(auc(auc_row[["curve"]]))))
}
# ── Step 8: Build data frames for ggplot ──
# Convert a ROC object into a plotting data frame.
#
# roc_obj: object exposing `specificities` and `sensitivities` vectors.
# label:   value stored in the `Model` column of every row.
#
# Returns a data frame with columns FPR (1 - specificity), TPR (sensitivity)
# and Model.
make_roc_df <- function(roc_obj, label) {
  false_positive_rate <- 1 - roc_obj$specificities
  true_positive_rate <- roc_obj$sensitivities
  data.frame(
    FPR = false_positive_rate,
    TPR = true_positive_rate,
    Model = label
  )
}
# Stack the three per-model data frames; each panel label carries the model
# description plus its AUC formatted to four decimals.
roc_df <- local({
  roc_objects <- list(roc1, roc2, roc3)
  label_formats <- c(
    "Table 4.1: Balance only\nAUC = %.4f",
    "Table 4.2: Student only\nAUC = %.4f",
    "Table 4.3: Balance + Income + Student\nAUC = %.4f"
  )
  pieces <- Map(
    function(roc_obj, fmt) {
      make_roc_df(roc_obj, sprintf(fmt, as.numeric(auc(roc_obj))))
    },
    roc_objects,
    label_formats
  )
  # Drop any list names so rbind() keeps plain sequential row names.
  do.call(rbind, unname(pieces))
})
# Fix the factor levels to first-appearance order so the facets are laid out
# as Table 4.1, 4.2, 4.3 from left to right.
panel_order <- unique(roc_df[["Model"]])
roc_df[["Model"]] <- factor(roc_df[["Model"]], levels = panel_order)
# ── Step 9: Plot ──
# One facet per model; the dashed diagonal is the chance line.
roc_plot <- ggplot(roc_df, aes(x = FPR, y = TPR, color = Model)) +
  geom_abline(
    slope = 1, intercept = 0,
    linetype = "dashed", color = "gray50", linewidth = 0.8
  ) +
  # geom_path() connects points in row order. geom_line() would re-sort them
  # by FPR, and where several points share one FPR (the vertical steps of a
  # ROC curve) that re-sorting joins them in the wrong sequence.
  # NOTE(review): relies on roc_df rows being in curve order within each
  # model, as the coordinates stored by pROC are expected to be.
  geom_path(linewidth = 1.2) +
  facet_wrap(~ Model) +
  scale_color_manual(values = c("#ff4d6d", "#ffd166", "#4dffb4")) +
  scale_x_continuous(limits = c(0, 1), breaks = c(0, 0.5, 1)) +
  scale_y_continuous(limits = c(0, 1), breaks = c(0, 0.5, 1)) +
  labs(
    title = "ROC Curves — Default Dataset",
    subtitle = "Tables 4.1, 4.2, 4.3 (Logistic Regression)",
    x = "False Positive Rate (1 - Specificity)",
    y = "True Positive Rate (Sensitivity)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    # The facet strips already name each model, so the legend is redundant.
    legend.position = "none",
    strip.text = element_text(size = 9, face = "bold"),
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(color = "gray50"),
    panel.grid.minor = element_blank()
  )
# Print explicitly so the figure is also drawn when the script is run through
# source(), which does not auto-print top-level values.
print(roc_plot)
