# ── Step 1: Set CRAN mirror & install packages if not already installed ──
options(repos = c(CRAN = "https://cran.rstudio.com/"))

# Install any package that is missing. requireNamespace() only checks that
# the package can be loaded; unlike require() it does not attach it as a
# side effect. Attaching is left to the library() calls in Step 2, which
# stop with an error if a package is still unavailable.
if (!requireNamespace("pROC", quietly = TRUE)) install.packages("pROC")
if (!requireNamespace("ggplot2", quietly = TRUE)) install.packages("ggplot2")

# Note: once attached, pROC masks cov, smooth and var from 'package:stats'
# (see 'citation("pROC")' for how to cite the package).

# ── Step 2: Load packages ──
library(pROC)
library(ggplot2)

# ── Step 3: Load data — put your actual file path here ──
# Example Windows path: "C:/Users/alex/Downloads/Default.csv"
# Example Mac/Linux:    "/Users/alex/Downloads/Default.csv"
# Read the data set from disk; edit the path to match your machine.
df <- read.csv(file = "C:/Users/alex/Downloads/Default.csv")

# ── Step 4: Prepare variables ──
# Recode the two text columns as numeric indicators: 1 where the value is
# exactly "Yes", 0 for any other value.
df[["default_bin"]] <- ifelse(df[["default"]] == "Yes", 1, 0)
df[["student_bin"]] <- ifelse(df[["student"]] == "Yes", 1, 0)

# ── Step 5: Fit 3 Logistic Regression Models (Tables 4.1, 4.2, 4.3) ──
# Binomial GLMs on the 0/1 default indicator, one per table:
#   model1 - balance only
#   model2 - student indicator only
#   model3 - balance, income and student indicator together
model1 <- glm(formula = default_bin ~ balance,
              family  = binomial,
              data    = df)
model2 <- glm(formula = default_bin ~ student_bin,
              family  = binomial,
              data    = df)
model3 <- glm(formula = default_bin ~ balance + income + student_bin,
              family  = binomial,
              data    = df)

# ── Step 6: Compute ROC curves ──
# Build one ROC object per model from the in-sample predicted probabilities.
#
# levels and direction are pinned explicitly: 0 is the control (no default),
# 1 is the case (default), and cases are expected to score higher than
# controls. With quiet = TRUE pROC would otherwise choose both silently, and
# its automatic direction can flip a worse-than-chance predictor so that the
# reported AUC is always >= 0.5.
roc1 <- roc(df$default_bin, predict(model1, type = "response"),
            levels = c(0, 1), direction = "<", quiet = TRUE)
roc2 <- roc(df$default_bin, predict(model2, type = "response"),
            levels = c(0, 1), direction = "<", quiet = TRUE)
roc3 <- roc(df$default_bin, predict(model3, type = "response"),
            levels = c(0, 1), direction = "<", quiet = TRUE)

# ── Step 7: Print AUC Summary ──
# Print a header, then one formatted AUC line per model. The three lines are
# emitted by a single cat() call; sep = "" keeps them exactly as formatted.
cat("\n=== AUC Summary ===\n")
cat(
  sprintf("Table 4.1 - Balance only:               AUC = %.4f\n", as.numeric(auc(roc1))),
  sprintf("Table 4.2 - Student only:               AUC = %.4f\n", as.numeric(auc(roc2))),
  sprintf("Table 4.3 - Balance + Income + Student: AUC = %.4f\n", as.numeric(auc(roc3))),
  sep = ""
)
# Output recorded from a previous run:
## 
## === AUC Summary ===
## Table 4.1 - Balance only:               AUC = 0.9480
## Table 4.2 - Student only:               AUC = 0.5450
## Table 4.3 - Balance + Income + Student: AUC = 0.9496
# ── Step 8: Build data frames for ggplot ──
# Convert a ROC object into a data frame of curve coordinates for ggplot.
#
# Args:
#   roc_obj: object exposing `specificities` and `sensitivities` vectors of
#            equal length (accessed with `$`).
#   label:   value stored in the `Model` column of every row.
#
# Returns:
#   A data.frame with columns FPR (1 - specificity), TPR (sensitivity) and
#   Model. Rows are sorted by FPR and then TPR, so the curve is traced
#   monotonically from low to high in both coordinates. Without the TPR
#   tie-break, points sharing one FPR value would stay in their incoming
#   order; geom_line() sorts by x only and would then draw a sawtooth across
#   the vertical steps of the curve.
make_roc_df <- function(roc_obj, label) {
  fpr <- 1 - roc_obj$specificities
  tpr <- roc_obj$sensitivities
  ord <- order(fpr, tpr)
  data.frame(
    FPR   = fpr[ord],
    TPR   = tpr[ord],
    Model = label
  )
}

# Stack the coordinates of the three ROC curves into one long data frame.
# Each curve is labelled with its table title plus its AUC to four decimals;
# that label is later used as the facet strip text.
roc_df <- local({
  # Coordinates for one curve, labelled by filling the AUC into title_fmt.
  panel_rows <- function(roc_obj, title_fmt) {
    make_roc_df(roc_obj, sprintf(title_fmt, as.numeric(auc(roc_obj))))
  }

  stacked <- rbind(
    panel_rows(roc1, "Table 4.1: Balance only\nAUC = %.4f"),
    panel_rows(roc2, "Table 4.2: Student only\nAUC = %.4f"),
    panel_rows(roc3, "Table 4.3: Balance + Income + Student\nAUC = %.4f")
  )

  # Lock factor order so panels appear left to right
  stacked$Model <- factor(stacked$Model, levels = unique(stacked$Model))
  stacked
})

# ── Step 9: Plot ──
# One facet per model, each showing its ROC curve against the dashed
# chance diagonal.
#
# geom_path() connects points in row order, which follows the curve from one
# corner to the other. geom_line() would instead re-sort by FPR only, leaving
# points that share an FPR value in arbitrary TPR order and drawing a
# sawtooth across the vertical steps of the curve.
roc_plot <- ggplot(roc_df, aes(x = FPR, y = TPR, color = Model)) +
  geom_abline(slope = 1, intercept = 0,
              linetype = "dashed", color = "gray50", linewidth = 0.8) +
  geom_path(linewidth = 1.2) +
  facet_wrap(~ Model) +
  scale_color_manual(values = c("#ff4d6d", "#ffd166", "#4dffb4")) +
  scale_x_continuous(limits = c(0, 1), breaks = c(0, 0.5, 1)) +
  scale_y_continuous(limits = c(0, 1), breaks = c(0, 0.5, 1)) +
  labs(
    title    = "ROC Curves — Default Dataset",
    subtitle = "Tables 4.1, 4.2, 4.3 (Logistic Regression)",
    x        = "False Positive Rate (1 - Specificity)",
    y        = "True Positive Rate (Sensitivity)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    # Facet strips already carry the model label, so the legend is redundant.
    legend.position = "none",
    strip.text      = element_text(size = 9,  face = "bold"),
    plot.title      = element_text(size = 14, face = "bold"),
    plot.subtitle   = element_text(color = "gray50"),
    panel.grid.minor = element_blank()
  )

# Print explicitly so the figure is also drawn when the script is run with
# source(), which does not auto-print top-level values.
print(roc_plot)