Load Libraries and Data
library(ggplot2)
library(pROC)
Default <- read.csv("Default.csv")
Default$default <- ifelse(Default$default == "Yes", 1, 0)
Default$student <- ifelse(Default$student == "Yes", 1, 0)
Fit Logistic Regression Models
Model 1 – Table 4.1: default ~ balance
model1 <- glm(default ~ balance, data = Default, family = binomial)
summary(model1)
##
## Call:
## glm(formula = default ~ balance, family = binomial, data = Default)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.065e+01 3.612e-01 -29.49 <2e-16 ***
## balance 5.499e-03 2.204e-04 24.95 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2920.6 on 9999 degrees of freedom
## Residual deviance: 1596.5 on 9998 degrees of freedom
## AIC: 1600.5
##
## Number of Fisher Scoring iterations: 8
Model 2 – Table 4.2: default ~ student
model2 <- glm(default ~ student, data = Default, family = binomial)
summary(model2)
##
## Call:
## glm(formula = default ~ student, family = binomial, data = Default)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.50413 0.07071 -49.55 < 2e-16 ***
## student 0.40489 0.11502 3.52 0.000431 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2920.6 on 9999 degrees of freedom
## Residual deviance: 2908.7 on 9998 degrees of freedom
## AIC: 2912.7
##
## Number of Fisher Scoring iterations: 6
Model 3 – Table 4.3: default ~ balance + income + student
model3 <- glm(default ~ balance + income + student, data = Default, family = binomial)
summary(model3)
##
## Call:
## glm(formula = default ~ balance + income + student, family = binomial,
## data = Default)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.087e+01 4.923e-01 -22.080 < 2e-16 ***
## balance 5.737e-03 2.319e-04 24.738 < 2e-16 ***
## income 3.033e-06 8.203e-06 0.370 0.71152
## student -6.468e-01 2.363e-01 -2.738 0.00619 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2920.6 on 9999 degrees of freedom
## Residual deviance: 1571.5 on 9996 degrees of freedom
## AIC: 1579.5
##
## Number of Fisher Scoring iterations: 8
Predicted Probabilities
prob1 <- predict(model1, type = "response")
prob2 <- predict(model2, type = "response")
prob3 <- predict(model3, type = "response")
ROC Curves
roc1 <- roc(Default$default, prob1)
roc2 <- roc(Default$default, prob2)
roc3 <- roc(Default$default, prob3)
# Base plot
plot(roc1, col = "red", lwd = 2,
main = "ROC Curves for Default Dataset",
xlab = "False Positive Rate (1 - Specificity)",
ylab = "True Positive Rate (Sensitivity)",
legacy.axes = TRUE)
plot(roc2, col = "blue", lwd = 2, add = TRUE)
plot(roc3, col = "darkgreen", lwd = 2, add = TRUE)
abline(a = 0, b = 1, lty = 2, col = "gray")
legend("bottomright",
legend = c(
paste0("Model 1: balance (AUC = ", round(auc(roc1), 4), ")"),
paste0("Model 2: student (AUC = ", round(auc(roc2), 4), ")"),
paste0("Model 3: balance + income + student (AUC = ", round(auc(roc3), 4), ")")
),
col = c("red", "blue", "darkgreen"),
lwd = 2)

AUC Summary
auc_table <- data.frame(
Model = c("Model 1: balance",
"Model 2: student",
"Model 3: balance + income + student"),
AUC = c(round(auc(roc1), 4),
round(auc(roc2), 4),
round(auc(roc3), 4))
)
knitr::kable(auc_table, caption = "AUC Values for Each Model")
AUC Values for Each Model
| Model 1: balance |
0.9480 |
| Model 2: student |
0.5450 |
| Model 3: balance + income + student |
0.9496 |
ggplot Version of ROC Curves
roc_df <- rbind(
data.frame(FPR = 1 - roc1$specificities, TPR = roc1$sensitivities,
Model = paste0("balance (AUC=", round(auc(roc1), 4), ")")),
data.frame(FPR = 1 - roc2$specificities, TPR = roc2$sensitivities,
Model = paste0("student (AUC=", round(auc(roc2), 4), ")")),
data.frame(FPR = 1 - roc3$specificities, TPR = roc3$sensitivities,
Model = paste0("balance+income+student (AUC=", round(auc(roc3), 4), ")"))
)
ggplot(roc_df, aes(x = FPR, y = TPR, color = Model)) +
geom_line(linewidth = 1) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray") +
labs(title = "ROC Curves for Default Dataset",
x = "False Positive Rate (1 - Specificity)",
y = "True Positive Rate (Sensitivity)") +
theme_minimal() +
theme(legend.position = "bottom")
