Load Data

# Read the Default data set from the working directory. The first CSV column
# has no header, so readr names it `...1` (see the "New names" message below).
data <- read_csv("Default.csv")
## New names:
## Rows: 10000 Columns: 5
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): default, student dbl (3): ...1, balance, income
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Preview the first six rows to check the imported columns and their types.
head(data)
## # A tibble: 6 × 5
##    ...1 default student balance income
##   <dbl> <chr>   <chr>     <dbl>  <dbl>
## 1     1 No      No         730. 44362.
## 2     2 No      Yes        817. 12106.
## 3     3 No      No        1074. 31767.
## 4     4 No      No         529. 35704.
## 5     5 No      No         786. 38463.
## 6     6 No      Yes        920.  7492.

Make sure the variables have the correct types:

# Convert the two Yes/No text columns to factors so that glm() treats
# `default` as a binary outcome and `student` as a categorical predictor.
data <- data %>%
  mutate(across(c(default, student), as.factor))

Models (Tables 4.1, 4.2, 4.3)

Model 1: balance only

# Logistic regression of default status on credit card balance alone.
# With a factor response, glm() models the probability of the second level
# ("Yes").
model1 <- glm(
  formula = default ~ balance,
  family = binomial(link = "logit"),
  data = data
)

Model 2: income only

# Logistic regression of default status on income alone.
model2 <- glm(
  formula = default ~ income,
  family = binomial(link = "logit"),
  data = data
)

Model 3: balance + income + student

# Full logistic regression: balance, income and student status together.
model3 <- glm(
  formula = default ~ balance + income + student,
  family = binomial(link = "logit"),
  data = data
)

Predicted Probabilities

# In-sample fitted probabilities: with no `newdata`, predict() scores the same
# observations each model was fitted on, and type = "response" returns
# probabilities rather than log-odds.
prob1 <- predict(object = model1, type = "response")
prob2 <- predict(object = model2, type = "response")
prob3 <- predict(object = model3, type = "response")

ROC Curves

# Build one ROC curve per model from the observed default status and the
# fitted probabilities.
#
# `levels` and `direction` are given explicitly ("No" = control, "Yes" = case,
# cases expected to have the higher predicted probability) instead of being
# auto-detected. This fixes the positive class for all three curves and stops
# pROC from choosing the direction separately for each model, which can
# flatter the AUC of a weak predictor. The values match what pROC's
# auto-detection selects for this data, so the curves are unchanged; with both
# arguments supplied, no "Setting levels" / "Setting direction" messages are
# printed.
roc1 <- roc(data$default, prob1,
            levels = c("No", "Yes"), direction = "<")
roc2 <- roc(data$default, prob2,
            levels = c("No", "Yes"), direction = "<")
roc3 <- roc(data$default, prob3,
            levels = c("No", "Yes"), direction = "<")

Plot ROC Curves Together

# Overlay the three ROC curves on a single set of axes, one colour per model.
# Colours and labels are defined once so the curves and the legend stay in
# sync.
curve_cols <- c("blue", "red", "green")
curve_labels <- c("Balance", "Income", "Full Model")

plot(roc1, col = curve_cols[1], main = "ROC Curves for Default Models")
plot(roc2, col = curve_cols[2], add = TRUE)
plot(roc3, col = curve_cols[3], add = TRUE)

# Key in the lower-right corner, matching each colour to its model.
legend(x = "bottomright", legend = curve_labels, col = curve_cols, lwd = 2)


AUC Comparison

# Area under each ROC curve: 0.5 corresponds to random guessing and 1 to
# perfect separation of defaulters from non-defaulters.
auc(roc1)  # balance only
## Area under the curve: 0.948
auc(roc2)  # income only
## Area under the curve: 0.5327
auc(roc3)  # balance + income + student
## Area under the curve: 0.9496

Interpretation

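The AUC values confirm that credit card balance is the strongest single predictor of default: the balance-only model reaches an AUC of 0.948, whereas the income-only model (AUC 0.5327) performs barely better than random guessing (AUC 0.5). Adding income and student status to balance raises the AUC only slightly, to 0.9496. Note that these AUCs are computed on the same data used to fit the models, so they are likely to be somewhat optimistic.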


Conclusion

The ROC curves show that the predictors do not contribute equally to classification performance. Among the three models, balance provides most of the predictive power, while income on its own adds little (its ROC curve lies close to the diagonal). The full model performs best overall, but only marginally better than using balance alone (AUC 0.9496 versus 0.948).