Install Necessary Packages

install.packages(c("dplyr", "gt", "tidymodels", "stringr", "ISLR2", "klaR"))

Load Libraries

library(ISLR2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gt)
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.3.0 ──
## ✔ broom        1.0.7     ✔ rsample      1.2.1
## ✔ dials        1.4.0     ✔ tibble       3.2.1
## ✔ ggplot2      3.5.1     ✔ tidyr        1.3.1
## ✔ infer        1.0.7     ✔ tune         1.3.0
## ✔ modeldata    1.4.0     ✔ workflows    1.2.0
## ✔ parsnip      1.3.1     ✔ workflowsets 1.1.0
## ✔ purrr        1.0.4     ✔ yardstick    1.3.2
## ✔ recipes      1.1.1
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ✖ recipes::step()  masks stats::step()
library(stringr)
## 
## Attaching package: 'stringr'
## The following object is masked from 'package:recipes':
## 
##     fixed
library(tidyr)
library(MASS) 
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## The following object is masked from 'package:ISLR2':
## 
##     Boston
library(klaR) 

Load Dataset

data("Default")

Train the LDA Model

lda_default_balance_student <- lda(default ~ balance + student, data = Default)
lda_posterior <- predict(lda_default_balance_student, newdata = Default)$posterior

LDA Predictions

lda_pred <- bind_cols(
  pred_default = predict(lda_default_balance_student, newdata = Default)$class,
  Default
)

Modify Threshold for 0.2 Probability

lda_pred_20 <- bind_cols(
  Default,
  posterior_prob_default = lda_posterior[, 2]
) %>%
  mutate(pred_default = ifelse(posterior_prob_default > 0.2, "Yes", "No"))

Train the Naive Bayes Model

nb_model <- NaiveBayes(default ~ balance + student, data = Default)
nb_prob_default <- predict(nb_model, newdata = Default)$posterior[, 2]

Naive Bayes Predictions

nb_pred <- Default %>%
  mutate(nb_prob_default = nb_prob_default)

Table 4.4: LDA Prediction Table

lda_pred %>%
  count(pred_default, default) %>%
  pivot_wider(names_from = default, values_from = n) %>%
  mutate(Total = No + Yes) %>%
  gt(rowname_col = "pred_default") %>%
  gt::tab_spanner(label = "True default status", columns = everything()) %>%
  gt::tab_stubhead("Predicted") %>%
  gt::grand_summary_rows(fns = list(Total = ~round(sum(.), 0))) %>%
  gt::tab_caption("Table 4.4: Confusion Matrix for LDA Model")
Table 4.4: Confusion Matrix for LDA Model
Predicted
True default status
No Yes Total
No 9644 252 9896
Yes 23 81 104
Total 9667 333 10000

Table 4.5: LDA Threshold (0.2) Prediction Table

lda_pred_20 %>%
  count(pred_default, default) %>%
  pivot_wider(names_from = default, values_from = n) %>%
  mutate(Total = No + Yes) %>%
  gt(rowname_col = "pred_default") %>%
  gt::tab_spanner(label = "True default status", columns = everything()) %>%
  gt::tab_stubhead("Predicted") %>%
  gt::grand_summary_rows(fns = list(Total = ~round(sum(.), 0))) %>%
  gt::tab_caption("Table 4.5: Confusion Matrix for LDA Model with 0.2 Threshold")
Table 4.5: Confusion Matrix for LDA Model with 0.2 Threshold
Predicted
True default status
No Yes Total
No 9432 138 9570
Yes 235 195 430
Total 9667 333 10000

Generate Naive Bayes Predictions for Different Probability Thresholds

nb_pred <- nb_pred %>%
  mutate(
    pred_default_0.5 = ifelse(nb_prob_default > 0.5, "Yes", "No"),
    pred_default_0.2 = ifelse(nb_prob_default > 0.2, "Yes", "No")
  )

Table 4.8: Naive Bayes Prediction Table

nb_pred %>%
  count(pred_default_0.5, default) %>%
  pivot_wider(names_from = default, values_from = n) %>%
  mutate(Total = No + Yes) %>%
  gt(rowname_col = "pred_default_0.5") %>%
  gt::tab_spanner(label = "True default status", columns = everything()) %>%
  gt::tab_stubhead("Predicted") %>%
  gt::grand_summary_rows(fns = list(Total = ~round(sum(.), 0))) %>%
  gt::tab_caption("Table 4.8: Confusion Matrix for Naive Bayes Model with 0.5 Threshold")
Table 4.8: Confusion Matrix for Naive Bayes Model with 0.5 Threshold
Predicted
True default status
No Yes Total
No 9621 244 9865
Yes 46 89 135
Total 9667 333 10000