install.packages(c("dplyr", "gt", "tidymodels", "stringr", "ISLR2", "klaR"))
library(ISLR2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(gt)
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.3.0 ──
## ✔ broom 1.0.7 ✔ rsample 1.2.1
## ✔ dials 1.4.0 ✔ tibble 3.2.1
## ✔ ggplot2 3.5.1 ✔ tidyr 1.3.1
## ✔ infer 1.0.7 ✔ tune 1.3.0
## ✔ modeldata 1.4.0 ✔ workflows 1.2.0
## ✔ parsnip 1.3.1 ✔ workflowsets 1.1.0
## ✔ purrr 1.0.4 ✔ yardstick 1.3.2
## ✔ recipes 1.1.1
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ recipes::step() masks stats::step()
library(stringr)
##
## Attaching package: 'stringr'
## The following object is masked from 'package:recipes':
##
## fixed
library(tidyr)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## The following object is masked from 'package:ISLR2':
##
## Boston
library(klaR)
data("Default")
lda_default_balance_student <- lda(default ~ balance + student, data = Default)
lda_posterior <- predict(lda_default_balance_student, newdata = Default)$posterior
lda_pred <- bind_cols(
pred_default = predict(lda_default_balance_student, newdata = Default)$class,
Default
)
lda_pred_20 <- bind_cols(
Default,
posterior_prob_default = lda_posterior[, 2]
) %>%
mutate(pred_default = ifelse(posterior_prob_default > 0.2, "Yes", "No"))
nb_model <- NaiveBayes(default ~ balance + student, data = Default)
nb_prob_default <- predict(nb_model, newdata = Default)$posterior[, 2]
nb_pred <- Default %>%
mutate(nb_prob_default = nb_prob_default)
lda_pred %>%
count(pred_default, default) %>%
pivot_wider(names_from = default, values_from = n) %>%
mutate(Total = No + Yes) %>%
gt(rowname_col = "pred_default") %>%
gt::tab_spanner(label = "True default status", columns = everything()) %>%
gt::tab_stubhead("Predicted") %>%
gt::grand_summary_rows(fns = list(Total = ~round(sum(.), 0))) %>%
gt::tab_caption("Table 4.4: Confusion Matrix for LDA Model")
Predicted |
True default status
|
||
---|---|---|---|
No | Yes | Total | |
No | 9644 | 252 | 9896 |
Yes | 23 | 81 | 104 |
Total | 9667 | 333 | 10000 |
lda_pred_20 %>%
count(pred_default, default) %>%
pivot_wider(names_from = default, values_from = n) %>%
mutate(Total = No + Yes) %>%
gt(rowname_col = "pred_default") %>%
gt::tab_spanner(label = "True default status", columns = everything()) %>%
gt::tab_stubhead("Predicted") %>%
gt::grand_summary_rows(fns = list(Total = ~round(sum(.), 0))) %>%
gt::tab_caption("Table 4.5: Confusion Matrix for LDA Model with 0.2 Threshold")
Predicted |
True default status
|
||
---|---|---|---|
No | Yes | Total | |
No | 9432 | 138 | 9570 |
Yes | 235 | 195 | 430 |
Total | 9667 | 333 | 10000 |
nb_pred <- nb_pred %>%
mutate(
pred_default_0.5 = ifelse(nb_prob_default > 0.5, "Yes", "No"),
pred_default_0.2 = ifelse(nb_prob_default > 0.2, "Yes", "No")
)
nb_pred %>%
count(pred_default_0.5, default) %>%
pivot_wider(names_from = default, values_from = n) %>%
mutate(Total = No + Yes) %>%
gt(rowname_col = "pred_default_0.5") %>%
gt::tab_spanner(label = "True default status", columns = everything()) %>%
gt::tab_stubhead("Predicted") %>%
gt::grand_summary_rows(fns = list(Total = ~round(sum(.), 0))) %>%
gt::tab_caption("Table 4.8: Confusion Matrix for Naive Bayes Model with 0.5 Threshold")
Predicted |
True default status
|
||
---|---|---|---|
No | Yes | Total | |
No | 9621 | 244 | 9865 |
Yes | 46 | 89 | 135 |
Total | 9667 | 333 | 10000 |