#Table 4.4

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gt)
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.3.0 ──
## ✔ broom        1.0.8     ✔ rsample      1.3.0
## ✔ dials        1.4.0     ✔ tibble       3.2.1
## ✔ ggplot2      3.5.1     ✔ tidyr        1.3.1
## ✔ infer        1.0.7     ✔ tune         1.3.0
## ✔ modeldata    1.4.0     ✔ workflows    1.2.0
## ✔ parsnip      1.3.1     ✔ workflowsets 1.1.0
## ✔ purrr        1.0.4     ✔ yardstick    1.3.2
## ✔ recipes      1.2.1
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ✖ recipes::step()  masks stats::step()
library(stringr)
## 
## Attaching package: 'stringr'
## The following object is masked from 'package:recipes':
## 
##     fixed
library(ggplot2)
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
boston <- Boston %>%
  mutate(crim01 = ifelse(crim > median(crim), 1, 0),
         chas = factor(chas), 
         crim01 = factor(crim01))
boston %>%
  count(chas, crim01) %>%
  ggplot(aes(y = chas, x = crim01)) +
  geom_tile(aes(fill = n)) +
  geom_text(aes(label = n), color = "white") +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  theme_minimal() +
  theme(legend.position = "none") +
  labs(x = "crim01 (Low = 0, High = 1)", y = "Charles River (chas)")

#Table 4.5

library(dplyr)
library(gt)
library(tidymodels)
library(stringr)
library(MASS)     # for lda()
library(tidyr)
data("Default")  # From ISLR package
## Warning in data("Default"): data set 'Default' not found
default <- ISLR::Default
lda_fit <- lda(default ~ balance + income, data = default)
lda_posterior <- predict(lda_fit)$posterior
lda_pred_20 <- bind_cols(
  default,
  posterior_prob_default = lda_posterior[, 2]
) %>%
  mutate(
    pred_default = ifelse(posterior_prob_default > 0.2, "Yes", "No")
  )
lda_pred_20 %>%
  count(pred_default, default) %>%
  pivot_wider(names_from = default, values_from = n) %>%
  mutate(Total = No + Yes) %>%
  gt(rowname_col = "pred_default") %>%
  tab_spanner(label = "True default status", columns = everything()) %>%
  tab_stubhead("Predicted") %>%
  grand_summary_rows(
    columns = c("No", "Yes", "Total"),
    fns = list(Total = ~round(sum(.), 0))
  )
Predicted
True default status
No Yes Total
No 9427 144 9571
Yes 240 189 429
Total 9667 333 10000

#Tables 4.8 4.9

library(dplyr)
library(gt)
library(tidymodels)
library(stringr)
library(e1071)    # for naiveBayes
## 
## Attaching package: 'e1071'
## The following object is masked from 'package:tune':
## 
##     tune
## The following object is masked from 'package:rsample':
## 
##     permutations
## The following object is masked from 'package:parsnip':
## 
##     tune
library(tidyr) 
default <- ISLR::Default
nb_model <- naiveBayes(default ~ balance + income, data = default)
nb_prob <- predict(nb_model, newdata = default, type = "raw")
nb_pred <- default %>%
  mutate(nb_prob_default = nb_prob[, "Yes"])
nb_pred <- nb_pred %>%
  mutate(
    pred_default_0.5 = ifelse(nb_prob_default > 0.5, "Yes", "No"),
    pred_default_0.2 = ifelse(nb_prob_default > 0.2, "Yes", "No")
  )
nb_pred %>%
  count(pred_default_0.5, default) %>%
  pivot_wider(names_from = default, values_from = n) %>%
  mutate(Total = No + Yes) %>%
  gt(rowname_col = "pred_default_0.5") %>%
  tab_spanner(label = "True default status", columns = everything()) %>%
  tab_stubhead("Predicted (Threshold 0.5)") %>%
  grand_summary_rows(
    columns = c("No", "Yes", "Total"),
    fns = list(Total = ~round(sum(.), 0))
  )
Predicted (Threshold 0.5)
True default status
No Yes Total
No 9628 242 9870
Yes 39 91 130
Total 9667 333 10000