#Table 4.4
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(gt)
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.3.0 ──
## ✔ broom 1.0.8 ✔ rsample 1.3.0
## ✔ dials 1.4.0 ✔ tibble 3.2.1
## ✔ ggplot2 3.5.1 ✔ tidyr 1.3.1
## ✔ infer 1.0.7 ✔ tune 1.3.0
## ✔ modeldata 1.4.0 ✔ workflows 1.2.0
## ✔ parsnip 1.3.1 ✔ workflowsets 1.1.0
## ✔ purrr 1.0.4 ✔ yardstick 1.3.2
## ✔ recipes 1.2.1
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ recipes::step() masks stats::step()
library(stringr)
##
## Attaching package: 'stringr'
## The following object is masked from 'package:recipes':
##
## fixed
library(ggplot2)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
boston <- Boston %>%
mutate(crim01 = ifelse(crim > median(crim), 1, 0),
chas = factor(chas),
crim01 = factor(crim01))
boston %>%
count(chas, crim01) %>%
ggplot(aes(y = chas, x = crim01)) +
geom_tile(aes(fill = n)) +
geom_text(aes(label = n), color = "white") +
scale_x_discrete(expand = c(0, 0)) +
scale_y_discrete(expand = c(0, 0)) +
theme_minimal() +
theme(legend.position = "none") +
labs(x = "crim01 (Low = 0, High = 1)", y = "Charles River (chas)")
#Table 4.5
library(dplyr)
library(gt)
library(tidymodels)
library(stringr)
library(MASS) # for lda()
library(tidyr)
data("Default") # From ISLR package
## Warning in data("Default"): data set 'Default' not found
default <- ISLR::Default
lda_fit <- lda(default ~ balance + income, data = default)
lda_posterior <- predict(lda_fit)$posterior
lda_pred_20 <- bind_cols(
default,
posterior_prob_default = lda_posterior[, 2]
) %>%
mutate(
pred_default = ifelse(posterior_prob_default > 0.2, "Yes", "No")
)
lda_pred_20 %>%
count(pred_default, default) %>%
pivot_wider(names_from = default, values_from = n) %>%
mutate(Total = No + Yes) %>%
gt(rowname_col = "pred_default") %>%
tab_spanner(label = "True default status", columns = everything()) %>%
tab_stubhead("Predicted") %>%
grand_summary_rows(
columns = c("No", "Yes", "Total"),
fns = list(Total = ~round(sum(.), 0))
)
Predicted |
True default status
|
||
---|---|---|---|
No | Yes | Total | |
No | 9427 | 144 | 9571 |
Yes | 240 | 189 | 429 |
Total | 9667 | 333 | 10000 |
#Tables 4.8 4.9
library(dplyr)
library(gt)
library(tidymodels)
library(stringr)
library(e1071) # for naiveBayes
##
## Attaching package: 'e1071'
## The following object is masked from 'package:tune':
##
## tune
## The following object is masked from 'package:rsample':
##
## permutations
## The following object is masked from 'package:parsnip':
##
## tune
library(tidyr)
default <- ISLR::Default
nb_model <- naiveBayes(default ~ balance + income, data = default)
nb_prob <- predict(nb_model, newdata = default, type = "raw")
nb_pred <- default %>%
mutate(nb_prob_default = nb_prob[, "Yes"])
nb_pred <- nb_pred %>%
mutate(
pred_default_0.5 = ifelse(nb_prob_default > 0.5, "Yes", "No"),
pred_default_0.2 = ifelse(nb_prob_default > 0.2, "Yes", "No")
)
nb_pred %>%
count(pred_default_0.5, default) %>%
pivot_wider(names_from = default, values_from = n) %>%
mutate(Total = No + Yes) %>%
gt(rowname_col = "pred_default_0.5") %>%
tab_spanner(label = "True default status", columns = everything()) %>%
tab_stubhead("Predicted (Threshold 0.5)") %>%
grand_summary_rows(
columns = c("No", "Yes", "Total"),
fns = list(Total = ~round(sum(.), 0))
)
Predicted (Threshold 0.5) |
True default status
|
||
---|---|---|---|
No | Yes | Total | |
No | 9628 | 242 | 9870 |
Yes | 39 | 91 | 130 |
Total | 9667 | 333 | 10000 |