library(yardstick)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(purrr)
library(gt)

4.8 Exercise 13

library(ISLR2)
library(skimr)
weekly <- ISLR2::Weekly
skimr::skim(weekly)
Data summary
Name                    weekly
Number of rows          1089
Number of columns       9
Column type frequency:
  factor                1
  numeric               8
Group variables         None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
Direction 0 1 FALSE 2 Up: 605, Dow: 484

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Year 0 1 2000.05 6.03 1990.00 1995.00 2000.00 2005.00 2010.00 ▇▆▆▆▆
Lag1 0 1 0.15 2.36 -18.20 -1.15 0.24 1.41 12.03 ▁▁▆▇▁
Lag2 0 1 0.15 2.36 -18.20 -1.15 0.24 1.41 12.03 ▁▁▆▇▁
Lag3 0 1 0.15 2.36 -18.20 -1.16 0.24 1.41 12.03 ▁▁▆▇▁
Lag4 0 1 0.15 2.36 -18.20 -1.16 0.24 1.41 12.03 ▁▁▆▇▁
Lag5 0 1 0.14 2.36 -18.20 -1.17 0.23 1.41 12.03 ▁▁▆▇▁
Volume 0 1 1.57 1.69 0.09 0.33 1.00 2.05 9.33 ▇▂▁▁▁
Today 0 1 0.15 2.36 -18.20 -1.15 0.24 1.41 12.03 ▁▁▆▇▁
library(dplyr)
library(corrr)
## 
## Attaching package: 'corrr'
## The following object is masked from 'package:skimr':
## 
##     focus
library(gt)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
library(RColorBrewer)

weekly %>%
  dplyr::select(-Direction) %>%
  corrr::correlate(method = "pearson", quiet = TRUE) %>%
  gt(rowname_col = "term") %>%
  gt::sub_missing(columns = everything(), missing_text = "") %>%
  gt::data_color(
    columns = everything(),
    fn = scales::col_numeric(
      palette = brewer.pal(5, "RdBu"),  # diverging Brewer palette
      domain = c(-0.1, 0.9)
    )
  ) %>%
  gt::fmt_number(columns = everything(), decimals = 3)
        Year    Lag1    Lag2    Lag3    Lag4    Lag5    Volume  Today
Year            −0.032  −0.033  −0.030  −0.031  −0.031   0.842  −0.032
Lag1    −0.032          −0.075   0.059  −0.071  −0.008  −0.065  −0.075
Lag2    −0.033  −0.075          −0.076   0.058  −0.072  −0.086   0.059
Lag3    −0.030   0.059  −0.076          −0.075   0.061  −0.069  −0.071
Lag4    −0.031  −0.071   0.058  −0.075          −0.076  −0.061  −0.008
Lag5    −0.031  −0.008  −0.072   0.061  −0.076          −0.059   0.011
Volume   0.842  −0.065  −0.086  −0.069  −0.061  −0.059          −0.033
Today   −0.032  −0.075   0.059  −0.071  −0.008   0.011  −0.033
library(ggplot2)
weekly %>%
  ggplot(aes(x = factor(Year), y = Volume)) +
  geom_jitter(width = 0.3, color = "yellow") +
  geom_boxplot(alpha = 0.3, outlier.shape = NA, width = 0.2)

library(forcats)
weekly <- weekly %>% mutate(Direction = fct_rev(Direction))
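fct_rev() puts "Up" ahead of "Down", so yardstick treats "Up" as the event level by default. A quick sanity check (sketch):

levels(weekly$Direction)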
library(broom)
library(parsnip)
lr_weekly_fit <- logistic_reg() %>%
  fit(Direction ~ Year + Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume, data = weekly)
tidy(lr_weekly_fit) %>% gt()
term estimate std.error statistic p.value
(Intercept) -17.225822230 37.89052190 -0.45462088 0.6493820
Year 0.008499918 0.01899083 0.44758011 0.6544563
Lag1 0.040687571 0.02644652 1.53848459 0.1239302
Lag2 -0.059448637 0.02697031 -2.20422531 0.0275085
Lag3 0.015477987 0.02670309 0.57963289 0.5621622
Lag4 0.027316278 0.02648478 1.03139539 0.3023554
Lag5 0.014022185 0.02640947 0.53095285 0.5954515
Volume -0.003256253 0.06883640 -0.04730423 0.9622708
library(yardstick)
lr_weekly_fit_conf_mat <- augment(lr_weekly_fit, weekly) %>%
  conf_mat(truth = Direction, estimate = .pred_class)
lr_weekly_fit_conf_mat
##           Truth
## Prediction  Up Down
##       Up   558  428
##       Down  47   56
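The overall fraction of correct predictions can be computed from the same augmented predictions; a minimal sketch using yardstick::accuracy():

augment(lr_weekly_fit, weekly) %>%
  accuracy(truth = Direction, estimate = .pred_class)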
weekly_train <- weekly %>% filter(Year <= 2008)
weekly_test <- weekly %>% filter(Year > 2008)
lr_weekly_fit_lag2 <-
  logistic_reg() %>%
  fit(Direction ~ Lag2, data = weekly_train)
lr_weekly_fit_lag2_conf_mat <-
  augment(lr_weekly_fit_lag2, weekly_test) %>%
  conf_mat(truth = Direction, estimate = .pred_class)
lr_weekly_fit_lag2_conf_mat
##           Truth
## Prediction Up Down
##       Up   56   34
##       Down  5    9
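A fuller set of statistics (accuracy, sensitivity, specificity, and more) can also be derived directly from the conf_mat object; a sketch:

summary(lr_weekly_fit_lag2_conf_mat)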
library(discrim)
model_fits <- list(
  "logistic" = lr_weekly_fit_lag2,  # Your logistic regression model
  "LDA" = discrim_linear() %>% fit(Direction ~ Lag2, data = weekly_train),
  "QDA" = discrim_quad() %>% fit(Direction ~ Lag2, data = weekly_train),
  "KNN1" = nearest_neighbor(mode = "classification", neighbors = 1) %>%
    fit(Direction ~ Lag2, data = weekly_train),
  "NB" = naive_Bayes() %>% fit(Direction ~ Lag2, data = weekly_train)
)

# Define the metric set
weekly_metrics <- metric_set(accuracy, sens, spec, ppv)

# Generate performance metrics for each model
output <- imap_dfr(
  model_fits,
  ~ augment(.x, new_data = weekly_test) %>%
    weekly_metrics(truth = Direction, estimate = .pred_class) %>%
    mutate(model = .y)  # add the model name taken from the list element
)

# Check structure of the output
glimpse(output)
## Rows: 20
## Columns: 4
## $ .metric    <chr> "accuracy", "sens", "spec", "ppv", "accuracy", "sens", "spe…
## $ .estimator <chr> "binary", "binary", "binary", "binary", "binary", "binary",…
## $ .estimate  <dbl> 0.6250000, 0.9180328, 0.2093023, 0.6222222, 0.6250000, 0.91…
## $ model      <chr> "logistic", "logistic", "logistic", "logistic", "LDA", "LDA…
library(dplyr)   # Ensure dplyr is loaded
library(tidyr)   # For pivot_wider
library(gt)      # For creating the table

output %>%
  dplyr::select(model, .metric, .estimate) %>%
  tidyr::pivot_wider(names_from = .metric, values_from = .estimate) %>%
  gt(rowname_col = "model") %>%
  gt::fmt_percent(columns = -model)
accuracy sens spec ppv
logistic 62.50% 91.80% 20.93% 62.22%
LDA 62.50% 91.80% 20.93% 62.22%
QDA 58.65% 100.00% 0.00% 58.65%
KNN1 50.00% 49.18% 51.16% 58.82%
NB 60.58% 91.80% 16.28% 60.87%
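All four metrics above depend on the default class threshold. A threshold-free comparison could use ROC AUC on the predicted class probabilities; a sketch, assuming augment() names the probability column .pred_Up (it should, since "Up" is the first factor level after the fct_rev() above):

imap_dfr(
  model_fits,
  ~ augment(.x, new_data = weekly_test) %>%
    roc_auc(truth = Direction, .pred_Up),
  .id = "model"
)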
auto <- ISLR2::Auto %>%
  mutate(mpg01 = ifelse(mpg > median(mpg), 1, 0),
         mpg01 = factor(mpg01))
glimpse(auto)
## Rows: 392
## Columns: 10
## $ mpg          <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, 15, 14, 15, 14, 2…
## $ cylinders    <int> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 6, 6, 6, 4, …
## $ displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, 390, 383, 34…
## $ horsepower   <int> 130, 165, 150, 150, 140, 198, 220, 215, 225, 190, 170, 16…
## $ weight       <int> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 4312, 4425, 385…
## $ acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5, 10.0, 8.5, …
## $ year         <int> 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 7…
## $ origin       <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 3, …
## $ name         <fct> chevrolet chevelle malibu, buick skylark 320, plymouth sa…
## $ mpg01        <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, …
auto <- auto %>%
  mutate(origin = factor(origin, levels = 1:3,
                         labels = c("American", "European", "Japanese")))
library(dplyr)  # Ensure dplyr is loaded
library(tidyr)  # For pivot_longer
library(ggplot2)  # For ggplot

auto %>%
  dplyr::select(-name, -origin, -mpg) %>%
  pivot_longer(-mpg01, names_to = "var", values_to = "val") %>%
  ggplot(aes(y = mpg01, x = val)) +
  geom_boxplot(aes(fill = factor(mpg01))) +
  facet_wrap(~var, scales = "free_x") +
  theme(
    legend.position = "none",
    # Border around the facet strip labels
    strip.background = element_rect(color = "black", fill = "lightgray", linewidth = 1)
  )

auto %>%
  count(origin, mpg01) %>%
  ggplot(aes(y = origin, x = mpg01)) +
  geom_tile(aes(fill = n)) +
  geom_text(aes(label = n), color = "white") +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  theme(legend.position = "none")

auto %>%
  dplyr::select(-c(name, mpg01, origin)) %>%
  corrr::correlate(method = "pearson", quiet = TRUE) %>%
  gt(rowname_col = "term") %>%
  gt::sub_missing(columns = everything(), missing_text = "") %>%
  gt::data_color(
    columns = everything(),
    fn = scales::col_numeric(
      palette = "RdBu",
      domain = c(-1, 1)
    )
  ) %>%
  gt::fmt_number(columns = everything(), decimals = 2)
             mpg    cylinders  displacement  horsepower  weight  acceleration  year
mpg                   −0.78       −0.81        −0.78     −0.83       0.42       0.58
cylinders   −0.78                  0.95         0.84      0.90      −0.50      −0.35
displacement −0.81     0.95                     0.90      0.93      −0.54      −0.37
horsepower  −0.78      0.84        0.90                   0.86      −0.69      −0.42
weight      −0.83      0.90        0.93         0.86                −0.42      −0.31
acceleration 0.42     −0.50       −0.54        −0.69     −0.42                  0.29
year         0.58     −0.35       −0.37        −0.42     −0.31       0.29
library(rsample)  # Load the package
set.seed(49)
auto_split <- initial_split(auto, prop = 3 / 4)
auto_train <- training(auto_split)
auto_test <- testing(auto_split)
auto_train %>% count(mpg01)
##   mpg01   n
## 1     0 148
## 2     1 146
auto_test %>% count(mpg01)
##   mpg01  n
## 1     0 48
## 2     1 50
library(recipes)  # Load the package
## 
## Attaching package: 'recipes'
## The following object is masked from 'package:stats':
## 
##     step
auto_recipe <- recipe(
  mpg01 ~ cylinders + displacement + horsepower + weight + acceleration +
    year + origin,
  data = auto_train
) %>%
  # Normalize numerical predictors to work with KNN
  step_normalize(all_numeric_predictors()) %>%
  step_dummy(origin)
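To see exactly what the recipe hands to each model (normalized numeric predictors plus dummy columns for origin), it can be prepped and baked on the training data; a sketch:

auto_recipe %>%
  prep() %>%
  bake(new_data = NULL) %>%
  glimpse()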
library(workflows)  # Load the package
auto_workflow <- workflow() %>%
  add_recipe(auto_recipe)
# Load all required packages first
library(tidymodels)  # This includes dplyr, purrr, etc.
## ── Attaching packages ────────────────────────────────────── tidymodels 1.3.0 ──
## ✔ dials        1.4.0     ✔ tibble       3.2.1
## ✔ infer        1.0.7     ✔ tune         1.3.0
## ✔ modeldata    1.4.0     ✔ workflowsets 1.1.0
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard()   masks purrr::discard()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ dials::smoothness() masks discrim::smoothness()
## ✖ recipes::step()     masks stats::step()
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ lubridate 1.9.4     ✔ stringr   1.5.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ scales::discard()   masks purrr::discard()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ stringr::fixed()    masks recipes::fixed()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ readr::spec()       masks yardstick::spec()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gt)
library(discrim)  # For discriminant analysis models
library(conflicted)

# Set conflict preferences
conflict_prefer("select", "dplyr")
## [conflicted] Will prefer dplyr::select over any other package.
conflict_prefer("filter", "dplyr")
## [conflicted] Will prefer dplyr::filter over any other package.
conflict_prefer("spec", "yardstick")  # Add this line to resolve the spec conflict
## [conflicted] Will prefer yardstick::spec over any other package.
# Define metrics first
auto_metrics <- metric_set(accuracy, sens, spec, ppv)

# Fit each candidate model with the shared workflow
model_fits <-
  list(
    "LDA" = auto_workflow %>%
      add_model(discrim_linear()) %>%
      fit(data = auto_train),
    "QDA" = auto_workflow %>%
      add_model(discrim_quad()) %>%
      fit(data = auto_train),
    "logistic" = auto_workflow %>%
      add_model(logistic_reg()) %>%
      fit(data = auto_train),
    "NB" = auto_workflow %>%
      add_model(naive_Bayes()) %>%
      fit(data = auto_train),
    "KNN1" = auto_workflow %>%
      add_model(nearest_neighbor(mode = "classification", neighbors = 1)) %>%
      fit(data = auto_train),
    "KNN3" = auto_workflow %>%
      add_model(nearest_neighbor(mode = "classification", neighbors = 3)) %>%
      fit(data = auto_train),
    "KNN5" = auto_workflow %>%
      add_model(nearest_neighbor(mode = "classification", neighbors = 5)) %>%
      fit(data = auto_train),
    "KNN7" = auto_workflow %>%
      add_model(nearest_neighbor(mode = "classification", neighbors = 7)) %>%
      fit(data = auto_train)
  )

# Finally, evaluate the models
imap_dfr(
  model_fits,
  ~augment(.x, new_data = auto_test) %>%
    auto_metrics(truth = mpg01, estimate = .pred_class),
  .id = "model"
) %>%
  dplyr::select(model, .metric, .estimate) %>%
  tidyr::pivot_wider(names_from = .metric, values_from = .estimate) %>%
  dplyr::arrange(desc(accuracy)) %>%
  gt(rowname_col = "model") %>%
  fmt_percent(columns = -model, decimals = 1)
accuracy sens spec ppv
LDA 90.8% 87.5% 94.0% 93.3%
logistic 90.8% 91.7% 90.0% 89.8%
QDA 89.8% 85.4% 94.0% 93.2%
NB 89.8% 87.5% 92.0% 91.3%
KNN1 89.8% 93.8% 86.0% 86.5%
KNN3 89.8% 93.8% 86.0% 86.5%
KNN5 89.8% 91.7% 88.0% 88.0%
KNN7 89.8% 91.7% 88.0% 88.0%
boston <- ISLR2::Boston %>%
  mutate(
    crim01 = ifelse(crim > median(crim), 1, 0),
    crim01 = factor(crim01),
    # Convert the binary chas variable to TRUE/FALSE
    chas = chas == 1
  )
glimpse(boston)
## Rows: 506
## Columns: 14
## $ crim    <dbl> 0.00632, 0.02731, 0.02729, 0.03237, 0.06905, 0.02985, 0.08829,…
## $ zn      <dbl> 18.0, 0.0, 0.0, 0.0, 0.0, 0.0, 12.5, 12.5, 12.5, 12.5, 12.5, 1…
## $ indus   <dbl> 2.31, 7.07, 7.07, 2.18, 2.18, 2.18, 7.87, 7.87, 7.87, 7.87, 7.…
## $ chas    <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,…
## $ nox     <dbl> 0.538, 0.469, 0.469, 0.458, 0.458, 0.458, 0.524, 0.524, 0.524,…
## $ rm      <dbl> 6.575, 6.421, 7.185, 6.998, 7.147, 6.430, 6.012, 6.172, 5.631,…
## $ age     <dbl> 65.2, 78.9, 61.1, 45.8, 54.2, 58.7, 66.6, 96.1, 100.0, 85.9, 9…
## $ dis     <dbl> 4.0900, 4.9671, 4.9671, 6.0622, 6.0622, 6.0622, 5.5605, 5.9505…
## $ rad     <int> 1, 2, 2, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,…
## $ tax     <dbl> 296, 242, 242, 222, 222, 222, 311, 311, 311, 311, 311, 311, 31…
## $ ptratio <dbl> 15.3, 17.8, 17.8, 18.7, 18.7, 18.7, 15.2, 15.2, 15.2, 15.2, 15…
## $ lstat   <dbl> 4.98, 9.14, 4.03, 2.94, 5.33, 5.21, 12.43, 19.15, 29.93, 17.10…
## $ medv    <dbl> 24.0, 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15…
## $ crim01  <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,…
boston %>%
  dplyr::select(-chas) %>%
  tidyr::pivot_longer(-crim01, names_to = "var", values_to = "val") %>%
  ggplot(aes(y = crim01, x = val)) +
  geom_boxplot(aes(fill = factor(crim01))) +
  facet_wrap(~var, scales = "free_x") +
  theme(legend.position = "none")

boston %>%
  count(chas, crim01) %>%
  ggplot(aes(y = chas, x = crim01)) +
  geom_tile(aes(fill = n)) +
  geom_text(aes(label = n), color = "white") +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  theme(legend.position = "none")

set.seed(98)
# By default, splits 3:1
boston_split <- initial_split(boston)
boston_train <- training(boston_split)
boston_test <- testing(boston_split)
boston_models <- list(
  "LDA" = discrim_linear(),
  "QDA" = discrim_quad(),
  "logistic" = logistic_reg(),
  "NB" = naive_Bayes(),
  "KNN1" = nearest_neighbor(mode = "classification", neighbors = 1),
  "KNN3" = nearest_neighbor(mode = "classification", neighbors = 3),
  "KNN5" = nearest_neighbor(mode = "classification", neighbors = 5),
  "KNN7" = nearest_neighbor(mode = "classification", neighbors = 7)
)
boston_recs <- list(
  "rec1" = recipe(
    crim01 ~ age + dis + indus + lstat + medv + nox + ptratio + rad + tax + zn,
    data = boston_train
  ) %>%
    step_normalize(all_numeric_predictors()),
  # Drop medv and lstat
  "rec2" = recipe(
    crim01 ~ age + dis + indus + nox + ptratio + rad + tax + zn,
    data = boston_train
  ) %>%
    step_normalize(all_numeric_predictors()),
  # Drop ptratio and tax
  "rec3" = recipe(
    crim01 ~ age + dis + indus + nox  + rad + zn,
    data = boston_train
  ) %>%
    step_normalize(all_numeric_predictors())
)

boston_fits <-
  map(
    boston_models,
    function(model) {
      map(
        boston_recs,
        ~workflow() %>%
          add_model(model) %>%
          add_recipe(.x) %>%
          fit(data = boston_train)
      )
    }
  )
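
Each element of boston_fits is a fitted workflow, so individual fits can be pulled out for inspection; for example, the logistic regression coefficients under rec1 (sketch):

boston_fits$logistic$rec1 %>%
  extract_fit_parsnip() %>%
  tidy()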

boston_metrics <- metric_set(accuracy, sens, spec, ppv)
imap_dfr(
  boston_fits,
  function(fit, y) {
    imap_dfr(
      fit,
      ~augment(.x, new_data = boston_test) %>%
        boston_metrics(truth = crim01, estimate = .pred_class),
      .id = "recipe"
    )
  },
  .id = "model"
) %>%
  select(model, recipe, .metric, .estimate) %>%
  pivot_wider(names_from = .metric, values_from = .estimate) %>%
  arrange(recipe, desc(accuracy)) %>%
  group_by(recipe) %>%
  gt(rowname_col = "model") %>%
  fmt_percent(columns = -model, decimals = 1)
accuracy sens spec ppv
rec1
QDA 95.3% 94.6% 95.8% 94.6%
KNN1 94.5% 98.2% 91.5% 90.2%
KNN3 94.5% 98.2% 91.5% 90.2%
KNN5 94.5% 98.2% 91.5% 90.2%
KNN7 94.5% 98.2% 91.5% 90.2%
logistic 92.1% 98.2% 87.3% 85.9%
NB 85.0% 80.4% 88.7% 84.9%
LDA 83.5% 91.1% 77.5% 76.1%
rec2
KNN5 96.1% 100.0% 93.0% 91.8%
KNN1 95.3% 98.2% 93.0% 91.7%
KNN3 95.3% 98.2% 93.0% 91.7%
KNN7 95.3% 98.2% 93.0% 91.7%
QDA 94.5% 94.6% 94.4% 93.0%
logistic 89.0% 87.5% 90.1% 87.5%
LDA 86.6% 96.4% 78.9% 78.3%
NB 84.3% 80.4% 87.3% 83.3%
rec3
KNN5 95.3% 98.2% 93.0% 91.7%
KNN1 94.5% 98.2% 91.5% 90.2%
KNN3 94.5% 98.2% 91.5% 90.2%
KNN7 94.5% 96.4% 93.0% 91.5%
logistic 89.0% 87.5% 90.1% 87.5%
QDA 86.6% 91.1% 83.1% 81.0%
NB 86.6% 76.8% 94.4% 91.5%
LDA 84.3% 91.1% 78.9% 77.3%
boston_recs$rec2$term_info
## # A tibble: 9 × 4
##   variable type      role      source  
##   <chr>    <list>    <chr>     <chr>   
## 1 age      <chr [2]> predictor original
## 2 dis      <chr [2]> predictor original
## 3 indus    <chr [2]> predictor original
## 4 nox      <chr [2]> predictor original
## 5 ptratio  <chr [2]> predictor original
## 6 rad      <chr [2]> predictor original
## 7 tax      <chr [2]> predictor original
## 8 zn       <chr [2]> predictor original
## 9 crim01   <chr [3]> outcome   original