library(yardstick)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(purrr)
library(gt)
4.8 Exercise 13
library(ISLR2)
library(skimr)
weekly <- ISLR2::Weekly
skimr::skim(weekly)
Data summary

|                        |        |
|------------------------|--------|
| Name                   | weekly |
| Number of rows         | 1089   |
| Number of columns      | 9      |
| Column type frequency: |        |
| factor                 | 1      |
| numeric                | 8      |
| Group variables        | None   |

Variable type: factor

| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts        |
|---------------|-----------|---------------|---------|----------|-------------------|
| Direction     | 0         | 1             | FALSE   | 2        | Up: 605, Dow: 484 |

Variable type: numeric

| skim_variable | n_missing | complete_rate | mean    | sd   | p0     | p25     | p50     | p75     | p100    | hist  |
|---------------|-----------|---------------|---------|------|--------|---------|---------|---------|---------|-------|
| Year          | 0         | 1             | 2000.05 | 6.03 | 1990.00 | 1995.00 | 2000.00 | 2005.00 | 2010.00 | ▇▆▆▆▆ |
| Lag1          | 0         | 1             | 0.15    | 2.36 | -18.20 | -1.15   | 0.24    | 1.41    | 12.03   | ▁▁▆▇▁ |
| Lag2          | 0         | 1             | 0.15    | 2.36 | -18.20 | -1.15   | 0.24    | 1.41    | 12.03   | ▁▁▆▇▁ |
| Lag3          | 0         | 1             | 0.15    | 2.36 | -18.20 | -1.16   | 0.24    | 1.41    | 12.03   | ▁▁▆▇▁ |
| Lag4          | 0         | 1             | 0.15    | 2.36 | -18.20 | -1.16   | 0.24    | 1.41    | 12.03   | ▁▁▆▇▁ |
| Lag5          | 0         | 1             | 0.14    | 2.36 | -18.20 | -1.17   | 0.23    | 1.41    | 12.03   | ▁▁▆▇▁ |
| Volume        | 0         | 1             | 1.57    | 1.69 | 0.09   | 0.33    | 1.00    | 2.05    | 9.33    | ▇▂▁▁▁ |
| Today         | 0         | 1             | 0.15    | 2.36 | -18.20 | -1.15   | 0.24    | 1.41    | 12.03   | ▁▁▆▇▁ |
library(dplyr)
library(corrr)
##
## Attaching package: 'corrr'
## The following object is masked from 'package:skimr':
##
## focus
library(gt)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
library(RColorBrewer)
weekly %>%
  dplyr::select(-Direction) %>%
  corrr::correlate(method = "pearson", quiet = TRUE) %>%
  gt(rowname_col = "term") %>%
  # sub_missing() and data_color(fn = ...) replace the deprecated
  # fmt_missing() and data_color(colors = ...) from older gt versions
  gt::sub_missing(columns = everything(), missing_text = "") %>%
  gt::data_color(
    columns = everything(),
    fn = scales::col_numeric(
      palette = brewer.pal(5, "RdBu"), # Alternative to td_pal("div5")
      domain = c(-0.1, 0.9)
    )
  ) %>%
  gt::fmt_number(columns = everything(), decimals = 3)
|        | Year   | Lag1   | Lag2   | Lag3   | Lag4   | Lag5   | Volume | Today  |
|--------|--------|--------|--------|--------|--------|--------|--------|--------|
| Year   |        | −0.032 | −0.033 | −0.030 | −0.031 | −0.031 | 0.842  | −0.032 |
| Lag1   | −0.032 |        | −0.075 | 0.059  | −0.071 | −0.008 | −0.065 | −0.075 |
| Lag2   | −0.033 | −0.075 |        | −0.076 | 0.058  | −0.072 | −0.086 | 0.059  |
| Lag3   | −0.030 | 0.059  | −0.076 |        | −0.075 | 0.061  | −0.069 | −0.071 |
| Lag4   | −0.031 | −0.071 | 0.058  | −0.075 |        | −0.076 | −0.061 | −0.008 |
| Lag5   | −0.031 | −0.008 | −0.072 | 0.061  | −0.076 |        | −0.059 | 0.011  |
| Volume | 0.842  | −0.065 | −0.086 | −0.069 | −0.061 | −0.059 |        | −0.033 |
| Today  | −0.032 | −0.075 | 0.059  | −0.071 | −0.008 | 0.011  | −0.033 |        |
library(ggplot2)
weekly %>%
  ggplot(aes(x = factor(Year), y = Volume)) +
  geom_jitter(width = 0.3, color = "yellow") +
  geom_boxplot(alpha = 0.3, outlier.shape = NA, width = 0.2)

(Figure: jittered weekly Volume values by Year with overlaid boxplots; Volume rises steadily over time.)
library(forcats)
# Reverse the factor levels so "Up" comes first: yardstick treats the
# first level as the event of interest
weekly <- weekly %>% mutate(Direction = fct_rev(Direction))
library(broom)
library(parsnip)
lr_weekly_fit <- logistic_reg() %>%
fit(Direction ~ Year + Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume, data = weekly)
tidy(lr_weekly_fit) %>% gt()
| term        | estimate      | std.error   | statistic   | p.value   |
|-------------|---------------|-------------|-------------|-----------|
| (Intercept) | -17.225822230 | 37.89052190 | -0.45462088 | 0.6493820 |
| Year        | 0.008499918   | 0.01899083  | 0.44758011  | 0.6544563 |
| Lag1        | 0.040687571   | 0.02644652  | 1.53848459  | 0.1239302 |
| Lag2        | -0.059448637  | 0.02697031  | -2.20422531 | 0.0275085 |
| Lag3        | 0.015477987   | 0.02670309  | 0.57963289  | 0.5621622 |
| Lag4        | 0.027316278   | 0.02648478  | 1.03139539  | 0.3023554 |
| Lag5        | 0.014022185   | 0.02640947  | 0.53095285  | 0.5954515 |
| Volume      | -0.003256253  | 0.06883640  | -0.04730423 | 0.9622708 |
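Lag2 is the only predictor significant at the 5% level (p ≈ 0.028). As a quick sketch, the significant terms can be pulled straight from the tidied fit:

# Keep only the coefficients significant at the 5% level
tidy(lr_weekly_fit) %>% filter(p.value < 0.05)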
library(yardstick)
lr_weekly_fit_conf_mat <- augment(lr_weekly_fit, weekly) %>%
conf_mat(truth = Direction, estimate = .pred_class)
lr_weekly_fit_conf_mat
## Truth
## Prediction Up Down
## Up 558 428
## Down 47 56
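The model calls "Up" far too often: most of the 484 "Down" weeks land in the "Up" row. yardstick can derive summary statistics directly from a conf_mat object; a minimal sketch:

# Turn the confusion matrix into a tibble of derived metrics
summary(lr_weekly_fit_conf_mat) %>%
  filter(.metric %in% c("accuracy", "sens", "spec"))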
weekly_train <- weekly %>% filter(Year <= 2008)
weekly_test <- weekly %>% filter(Year > 2008)
lr_weekly_fit_lag2 <-
logistic_reg() %>%
fit(Direction ~ Lag2, data = weekly_train)
lr_weekly_fit_lag2_conf_mat <-
augment(lr_weekly_fit_lag2, weekly_test) %>%
conf_mat(truth = Direction, estimate = .pred_class)
lr_weekly_fit_lag2_conf_mat
## Truth
## Prediction Up Down
## Up 56 34
## Down 5 9
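On the held-out 2009–2010 weeks, overall accuracy works out to (56 + 9) / 104 ≈ 62.5%. conf_mat objects also have an autoplot() method if a visual version is preferred; a sketch:

# Heatmap rendering of the confusion matrix above
autoplot(lr_weekly_fit_lag2_conf_mat, type = "heatmap")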
library(discrim)
model_fits <- list(
  "logistic" = lr_weekly_fit_lag2, # the logistic regression fit from above
  "LDA" = discrim_linear() %>% fit(Direction ~ Lag2, data = weekly_train),
  "QDA" = discrim_quad() %>% fit(Direction ~ Lag2, data = weekly_train),
  "KNN1" = nearest_neighbor(mode = "classification", neighbors = 1) %>%
    fit(Direction ~ Lag2, data = weekly_train),
  "NB" = naive_Bayes() %>% fit(Direction ~ Lag2, data = weekly_train)
)
# Define the metric set
weekly_metrics <- metric_set(accuracy, sens, spec, ppv)
# Generate performance metrics for each model
output <- imap_dfr(
  model_fits,
  ~ augment(.x, new_data = weekly_test) %>%
    weekly_metrics(truth = Direction, estimate = .pred_class) %>%
    mutate(model = .y) # Add the model name as a column
)
# Check structure of the output
glimpse(output)
## Rows: 20
## Columns: 4
## $ .metric <chr> "accuracy", "sens", "spec", "ppv", "accuracy", "sens", "spe…
## $ .estimator <chr> "binary", "binary", "binary", "binary", "binary", "binary",…
## $ .estimate <dbl> 0.6250000, 0.9180328, 0.2093023, 0.6222222, 0.6250000, 0.91…
## $ model <chr> "logistic", "logistic", "logistic", "logistic", "LDA", "LDA…
output %>%
dplyr::select(model, .metric, .estimate) %>%
tidyr::pivot_wider(names_from = .metric, values_from = .estimate) %>%
gt(rowname_col = "model") %>%
gt::fmt_percent(columns = -model)
| model    | accuracy | sens    | spec   | ppv    |
|----------|----------|---------|--------|--------|
| logistic | 62.50%   | 91.80%  | 20.93% | 62.22% |
| LDA      | 62.50%   | 91.80%  | 20.93% | 62.22% |
| QDA      | 58.65%   | 100.00% | 0.00%  | 58.65% |
| KNN1     | 50.00%   | 49.18%  | 51.16% | 58.82% |
| NB       | 60.58%   | 91.80%  | 16.28% | 60.87% |
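Hard-class metrics are only part of the story. Because Direction was releveled so "Up" is the first (event) level, the .pred_Up probability column that augment() adds can feed probability metrics like ROC AUC; a sketch following the same pattern:

# ROC AUC per model from the predicted probability of "Up"
imap_dfr(
  model_fits,
  ~ augment(.x, new_data = weekly_test) %>%
    roc_auc(truth = Direction, .pred_Up),
  .id = "model"
)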
auto <- ISLR2::Auto %>%
mutate(mpg01 = ifelse(mpg > median(mpg), 1, 0),
mpg01 = factor(mpg01))
glimpse(auto)
## Rows: 392
## Columns: 10
## $ mpg <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, 15, 14, 15, 14, 2…
## $ cylinders <int> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 6, 6, 6, 4, …
## $ displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, 390, 383, 34…
## $ horsepower <int> 130, 165, 150, 150, 140, 198, 220, 215, 225, 190, 170, 16…
## $ weight <int> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 4312, 4425, 385…
## $ acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5, 10.0, 8.5, …
## $ year <int> 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 7…
## $ origin <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 3, …
## $ name <fct> chevrolet chevelle malibu, buick skylark 320, plymouth sa…
## $ mpg01 <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, …
auto <- auto %>%
mutate(origin = factor(origin, levels = 1:3,
labels = c("American", "European", "Japanese")))
auto %>%
  dplyr::select(-name, -origin, -mpg) %>%
  pivot_longer(-mpg01, names_to = "var", values_to = "val") %>%
  ggplot(aes(y = mpg01, x = val)) +
  geom_boxplot(aes(fill = factor(mpg01))) +
  facet_wrap(~var, scales = "free_x") +
  theme(
    legend.position = "none",
    # Border around the facet labels (`size` is deprecated; use `linewidth`)
    strip.background = element_rect(color = "black", fill = "lightgray", linewidth = 1)
  )

(Figure: boxplots of each numeric predictor in `auto`, split by mpg01.)
auto %>%
  count(origin, mpg01) %>%
  ggplot(aes(y = origin, x = mpg01)) +
  geom_tile(aes(fill = n)) +
  geom_text(aes(label = n), color = "white") +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  theme(legend.position = "none")

(Figure: counts of mpg01 by origin as a tile heatmap.)
auto %>%
  dplyr::select(-c(name, mpg01, origin)) %>%
  corrr::correlate(method = "pearson", quiet = TRUE) %>%
  gt(rowname_col = "term") %>%
  gt::sub_missing(columns = everything(), missing_text = "") %>%
  gt::data_color(
    columns = everything(),
    fn = scales::col_numeric(
      palette = "RdBu", # You can adjust the palette as needed
      domain = c(-1, 1)
    )
  ) %>%
  gt::fmt_number(columns = everything(), decimals = 2)
|              | mpg   | cylinders | displacement | horsepower | weight | acceleration | year  |
|--------------|-------|-----------|--------------|------------|--------|--------------|-------|
| mpg          |       | −0.78     | −0.81        | −0.78      | −0.83  | 0.42         | 0.58  |
| cylinders    | −0.78 |           | 0.95         | 0.84       | 0.90   | −0.50        | −0.35 |
| displacement | −0.81 | 0.95      |              | 0.90       | 0.93   | −0.54        | −0.37 |
| horsepower   | −0.78 | 0.84      | 0.90         |            | 0.86   | −0.69        | −0.42 |
| weight       | −0.83 | 0.90      | 0.93         | 0.86       |        | −0.42        | −0.31 |
| acceleration | 0.42  | −0.50     | −0.54        | −0.69      | −0.42  |              | 0.29  |
| year         | 0.58  | −0.35     | −0.37        | −0.42      | −0.31  | 0.29         |       |
library(rsample) # Load the package
set.seed(49)
auto_split <- initial_split(auto, prop = 3 / 4)
auto_train <- training(auto_split)
auto_test <- testing(auto_split)
auto_train %>% count(mpg01)
## mpg01 n
## 1 0 148
## 2 1 146
auto_test %>% count(mpg01)
## mpg01 n
## 1 0 48
## 2 1 50
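This particular seed happens to give a nearly balanced split on mpg01, but that is not guaranteed. A stratified split would enforce it; a sketch (not what was run above):

# Stratify on the outcome so both sets keep the same class balance
auto_split_strat <- initial_split(auto, prop = 3 / 4, strata = mpg01)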
library(recipes) # Load the package
##
## Attaching package: 'recipes'
## The following object is masked from 'package:stats':
##
## step
auto_recipe <- recipe(
mpg01 ~ cylinders + displacement + horsepower + weight + acceleration +
year + origin,
data = auto_train
) %>%
# Normalize numerical predictors to work with KNN
step_normalize(all_numeric_predictors()) %>%
step_dummy(origin)
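To double-check what the recipe will hand to the models (normalized numeric columns plus origin dummy variables), it can be prepped and baked on the training set; a quick sketch:

# Estimate the preprocessing parameters and preview the transformed data
auto_recipe %>% prep() %>% bake(new_data = NULL) %>% glimpse()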
library(workflows) # Load the package
auto_workflow <- workflow() %>%
add_recipe(auto_recipe)
# Load all required packages first
library(tidymodels) # This includes dplyr, purrr, etc.
## ── Attaching packages ────────────────────────────────────── tidymodels 1.3.0 ──
## ✔ dials 1.4.0 ✔ tibble 3.2.1
## ✔ infer 1.0.7 ✔ tune 1.3.0
## ✔ modeldata 1.4.0 ✔ workflowsets 1.1.0
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dials::smoothness() masks discrim::smoothness()
## ✖ recipes::step() masks stats::step()
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ lubridate 1.9.4 ✔ stringr 1.5.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ stringr::fixed() masks recipes::fixed()
## ✖ dplyr::lag() masks stats::lag()
## ✖ readr::spec() masks yardstick::spec()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gt)
library(discrim) # For discriminant analysis models
library(conflicted)
# Set conflict preferences
conflict_prefer("select", "dplyr")
## [conflicted] Will prefer dplyr::select over any other package.
conflict_prefer("filter", "dplyr")
## [conflicted] Will prefer dplyr::filter over any other package.
conflict_prefer("spec", "yardstick") # Add this line to resolve the spec conflict
## [conflicted] Will prefer yardstick::spec over any other package.
# Define metrics first
auto_metrics <- metric_set(accuracy, sens, spec, ppv)
# Then fit each model with the shared workflow
model_fits <- list(
  "LDA" = auto_workflow %>%
    add_model(discrim_linear()) %>%
    fit(data = auto_train),
  "QDA" = auto_workflow %>%
    add_model(discrim_quad()) %>%
    fit(data = auto_train),
  "logistic" = auto_workflow %>%
    add_model(logistic_reg()) %>%
    fit(data = auto_train),
  "NB" = auto_workflow %>%
    add_model(naive_Bayes()) %>%
    fit(data = auto_train),
  "KNN1" = auto_workflow %>%
    add_model(nearest_neighbor(mode = "classification", neighbors = 1)) %>%
    fit(data = auto_train),
  "KNN3" = auto_workflow %>%
    add_model(nearest_neighbor(mode = "classification", neighbors = 3)) %>%
    fit(data = auto_train),
  "KNN5" = auto_workflow %>%
    add_model(nearest_neighbor(mode = "classification", neighbors = 5)) %>%
    fit(data = auto_train),
  "KNN7" = auto_workflow %>%
    add_model(nearest_neighbor(mode = "classification", neighbors = 7)) %>%
    fit(data = auto_train)
)
# Finally, evaluate the models on the test set
imap_dfr(
  model_fits,
  ~ augment(.x, new_data = auto_test) %>%
    auto_metrics(truth = mpg01, estimate = .pred_class),
  .id = "model"
) %>%
  dplyr::select(model, .metric, .estimate) %>%
  tidyr::pivot_wider(names_from = .metric, values_from = .estimate) %>%
  dplyr::arrange(desc(accuracy)) %>%
  gt(rowname_col = "model") %>%
  fmt_percent(columns = -model, decimals = 1)
| model    | accuracy | sens  | spec  | ppv   |
|----------|----------|-------|-------|-------|
| LDA      | 90.8%    | 87.5% | 94.0% | 93.3% |
| logistic | 90.8%    | 91.7% | 90.0% | 89.8% |
| QDA      | 89.8%    | 85.4% | 94.0% | 93.2% |
| NB       | 89.8%    | 87.5% | 92.0% | 91.3% |
| KNN1     | 89.8%    | 93.8% | 86.0% | 86.5% |
| KNN3     | 89.8%    | 93.8% | 86.0% | 86.5% |
| KNN5     | 89.8%    | 91.7% | 88.0% | 88.0% |
| KNN7     | 89.8%    | 91.7% | 88.0% | 88.0% |
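To dig into any single row of this table, augment() also works on fitted workflows; for example, the confusion matrix behind the LDA numbers (a sketch):

# Confusion matrix for the LDA workflow on the test set
augment(model_fits$LDA, new_data = auto_test) %>%
  conf_mat(truth = mpg01, estimate = .pred_class)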
boston <- ISLR2::Boston %>%
mutate(
crim01 = ifelse(crim > median(crim), 1, 0),
crim01 = factor(crim01),
# Convert the binary chas variable to TRUE/FALSE
chas = chas == 1
)
glimpse(boston)
## Rows: 506
## Columns: 14
## $ crim <dbl> 0.00632, 0.02731, 0.02729, 0.03237, 0.06905, 0.02985, 0.08829,…
## $ zn <dbl> 18.0, 0.0, 0.0, 0.0, 0.0, 0.0, 12.5, 12.5, 12.5, 12.5, 12.5, 1…
## $ indus <dbl> 2.31, 7.07, 7.07, 2.18, 2.18, 2.18, 7.87, 7.87, 7.87, 7.87, 7.…
## $ chas <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,…
## $ nox <dbl> 0.538, 0.469, 0.469, 0.458, 0.458, 0.458, 0.524, 0.524, 0.524,…
## $ rm <dbl> 6.575, 6.421, 7.185, 6.998, 7.147, 6.430, 6.012, 6.172, 5.631,…
## $ age <dbl> 65.2, 78.9, 61.1, 45.8, 54.2, 58.7, 66.6, 96.1, 100.0, 85.9, 9…
## $ dis <dbl> 4.0900, 4.9671, 4.9671, 6.0622, 6.0622, 6.0622, 5.5605, 5.9505…
## $ rad <int> 1, 2, 2, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,…
## $ tax <dbl> 296, 242, 242, 222, 222, 222, 311, 311, 311, 311, 311, 311, 31…
## $ ptratio <dbl> 15.3, 17.8, 17.8, 18.7, 18.7, 18.7, 15.2, 15.2, 15.2, 15.2, 15…
## $ lstat <dbl> 4.98, 9.14, 4.03, 2.94, 5.33, 5.21, 12.43, 19.15, 29.93, 17.10…
## $ medv <dbl> 24.0, 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15…
## $ crim01 <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,…
boston %>%
  dplyr::select(-chas) %>%
  tidyr::pivot_longer(-crim01, names_to = "var", values_to = "val") %>%
  ggplot(aes(y = crim01, x = val)) +
  geom_boxplot(aes(fill = factor(crim01))) +
  facet_wrap(~var, scales = "free_x") +
  theme(legend.position = "none")

(Figure: boxplots of each numeric predictor in `boston`, split by crim01.)
boston %>%
  count(chas, crim01) %>%
  ggplot(aes(y = chas, x = crim01)) +
  geom_tile(aes(fill = n)) +
  geom_text(aes(label = n), color = "white") +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  theme(legend.position = "none")

(Figure: counts of crim01 by chas as a tile heatmap.)
set.seed(98)
# By default, splits 3:1
boston_split <- initial_split(boston)
boston_train <- training(boston_split)
boston_test <- testing(boston_split)
boston_models <- list(
"LDA" = discrim_linear(),
"QDA" = discrim_quad(),
"logistic" = logistic_reg(),
"NB" = naive_Bayes(),
"KNN1" = nearest_neighbor(mode = "classification", neighbors = 1),
"KNN3" = nearest_neighbor(mode = "classification", neighbors = 3),
"KNN5" = nearest_neighbor(mode = "classification", neighbors = 5),
"KNN7" = nearest_neighbor(mode = "classification", neighbors = 7)
)
boston_recs <- list(
  "rec1" = recipe(
    crim01 ~ age + dis + indus + lstat + medv + nox + ptratio + rad + tax + zn,
    data = boston_train
  ) %>%
    step_normalize(all_numeric_predictors()),
  # Drop medv and lstat
  "rec2" = recipe(
    crim01 ~ age + dis + indus + nox + ptratio + rad + tax + zn,
    data = boston_train
  ) %>%
    step_normalize(all_numeric_predictors()),
  # Drop ptratio and tax
  "rec3" = recipe(
    crim01 ~ age + dis + indus + nox + rad + zn,
    data = boston_train
  ) %>%
    step_normalize(all_numeric_predictors())
)
boston_fits <- map(
  boston_models,
  function(model) {
    map(
      boston_recs,
      ~ workflow() %>%
        add_model(model) %>%
        add_recipe(.x) %>%
        fit(data = boston_train)
    )
  }
)
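The nested map() yields 8 models × 3 recipes = 24 fitted workflows, stored as a list of lists. A quick structural check (a sketch):

# Each of the 8 model entries should hold one fit per recipe
lengths(boston_fits)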
boston_metrics <- metric_set(accuracy, sens, spec, ppv)
imap_dfr(
  boston_fits,
  function(fit, y) {
    imap_dfr(
      fit,
      ~ augment(.x, new_data = boston_test) %>%
        boston_metrics(truth = crim01, estimate = .pred_class),
      .id = "recipe"
    )
  },
  .id = "model"
) %>%
  select(model, recipe, .metric, .estimate) %>%
  pivot_wider(names_from = .metric, values_from = .estimate) %>%
  arrange(recipe, desc(accuracy)) %>%
  group_by(recipe) %>%
  gt(rowname_col = "model") %>%
  fmt_percent(columns = -model, decimals = 1)
| recipe | model    | accuracy | sens   | spec  | ppv   |
|--------|----------|----------|--------|-------|-------|
| rec1   | QDA      | 95.3%    | 94.6%  | 95.8% | 94.6% |
|        | KNN1     | 94.5%    | 98.2%  | 91.5% | 90.2% |
|        | KNN3     | 94.5%    | 98.2%  | 91.5% | 90.2% |
|        | KNN5     | 94.5%    | 98.2%  | 91.5% | 90.2% |
|        | KNN7     | 94.5%    | 98.2%  | 91.5% | 90.2% |
|        | logistic | 92.1%    | 98.2%  | 87.3% | 85.9% |
|        | NB       | 85.0%    | 80.4%  | 88.7% | 84.9% |
|        | LDA      | 83.5%    | 91.1%  | 77.5% | 76.1% |
| rec2   | KNN5     | 96.1%    | 100.0% | 93.0% | 91.8% |
|        | KNN1     | 95.3%    | 98.2%  | 93.0% | 91.7% |
|        | KNN3     | 95.3%    | 98.2%  | 93.0% | 91.7% |
|        | KNN7     | 95.3%    | 98.2%  | 93.0% | 91.7% |
|        | QDA      | 94.5%    | 94.6%  | 94.4% | 93.0% |
|        | logistic | 89.0%    | 87.5%  | 90.1% | 87.5% |
|        | LDA      | 86.6%    | 96.4%  | 78.9% | 78.3% |
|        | NB       | 84.3%    | 80.4%  | 87.3% | 83.3% |
| rec3   | KNN5     | 95.3%    | 98.2%  | 93.0% | 91.7% |
|        | KNN1     | 94.5%    | 98.2%  | 91.5% | 90.2% |
|        | KNN3     | 94.5%    | 98.2%  | 91.5% | 90.2% |
|        | KNN7     | 94.5%    | 96.4%  | 93.0% | 91.5% |
|        | logistic | 89.0%    | 87.5%  | 90.1% | 87.5% |
|        | QDA      | 86.6%    | 91.1%  | 83.1% | 81.0% |
|        | NB       | 86.6%    | 76.8%  | 94.4% | 91.5% |
|        | LDA      | 84.3%    | 91.1%  | 78.9% | 77.3% |
boston_recs$rec2$term_info
## # A tibble: 9 × 4
## variable type role source
## <chr> <list> <chr> <chr>
## 1 age <chr [2]> predictor original
## 2 dis <chr [2]> predictor original
## 3 indus <chr [2]> predictor original
## 4 nox <chr [2]> predictor original
## 5 ptratio <chr [2]> predictor original
## 6 rad <chr [2]> predictor original
## 7 tax <chr [2]> predictor original
## 8 zn <chr [2]> predictor original
## 9 crim01 <chr [3]> outcome original
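Reaching into $term_info works, but recipes exposes the same variable/role table through a public summary() method; a sketch:

# Same information via the documented accessor
summary(boston_recs$rec2)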