library(tidyverse)## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.0 ✓ dplyr 1.0.4
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tidymodels)## ── Attaching packages ────────────────────────────────────── tidymodels 0.1.2 ──
## ✓ broom 0.7.5 ✓ recipes 0.1.15
## ✓ dials 0.0.9 ✓ rsample 0.0.9
## ✓ infer 0.5.4 ✓ tune 0.1.3
## ✓ modeldata 0.1.0 ✓ workflows 0.2.1
## ✓ parsnip 0.1.5 ✓ yardstick 0.0.7
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## x scales::discard() masks purrr::discard()
## x dplyr::filter() masks stats::filter()
## x recipes::fixed() masks stringr::fixed()
## x dplyr::lag() masks stats::lag()
## x yardstick::spec() masks readr::spec()
## x recipes::step() masks stats::step()
library(tictoc)
library(doParallel)## Loading required package: foreach
##
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
##
## accumulate, when
## Loading required package: iterators
## Loading required package: parallel
library(furrr)## Loading required package: future
library(palmerpenguins)
library(hrbrthemes)## Warning: package 'hrbrthemes' was built under R version 4.0.3
theme_set(hrbrthemes::theme_ipsum_rc())
# furrr
plan(multiprocess, workers = availableCores())## Warning in supportsMulticoreAndRStudio(...): [ONE-TIME WARNING] Forked
## processing ('multicore') is not supported when running R from RStudio
## because it is considered unstable. For more details, how to control forked
## processing or not, and how to silence this warning in future R sessions, see ?
## parallelly::supportsMulticore
# doParallel
cores <- parallel::detectCores(logical = FALSE)
cl <- makePSOCKcluster(cores)
registerDoParallel(cores = cl)
set.seed(77)penguins_data <-
palmerpenguins::penguinspenguins_df <-
penguins_data %>%
filter(!is.na(sex)) %>%
select(-year, -island)
head(penguins_df)penguins_split <-
rsample::initial_split(
penguins_df,
prop = 0.7,
strata = species
)tic(" Baseline XGBoost training duration ")
xgboost_fit <-
boost_tree() %>%
set_engine("xgboost") %>%
set_mode("classification") %>%
fit(species ~ ., data = training(penguins_split))## [22:09:51] WARNING: ../../amalgamation/../src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'multi:softprob' was changed from 'merror' to 'mlogloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
toc(log = TRUE)## Baseline XGBoost training duration : 0.321 sec elapsed
preds <-
predict(xgboost_fit, new_data = testing(penguins_split))
actual <-
testing(penguins_split) %>% select(species)
yardstick::f_meas_vec(truth = actual$species, estimate = preds$.pred_class)## [1] 0.9876216
ranger_model <-
parsnip::rand_forest(mtry = tune(), min_n = tune()) %>%
set_engine("ranger") %>%
set_mode("classification")
glm_model <-
parsnip::multinom_reg(penalty = tune(), mixture = tune()) %>%
set_engine("glmnet") %>%
set_mode("classification")
xgboost_model <-
parsnip::boost_tree(mtry = tune(), learn_rate = tune()) %>%
set_engine("xgboost") %>%
set_mode("classification")
pull_dials_object(glm_model, "mixture")## Proportion of lasso Penalty (quantitative)
## Range: [0, 1]
ranger_grid <-
parameters(ranger_model) %>%
finalize(select(training(penguins_split), -species)) %>% # beacuse of mtry
grid_regular(levels = 4)
ranger_gridranger_grid %>% ggplot(aes(mtry, min_n)) +
geom_point(size = 4, alpha = 0.6) +
labs(title = "Ranger: Regular grid for min_n & mtry combinations")glm_grid <-
parameters(glm_model) %>%
grid_random(size = 20)
glm_gridglm_grid %>% ggplot(aes(penalty, mixture)) +
geom_point(size = 4, alpha = 0.6) +
labs(title = "GLM: Random grid for penalty & mixture combinations")xgboost_grid <-
parameters(xgboost_model) %>%
finalize(select(training(penguins_split), -species)) %>%
grid_max_entropy(size = 20)
xgboost_gridxgboost_grid %>% ggplot(aes(mtry, learn_rate)) +
geom_point(size = 4, alpha = 0.6) +
labs(title = "XGBoost: Max Entropy grid for LR & mtry combinations")recipe_base <-
recipe(species ~ ., data = training(penguins_split)) %>%
step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE) # Create dummy variables (which glmnet needs)
recipe_1 <-
recipe_base %>%
step_YeoJohnson(all_numeric())
recipe_1 %>%
prep() %>%
juice() %>%
summary()## bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## Min. : 53.94 Min. :31.23 Min. :0.6053 Min. :2.284
## 1st Qu.: 68.88 1st Qu.:39.57 1st Qu.:0.6053 1st Qu.:2.292
## Median : 80.33 Median :46.62 Median :0.6053 Median :2.296
## Mean : 78.53 Mean :45.92 Mean :0.6053 Mean :2.296
## 3rd Qu.: 88.09 3rd Qu.:51.51 3rd Qu.:0.6053 3rd Qu.:2.301
## Max. :103.60 Max. :62.13 Max. :0.6053 Max. :2.308
## species sex_female sex_male
## Adelie :103 Min. :0.0000 Min. :0.0000
## Chinstrap: 48 1st Qu.:0.0000 1st Qu.:0.0000
## Gentoo : 84 Median :0.0000 Median :1.0000
## Mean :0.4596 Mean :0.5404
## 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000
recipe_2 <-
recipe_base %>%
step_normalize(all_numeric())
recipe_2 %>%
prep() %>%
juice() %>%
summary()## bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
## Min. :-2.2517 Min. :-2.0608 Min. :-2.1141 Min. :-1.7787
## 1st Qu.:-0.8611 1st Qu.:-0.8476 1st Qu.:-0.8156 1st Qu.:-0.8244
## Median : 0.1725 Median : 0.1180 Median :-0.3107 Median :-0.1882
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.8584 3rd Qu.: 0.7617 3rd Qu.: 0.8436 3rd Qu.: 0.7662
## Max. : 2.2020 Max. : 2.0987 Max. : 2.0699 Max. : 2.6112
## species sex_female sex_male
## Adelie :103 Min. :-0.9202 Min. :-1.0821
## Chinstrap: 48 1st Qu.:-0.9202 1st Qu.:-1.0821
## Gentoo : 84 Median :-0.9202 Median : 0.9202
## Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 1.0821 3rd Qu.: 0.9202
## Max. : 1.0821 Max. : 0.9202
model_metrics <-
yardstick::metric_set(f_meas, pr_auc)data_penguins_3_cv_folds <-
rsample::vfold_cv(
v = 5,
data = training(penguins_split),
strata = species
)ranger_r1_workflow <-
workflows::workflow() %>%
add_model(ranger_model) %>%
add_recipe(recipe_1)
glm_r2_workflow <-
workflows::workflow() %>%
add_model(glm_model) %>%
add_recipe(recipe_2)
xgboost_r2_workflow <-
workflows::workflow() %>%
add_model(xgboost_model) %>%
add_recipe(recipe_2)tic("Ranger tune grid training duration ")
ranger_tuned <-
tune::tune_grid(
object = ranger_r1_workflow,
resamples = data_penguins_3_cv_folds,
grid = ranger_grid,
metrics = model_metrics,
control = tune::control_grid(save_pred = TRUE)
)
toc(log = TRUE)## Ranger tune grid training duration : 12.337 sec elapsed
tic("GLM tune grid training duration ")
glm_tuned <-
tune::tune_grid(
object = glm_r2_workflow,
resamples = data_penguins_3_cv_folds,
grid = glm_grid,
metrics = model_metrics,
control = tune::control_grid(save_pred = TRUE)
)
toc(log = TRUE)## GLM tune grid training duration : 15.869 sec elapsed
tic("XGBoost tune grid training duration ")
xgboost_tuned <-
tune::tune_grid(
object = xgboost_r2_workflow,
resamples = data_penguins_3_cv_folds,
grid = xgboost_grid,
metrics = model_metrics,
control = tune::control_grid(save_pred = TRUE)
)
toc(log = TRUE)## XGBoost tune grid training duration : 14.695 sec elapsed
library(finetune)## Warning: package 'finetune' was built under R version 4.0.3
tic("Tune race training duration ")
ft_xgboost_tuned <-
finetune::tune_race_anova(
object = xgboost_r2_workflow,
resamples = data_penguins_3_cv_folds,
grid = xgboost_grid,
metrics = model_metrics,
control = control_race(verbose_elim = TRUE) # 66
)## ℹ Racing will maximize the f_meas metric.
## ℹ Resamples are analyzed in a random order.
## ℹ Fold5: 12 eliminated; 8 candidates remain.
## ℹ Fold2: 1 eliminated; 7 candidates remain.
toc(log = TRUE)## Tune race training duration : 17.463 sec elapsed
plot_race(ft_xgboost_tuned) + labs(title = "Parameters Race by Fold")Each line corresponds to a tuning parameter combination. Models with suboptimal f_meas scores are eliminated quickly.
bind_cols(
tibble(model = c("Ranger", "GLM", "XGBoost")),
bind_rows(
ranger_tuned %>%
collect_metrics() %>% group_by(.metric) %>% summarise(best_va = max(mean, na.rm = TRUE)) %>% arrange(.metric) %>% pivot_wider(names_from = .metric, values_from = best_va),
glm_tuned %>%
collect_metrics() %>% group_by(.metric) %>% summarise(best_va = max(mean, na.rm = TRUE)) %>% arrange(.metric) %>% pivot_wider(names_from = .metric, values_from = best_va),
xgboost_tuned %>%
collect_metrics() %>% group_by(.metric) %>% summarise(best_va = max(mean, na.rm = TRUE)) %>% arrange(.metric) %>% pivot_wider(names_from = .metric, values_from = best_va)
)
)glm_tuned %>% collect_metrics() # 20 models and 2 metricsglm_tuned %>%
collect_metrics() %>%
group_by(.metric) %>%
summarise(best_va = max(mean, na.rm = TRUE)) %>%
arrange(.metric)glm_tuned %>% select_best(metric = "f_meas")glm_tuned %>%
collect_metrics() %>%
filter(.metric == "f_meas") %>%
select(mean, penalty, mixture) %>%
pivot_longer(penalty:mixture,
values_to = "value",
names_to = "parameter"
) %>%
ggplot(aes(value, mean, color = parameter)) +
geom_point(show.legend = FALSE) +
facet_wrap(~parameter, scales = "free_x") +
labs(x = NULL, y = "F1", title = "F1 MetricEvolution")Based on this plot we could explore a more limited space of HP
Let’s take best parameters and create the final model
best_f1 <-
select_best(xgboost_tuned, metric = "f_meas")
final_model_op1 <-
finalize_workflow(
x = xgboost_r2_workflow,
parameters = best_f1
)
final_model_op1## ══ Workflow ════════════════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: boost_tree()
##
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 2 Recipe Steps
##
## ● step_dummy()
## ● step_normalize()
##
## ── Model ───────────────────────────────────────────────────────────────────────
## Boosted Tree Model Specification (classification)
##
## Main Arguments:
## mtry = 5
## learn_rate = 1.95160852758891e-05
##
## Computational engine: xgboost
Let’s fit to the whole training data and then evaluates on test
tic("Train final model Tune")
penguins_last_fit <-
last_fit(final_model_op1,
penguins_split,
metrics = model_metrics
)
toc(log = TRUE)## Train final model Tune: 0.568 sec elapsed
collect_metrics(penguins_last_fit) %>%
arrange(.metric)penguins_last_fit %>%
collect_predictions() %>%
conf_mat(truth = species, estimate = .pred_class)## Truth
## Prediction Adelie Chinstrap Gentoo
## Adelie 43 6 0
## Chinstrap 0 14 0
## Gentoo 0 0 35
penguins_last_fit %>%
pull(.predictions) %>%
as.data.frame() %>%
filter(.pred_class != species)library(vip)##
## Attaching package: 'vip'
## The following object is masked from 'package:utils':
##
## vi
final_model_op1 %>%
fit(data = penguins_df) %>%
pull_workflow_fit() %>%
vip(
geom = "col",
aesthetics = list(fill = "steelblue")
) +
labs(title = "Feature Importance")## [22:10:54] WARNING: ../../amalgamation/../src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'multi:softprob' was changed from 'merror' to 'mlogloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
This way we avoid running each tune_grid chunk code by mapping the function to a single list.
wflow_list <- list(ranger_r1_workflow, glm_r2_workflow, xgboost_r2_workflow)
grid_list <- list(ranger_grid, glm_grid, xgboost_grid)# Using furrr instead of purrr takes much less time to train
tic("Future_map2 Workflows Training duration ")
trained_models_list <- future_map2(
.x = wflow_list,
.y = grid_list,
~ tune_grid(
object = .x, # OPTIONAL finetune::tune_race_anova
grid = .y,
resamples = data_penguins_3_cv_folds,
control = tune::control_grid(save_pred = TRUE),
metrics = model_metrics
)
)
toc(log = TRUE)## Future_map2 Workflows Training duration : 44.676 sec elapsed
We have three models with 5 Folds each one.
trained_models_list## [[1]]
## # Tuning results
## # 5-fold cross-validation using stratification
## # A tibble: 5 x 5
## splits id .metrics .notes .predictions
## <list> <chr> <list> <list> <list>
## 1 <split [187/48]> Fold1 <tibble [32 × 6]> <tibble [0 × 1]> <tibble [768 × 9]>
## 2 <split [187/48]> Fold2 <tibble [32 × 6]> <tibble [0 × 1]> <tibble [768 × 9]>
## 3 <split [187/48]> Fold3 <tibble [32 × 6]> <tibble [0 × 1]> <tibble [768 × 9]>
## 4 <split [189/46]> Fold4 <tibble [32 × 6]> <tibble [0 × 1]> <tibble [736 × 9]>
## 5 <split [190/45]> Fold5 <tibble [32 × 6]> <tibble [0 × 1]> <tibble [720 × 9]>
##
## [[2]]
## # Tuning results
## # 5-fold cross-validation using stratification
## # A tibble: 5 x 5
## splits id .metrics .notes .predictions
## <list> <chr> <list> <list> <list>
## 1 <split [187/48]> Fold1 <tibble [40 × 6]> <tibble [0 × 1]> <tibble [960 × 9]>
## 2 <split [187/48]> Fold2 <tibble [40 × 6]> <tibble [0 × 1]> <tibble [960 × 9]>
## 3 <split [187/48]> Fold3 <tibble [40 × 6]> <tibble [0 × 1]> <tibble [960 × 9]>
## 4 <split [189/46]> Fold4 <tibble [40 × 6]> <tibble [0 × 1]> <tibble [920 × 9]>
## 5 <split [190/45]> Fold5 <tibble [40 × 6]> <tibble [0 × 1]> <tibble [900 × 9]>
##
## [[3]]
## Warning: This tuning result has notes. Example notes on model fitting include:
## internal: While computing multiclass `precision()`, some levels had no predicted events (i.e. `true_positive + false_positive = 0`).
## Precision is undefined in this case, and those levels will be removed from the averaged result.
## Note that the following number of true events actually occured for each problematic event level:
## 'Chinstrap': 10
## 'Gentoo': 17
## internal: While computing multiclass `precision()`, some levels had no predicted events (i.e. `true_positive + false_positive = 0`).
## Precision is undefined in this case, and those levels will be removed from the averaged result.
## Note that the following number of true events actually occured for each problematic event level:
## 'Chinstrap': 10
## 'Gentoo': 17
## internal: While computing multiclass `precision()`, some levels had no predicted events (i.e. `true_positive + false_positive = 0`).
## Precision is undefined in this case, and those levels will be removed from the averaged result.
## Note that the following number of true events actually occured for each problematic event level:
## 'Chinstrap': 9
## 'Gentoo': 17
## # Tuning results
## # 5-fold cross-validation using stratification
## # A tibble: 5 x 5
## splits id .metrics .notes .predictions
## <list> <chr> <list> <list> <list>
## 1 <split [187/48]> Fold1 <tibble [40 × 6]> <tibble [1 × 1]> <tibble [960 × 9]>
## 2 <split [187/48]> Fold2 <tibble [40 × 6]> <tibble [1 × 1]> <tibble [960 × 9]>
## 3 <split [187/48]> Fold3 <tibble [40 × 6]> <tibble [1 × 1]> <tibble [960 × 9]>
## 4 <split [189/46]> Fold4 <tibble [40 × 6]> <tibble [1 × 1]> <tibble [920 × 9]>
## 5 <split [190/45]> Fold5 <tibble [40 × 6]> <tibble [1 × 1]> <tibble [900 × 9]>
First model and second fold metrics for each HP value
trained_models_list[[1]]$.metrics[[2]]Let’s compute the mean estimate for the second model, for every fold and select the best combinations of HP
show_best(trained_models_list[[2]], metric = "f_meas", n = 1)Now we’ll do the same for every model
map(trained_models_list, show_best, metric = "f_meas", n = 1)## [[1]]
## # A tibble: 1 x 8
## mtry min_n .metric .estimator mean n std_err .config
## <int> <int> <chr> <chr> <dbl> <int> <dbl> <fct>
## 1 1 2 f_meas macro 0.989 5 0.00679 Preprocessor1_Model01
##
## [[2]]
## # A tibble: 1 x 8
## penalty mixture .metric .estimator mean n std_err .config
## <dbl> <dbl> <chr> <chr> <dbl> <int> <dbl> <fct>
## 1 0.00000971 0.993 f_meas macro 0.996 5 0.00378 Preprocessor1_Model…
##
## [[3]]
## # A tibble: 1 x 8
## mtry learn_rate .metric .estimator mean n std_err .config
## <int> <dbl> <chr> <chr> <dbl> <int> <dbl> <fct>
## 1 4 0.0525 f_meas macro 0.963 5 0.0110 Preprocessor1_Model10
Let’s take best parameters and create the final model
furrr_best_f1 <-
select_best(trained_models_list[[2]], metric = "f_meas")
final_model_furrr <-
finalize_workflow(
glm_r2_workflow,
furrr_best_f1
)
final_model_furrr## ══ Workflow ════════════════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: multinom_reg()
##
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 2 Recipe Steps
##
## ● step_dummy()
## ● step_normalize()
##
## ── Model ───────────────────────────────────────────────────────────────────────
## Multinomial Regression Model Specification (classification)
##
## Main Arguments:
## penalty = 9.71498126111899e-06
## mixture = 0.992555894423276
##
## Computational engine: glmnet
tic("Train final model Furrr ")
penguins_last_fit_furrr <-
last_fit(final_model_furrr,
penguins_split,
metrics = model_metrics
)
toc(log = TRUE)## Train final model Furrr : 0.609 sec elapsed
collect_metrics(penguins_last_fit_furrr) %>%
arrange(.metric)penguins_last_fit_furrr %>%
collect_predictions() %>%
conf_mat(truth = species, estimate = .pred_class)## Truth
## Prediction Adelie Chinstrap Gentoo
## Adelie 43 0 0
## Chinstrap 0 20 0
## Gentoo 0 0 35
penguins_last_fit_furrr %>%
pull(.predictions) %>%
as.data.frame() %>%
filter(.pred_class != species)This way we can combine all possible models + recipes. A drawback is that you can’t assign custom tuning grids, it will use grid_latin_hypercube by default.
# library("devtools")
# devtools::install_github("tidymodels/workflowsets")
library(workflowsets)
wfs_models <-
workflow_set(
models = list(
ranger = ranger_model,
glm = glm_model,
xgb = xgboost_model
),
preproc = list(
rec_yj = recipe_1,
rec_norm = recipe_2
),
cross = TRUE
)
wfs_modelsAssuming Ranger Model doesn’t work with recipe_2 specifications, we can remove it:
wfs_models <-
wfs_models %>%
anti_join(tibble(wflow_id = c("rec_norm_ranger")), by = "wflow_id")
wfs_modelstic("WF Sets Training time ")
wfs_models <-
workflow_map(
object = wfs_models,
fn = "tune_grid", # OPTIONAL finetune::tune_race_anova
resamples = data_penguins_3_cv_folds,
grid = 10,
metrics = model_metrics,
verbose = TRUE
)## i 1 of 5 tuning: rec_yj_ranger
## i Creating pre-processing data to finalize unknown parameter: mtry
## ✓ 1 of 5 tuning: rec_yj_ranger (7.2s)
## i 2 of 5 tuning: rec_yj_glm
## ✓ 2 of 5 tuning: rec_yj_glm (7.1s)
## i 3 of 5 tuning: rec_yj_xgb
## i Creating pre-processing data to finalize unknown parameter: mtry
## ✓ 3 of 5 tuning: rec_yj_xgb (7.2s)
## i 4 of 5 tuning: rec_norm_glm
## ✓ 4 of 5 tuning: rec_norm_glm (7.5s)
## i 5 of 5 tuning: rec_norm_xgb
## i Creating pre-processing data to finalize unknown parameter: mtry
## ✓ 5 of 5 tuning: rec_norm_xgb (7s)
toc()## WF Sets Training time : 37.499 sec elapsed
We have 5 models with 10 HP => 50 models to train
autoplot(wfs_models, metric = "f_meas") + labs(title = "Ranked Kfolds Models Preformance")autoplot(wfs_models, metric = "f_meas", select_best = TRUE) + labs(title = "Models Performance Ranking")rank_results(wfs_models, rank_metric = "f_meas", select_best = TRUE)Get the best model
wfs_glm_best_wf <-
wfs_models %>%
pull_workflow("rec_yj_glm") # Since all workflows are in the WFS object !!!
wfs_glm_best_wf## ══ Workflow ════════════════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: multinom_reg()
##
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 2 Recipe Steps
##
## ● step_dummy()
## ● step_YeoJohnson()
##
## ── Model ───────────────────────────────────────────────────────────────────────
## Multinomial Regression Model Specification (classification)
##
## Main Arguments:
## penalty = tune()
## mixture = tune()
##
## Computational engine: glmnet
Get the best parameters and finalize
wfs_best_f1 <-
wfs_models %>%
pull_workflow_result("rec_yj_glm") %>%
select_best(metric = "f_meas")
final_model_wfs <-
finalize_workflow(
x = wfs_glm_best_wf,
parameters = wfs_best_f1
)
final_model_wfs## ══ Workflow ════════════════════════════════════════════════════════════════════
## Preprocessor: Recipe
## Model: multinom_reg()
##
## ── Preprocessor ────────────────────────────────────────────────────────────────
## 2 Recipe Steps
##
## ● step_dummy()
## ● step_YeoJohnson()
##
## ── Model ───────────────────────────────────────────────────────────────────────
## Multinomial Regression Model Specification (classification)
##
## Main Arguments:
## penalty = 5.35782793767896e-05
## mixture = 0.0286454205168411
##
## Computational engine: glmnet
tic("Train final model WFS")
penguins_last_fit_wfs <-
last_fit(final_model_wfs,
penguins_split,
metrics = model_metrics
)
toc(log = TRUE)## Train final model WFS: 0.763 sec elapsed
collect_metrics(penguins_last_fit_wfs) %>%
arrange(.metric)penguins_last_fit_wfs %>%
collect_predictions() %>%
conf_mat(truth = species, estimate = .pred_class)## Truth
## Prediction Adelie Chinstrap Gentoo
## Adelie 43 1 0
## Chinstrap 0 19 0
## Gentoo 0 0 35
penguins_last_fit_wfs %>%
pull(.predictions) %>%
as.data.frame() %>%
filter(.pred_class != species)tic.log() %>%
unlist() %>%
tibble()sessionInfo()## R version 3.6.3 (2020-02-29)
## Platform: x86_64-conda-linux-gnu (64-bit)
## Running under: Manjaro Linux
##
## Matrix products: default
## BLAS/LAPACK: /home/marceluss/miniconda3/envs/r-env/lib/libopenblasp-r0.3.12.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] workflowsets_0.0.0.9001 vip_0.3.2 finetune_0.0.1
## [4] xgboost_1.3.0.1 glmnet_4.1 Matrix_1.3-2
## [7] ranger_0.12.1 vctrs_0.3.6 rlang_0.4.10
## [10] hrbrthemes_0.8.0 palmerpenguins_0.1.0 furrr_0.2.2
## [13] future_1.21.0 doParallel_1.0.16 iterators_1.0.13
## [16] foreach_1.5.1 tictoc_1.0 yardstick_0.0.7
## [19] workflows_0.2.1 tune_0.1.3 rsample_0.0.9
## [22] recipes_0.1.15 parsnip_0.1.5 modeldata_0.1.0
## [25] infer_0.5.4 dials_0.0.9 scales_1.1.1
## [28] broom_0.7.5 tidymodels_0.1.2 forcats_0.5.1
## [31] stringr_1.4.0 dplyr_1.0.4 purrr_0.3.4
## [34] readr_1.3.1 tidyr_1.1.2 tibble_3.1.0
## [37] ggplot2_3.3.3 tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] minqa_1.2.4 colorspace_2.0-0 ellipsis_0.3.1 class_7.3-18
## [5] fs_1.5.0 rstudioapi_0.13 farver_2.1.0 listenv_0.8.0
## [9] prodlim_2019.11.13 fansi_0.4.2 lubridate_1.7.10 xml2_1.3.2
## [13] codetools_0.2-18 splines_3.6.3 extrafont_0.17 knitr_1.31
## [17] jsonlite_1.7.2 nloptr_1.2.2.2 pROC_1.17.0.1 Rttf2pt1_1.3.8
## [21] dbplyr_2.1.0 compiler_3.6.3 httr_1.4.2 backports_1.2.1
## [25] assertthat_0.2.1 cli_2.3.1 prettyunits_1.1.1 htmltools_0.5.1.1
## [29] tools_3.6.3 gtable_0.3.0 glue_1.4.2 Rcpp_1.0.6
## [33] cellranger_1.1.0 jquerylib_0.1.3 DiceDesign_1.9 nlme_3.1-152
## [37] extrafontdb_1.0 timeDate_3043.102 gower_0.2.2 xfun_0.20
## [41] globals_0.14.0 lme4_1.1-26 rvest_0.3.6 lifecycle_1.0.0
## [45] statmod_1.4.35 MASS_7.3-53.1 ipred_0.9-9 hms_1.0.0
## [49] yaml_2.2.1 gridExtra_2.3 gdtools_0.2.3 sass_0.3.1
## [53] rpart_4.1-15 stringi_1.5.3 highr_0.8 lhs_1.1.1
## [57] hardhat_0.1.5 boot_1.3-26 shape_1.4.5 lava_1.6.8.1
## [61] systemfonts_0.3.2 pkgconfig_2.0.3 evaluate_0.14 lattice_0.20-41
## [65] labeling_0.4.2 tidyselect_1.1.0 parallelly_1.23.0 plyr_1.8.6
## [69] magrittr_2.0.1 R6_2.5.0 generics_0.1.0 DBI_1.1.1
## [73] pillar_1.5.0 haven_2.3.1 withr_2.4.1 survival_3.2-7
## [77] nnet_7.3-15 modelr_0.1.8 crayon_1.4.1 utf8_1.1.4
## [81] rmarkdown_2.7 grid_3.6.3 readxl_1.3.1 data.table_1.13.6
## [85] reprex_1.0.0 digest_0.6.27 munsell_0.5.0 GPfit_1.0-8
## [89] bslib_0.2.4