library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
-- Attaching packages --------------------------------------- tidyverse 1.3.0 --
v ggplot2 3.3.0 v purrr 0.3.3
v tibble 3.0.1 v dplyr 1.0.0
v tidyr 1.0.2 v stringr 1.4.0
v readr 1.3.1 v forcats 0.5.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
library(tidymodels)
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
-- Attaching packages -------------------------------------- tidymodels 0.1.0 --
v broom 0.5.5 v rsample 0.0.6
v dials 0.0.6 v tune 0.1.0
v infer 0.5.1 v workflows 0.1.1
v parsnip 0.1.1 v yardstick 0.0.6
v recipes 0.1.12
-- Conflicts ----------------------------------------- tidymodels_conflicts() --
x scales::discard() masks purrr::discard()
x dplyr::filter() masks stats::filter()
x recipes::fixed() masks stringr::fixed()
x dplyr::lag() masks stats::lag()
x dials::margin() masks ggplot2::margin()
x yardstick::spec() masks readr::spec()
x recipes::step() masks stats::step()
library(palmerpenguins)
penguins %>%
filter(!is.na(sex)) %>%
ggplot(aes(flipper_length_mm, bill_length_mm, color = sex, size = body_mass_g)) +
geom_point(alpha = 0.5) +
facet_wrap(~species)

NA
NA
penguins_df <- penguins %>%
filter(!is.na(sex)) %>%
select(-year, -island)
library(tidymodels)
set.seed(123)
penguin_split <- initial_split(penguins_df, strata = sex)
penguin_train <- training(penguin_split)
penguin_test <- testing(penguin_split)
set.seed(123)
penguin_boot <- bootstraps(penguin_train)
penguin_boot
# Bootstrap sampling
glm_spec <- logistic_reg() %>%
set_engine("glm")
glm_spec
Logistic Regression Model Specification (classification)
Computational engine: glm
## Logistic Regression Model Specification (classification)
##
## Computational engine: glm
rf_spec <- rand_forest() %>%
set_mode("classification") %>%
set_engine("ranger")
rf_spec
Random Forest Model Specification (classification)
Computational engine: ranger
## Random Forest Model Specification (classification)
##
## Computational engine: ranger
penguin_wf <- workflow() %>%
add_formula(sex ~ .)
penguin_wf
== Workflow ====================================================================
Preprocessor: Formula
Model: None
-- Preprocessor ----------------------------------------------------------------
sex ~ .
glm_rs <- penguin_wf %>%
add_model(glm_spec) %>%
fit_resamples(
resamples = penguin_boot,
control = control_resamples(save_pred = TRUE)
)
! Bootstrap01: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap02: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap03: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap04: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap05: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap06: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap07: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap08: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap09: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap10: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap11: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap12: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap13: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap14: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap15: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap16: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap17: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap18: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap19: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap20: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap21: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap22: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap23: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap24: model (predictions): prediction from a rank-deficient fit may be misleading
! Bootstrap25: model (predictions): prediction from a rank-deficient fit may be misleading
glm_rs
# Bootstrap sampling
rf_rs <- penguin_wf %>%
add_model(rf_spec) %>%
fit_resamples(
resamples = penguin_boot,
control = control_resamples(save_pred = TRUE)
)
rf_rs
# Bootstrap sampling
collect_metrics(rf_rs)
collect_metrics(glm_rs)
NA
glm_rs %>%
collect_predictions() %>%
group_by(id) %>%
roc_curve(sex, .pred_female) %>%
ggplot(aes(1 - specificity, sensitivity, color = id)) +
geom_abline(lty = 2, color = "gray80", size = 1.5) +
geom_path(show.legend = FALSE, alpha = 0.6, size = 1.2) +
coord_equal()

penguin_final <- penguin_wf %>%
add_model(glm_spec) %>%
last_fit(penguin_split)
! Resample1: model (predictions): prediction from a rank-deficient fit may be misleading
penguin_final
# Monte Carlo cross-validation (0.75/0.25) with 1 resamples
collect_metrics(penguin_final)
collect_predictions(penguin_final) %>%
conf_mat(sex, .pred_class)
#penguin_final$.workflow[[1]] %>%
# tidy(exponentiate = TRUE)
penguins %>%
filter(!is.na(sex)) %>%
ggplot(aes(bill_depth_mm, bill_length_mm, color = sex, size = body_mass_g)) +
geom_point(alpha = 0.5) +
facet_wrap(~species)

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCmZyb206IGh0dHBzOi8vanVsaWFzaWxnZS5jb20vYmxvZy9wYWxtZXItcGVuZ3VpbnMvDQotLS0NCg0KYGBge3J9DQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkodGlkeW1vZGVscykNCmxpYnJhcnkocGFsbWVycGVuZ3VpbnMpDQoNCmBgYA0KDQpgYGB7cn0NCg0KDQpwZW5ndWlucyAlPiUNCiAgZmlsdGVyKCFpcy5uYShzZXgpKSAlPiUNCiAgZ2dwbG90KGFlcyhmbGlwcGVyX2xlbmd0aF9tbSwgYmlsbF9sZW5ndGhfbW0sIGNvbG9yID0gc2V4LCBzaXplID0gYm9keV9tYXNzX2cpKSArDQogIGdlb21fcG9pbnQoYWxwaGEgPSAwLjUpICsNCiAgZmFjZXRfd3JhcCh+c3BlY2llcykNCg0KDQpgYGANCg0KDQoNCmBgYHtyfQ0KDQpwZW5ndWluc19kZiA8LSBwZW5ndWlucyAlPiUNCiAgZmlsdGVyKCFpcy5uYShzZXgpKSAlPiUNCiAgc2VsZWN0KC15ZWFyLCAtaXNsYW5kKQ0KYGBgDQoNCg0KDQpgYGB7cn0NCg0KbGlicmFyeSh0aWR5bW9kZWxzKQ0KDQpzZXQuc2VlZCgxMjMpDQpwZW5ndWluX3NwbGl0IDwtIGluaXRpYWxfc3BsaXQocGVuZ3VpbnNfZGYsIHN0cmF0YSA9IHNleCkNCnBlbmd1aW5fdHJhaW4gPC0gdHJhaW5pbmcocGVuZ3Vpbl9zcGxpdCkNCnBlbmd1aW5fdGVzdCA8LSB0ZXN0aW5nKHBlbmd1aW5fc3BsaXQpDQoNCg0Kc2V0LnNlZWQoMTIzKQ0KcGVuZ3Vpbl9ib290IDwtIGJvb3RzdHJhcHMocGVuZ3Vpbl90cmFpbikNCnBlbmd1aW5fYm9vdA0KYGBgDQoNCg0KDQpgYGB7cn0NCg0KZ2xtX3NwZWMgPC0gbG9naXN0aWNfcmVnKCkgJT4lDQogIHNldF9lbmdpbmUoImdsbSIpDQoNCmdsbV9zcGVjDQpgYGANCg0KDQoNCmBgYHtyfQ0KIyMgTG9naXN0aWMgUmVncmVzc2lvbiBNb2RlbCBTcGVjaWZpY2F0aW9uIChjbGFzc2lmaWNhdGlvbikNCiMjIA0KIyMgQ29tcHV0YXRpb25hbCBlbmdpbmU6IGdsbQ0KcmZfc3BlYyA8LSByYW5kX2ZvcmVzdCgpICU+JQ0KICBzZXRfbW9kZSgiY2xhc3NpZmljYXRpb24iKSAlPiUNCiAgc2V0X2VuZ2luZSgicmFuZ2VyIikNCg0KcmZfc3BlYw0KIyMgUmFuZG9tIEZvcmVzdCBNb2RlbCBTcGVjaWZpY2F0aW9uIChjbGFzc2lmaWNhdGlvbikNCiMjIA0KIyMgQ29tcHV0YXRpb25hbCBlbmdpbmU6IHJhbmdlcg0KYGBgDQoNCg0KDQoNCmBgYHtyfQ0KDQpwZW5ndWluX3dmIDwtIHdvcmtmbG93KCkgJT4lDQogIGFkZF9mb3JtdWxhKHNleCB+IC4pDQoNCnBlbmd1aW5fd2YNCmBgYA0KDQoNCg0KYGBge3J9DQoNCmdsbV9ycyA8LSBwZW5ndWluX3dmICU+JQ0KICBhZGRfbW9kZWwoZ2xtX3NwZWMpICU+JQ0KICBmaXRfcmVzYW1wbGVzKA0KICAgIHJlc2FtcGxlcyA9IHBlbmd1aW5fYm9vdCwNCiAgICBjb250cm9sID0gY29udHJvbF9yZXNhbXBsZXMoc2F2ZV9wcmVkID0gVFJVRSkNCiAgKQ0KDQpnbG1fcnMNCmBgYA0KDQoNCg0KYGBge3J9DQoNCnJmX3JzIDwtIHBlbmd1aW5fd2YgJT4lDQogIGFkZF9tb2RlbChyZl9zcGVjKSAlPiUNCiAgZml0X3Jlc2FtcGxlcygNCiAgICByZXNhbXBsZXMgPSBwZW5ndWluX2Jvb3QsDQogICAgY29udHJvbCA9IGNvbnRyb2xfcmVzYW1wbGVzKHNhdmVfcHJlZCA9IFRSVUUpDQogICkNCg0KcmZfcnMNCmBgYA0KDQoNCmBgYHtyfQ0KDQpjb2xsZWN0X21ldHJpY3MocmZfcnMpDQpgYGANCg0KDQoNCmBgYHtyfQ0KYGBgDQoNCg0KYGBge3J9DQpgYGANCg0KDQpgYGB7cn0NCmNvbGxlY3RfbWV0cmljcyhnbG1fcnMpDQogIA0KYGBgDQoNCg0KYGBge3J9DQoNCmdsbV9ycyAlPiUNCiAgY29sbGVjdF9wcmVkaWN0aW9ucygpICU+JQ0KICBncm91cF9ieShpZCkgJT4lDQogIHJvY19jdXJ2ZShzZXgsIC5wcmVkX2ZlbWFsZSkgJT4lDQogIGdncGxvdChhZXMoMSAtIHNwZWNpZmljaXR5LCBzZW5zaXRpdml0eSwgY29sb3IgPSBpZCkpICsNCiAgZ2VvbV9hYmxpbmUobHR5ID0gMiwgY29sb3IgPSAiZ3JheTgwIiwgc2l6ZSA9IDEuNSkgKw0KICBnZW9tX3BhdGgoc2hvdy5sZWdlbmQgPSBGQUxTRSwgYWxwaGEgPSAwLjYsIHNpemUgPSAxLjIpICsNCiAgY29vcmRfZXF1YWwoKQ0KYGBgDQoNCg0KYGBge3J9DQoNCnBlbmd1aW5fZmluYWwgPC0gcGVuZ3Vpbl93ZiAlPiUNCiAgYWRkX21vZGVsKGdsbV9zcGVjKSAlPiUNCiAgbGFzdF9maXQocGVuZ3Vpbl9zcGxpdCkNCg0KcGVuZ3Vpbl9maW5hbA0KYGBgDQoNCmBgYHtyfQ0KY29sbGVjdF9tZXRyaWNzKHBlbmd1aW5fZmluYWwpDQoNCmNvbGxlY3RfcHJlZGljdGlvbnMocGVuZ3Vpbl9maW5hbCkgJT4lDQogIGNvbmZfbWF0KHNleCwgLnByZWRfY2xhc3MpDQpgYGANCg0KDQpgYGB7cn0NCiNwZW5ndWluX2ZpbmFsJC53b3JrZmxvd1tbMV1dICU+JQ0KIyAgdGlkeShleHBvbmVudGlhdGUgPSBUUlVFKQ0KYGBgDQoNCg0KYGBge3J9DQoNCnBlbmd1aW5zICU+JQ0KICBmaWx0ZXIoIWlzLm5hKHNleCkpICU+JQ0KICBnZ3Bsb3QoYWVzKGJpbGxfZGVwdGhfbW0sIGJpbGxfbGVuZ3RoX21tLCBjb2xvciA9IHNleCwgc2l6ZSA9IGJvZHlfbWFzc19nKSkgKw0KICBnZW9tX3BvaW50KGFscGhhID0gMC41KSArDQogIGZhY2V0X3dyYXAofnNwZWNpZXMpDQpgYGANCg0KDQoNCg==