# Build models
# Draw a 100-row working sample.
# The seed is set *before* sampling so the same rows are drawn on every run;
# without it the sample, and therefore every downstream split, fold and model
# result, changes from run to run.
# slice_sample() is the current dplyr replacement for the superseded sample_n().
set.seed(123)
data <- slice_sample(data, n = 100)

# Split into train and test dataset
set.seed(1234)
data_split <- rsample::initial_split(data)
data_train <- rsample::training(data_split)
data_test <- rsample::testing(data_split)

# Further split training dataset for cross-validation
set.seed(2345)
data_cv <- rsample::vfold_cv(data_train)

# Print the resampling object to check the fold sizes
data_cv
## # 10-fold cross-validation
## # A tibble: 10 × 2
## splits id
## <list> <chr>
## 1 <split [67/8]> Fold01
## 2 <split [67/8]> Fold02
## 3 <split [67/8]> Fold03
## 4 <split [67/8]> Fold04
## 5 <split [67/8]> Fold05
## 6 <split [68/7]> Fold06
## 7 <split [68/7]> Fold07
## 8 <split [68/7]> Fold08
## 9 <split [68/7]> Fold09
## 10 <split [68/7]> Fold10
library(usemodels)
## Warning: package 'usemodels' was built under R version 4.5.2
# Print boilerplate recipe / model / workflow / tuning code for an xgboost
# model of `total_weeks` on all other columns. Nothing is assigned here; the
# printed template is only a starting point for the hand-edited code further
# down (note the template proposes "classification", while the spec actually
# used below sets the mode to "regression").
usemodels::use_xgboost(formula = total_weeks ~ ., data = data_train)
## xgboost_recipe <-
## recipe(formula = total_weeks ~ ., data = data_train) %>%
## step_zv(all_predictors())
##
## xgboost_spec <-
## boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(),
## loss_reduction = tune(), sample_size = tune()) %>%
## set_mode("classification") %>%
## set_engine("xgboost")
##
## xgboost_workflow <-
## workflow() %>%
## add_recipe(xgboost_recipe) %>%
## add_model(xgboost_spec)
##
## set.seed(6804)
## xgboost_tune <-
## tune_grid(xgboost_workflow, resamples = stop("add your rsample object"), grid = stop("add number of candidate points"))
# Specify recipe
# Outcome is `total_weeks`; every other column starts out as a predictor.
xgboost_recipe <-
  recipe(formula = total_weeks ~ ., data = data_train) %>%
  # Keep `id` in the data but take it out of the predictor set
  recipes::update_role(id, new_role = "id variable") %>%
  # Title text -> tokens -> at most 100 retained tokens -> tf-idf columns
  step_tokenize(title) %>%
  step_tokenfilter(title, max_tokens = 100) %>%
  step_tfidf(title) %>%
  # Same treatment for the author text
  step_tokenize(author) %>%
  step_tokenfilter(author, max_tokens = 100) %>%
  step_tfidf(author) %>%
  # One-hot encode any remaining nominal predictors
  step_dummy(all_nominal_predictors(), one_hot = TRUE) %>%
  # Run the zero-variance filter last so that it also removes constant
  # columns created by the encoding steps above
  step_zv(all_predictors())

# Inspect the processed training data.
# bake(new_data = NULL) returns the prepped training set and replaces the
# superseded juice().
xgboost_recipe %>%
  prep() %>%
  bake(new_data = NULL) %>%
  glimpse()
## Rows: 75
## Columns: 203
## $ id <dbl> 6499, 3669, 840, 234, 4209, 4702, 4054, 103…
## $ year <dbl> 1959, 2002, 2013, 2015, 1948, 1980, 2003, 1…
## $ total_weeks <dbl> 1.0986123, 1.7917595, 0.6931472, 2.5649494,…
## $ tfidf_title_47th <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_a <dbl> 0.0000000, 0.0000000, 0.0000000, 0.9943845,…
## $ tfidf_title_abiding <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_abode <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_about <dbl> 0.0000000, 0.0000000, 0.7217889, 0.0000000,…
## $ tfidf_title_abstinence <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_account <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_agent <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_american <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_and <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000,…
## $ tfidf_title_any <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_are <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_bare <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_blue <dbl> 0.000000, 0.000000, 0.000000, 1.443578, 0.0…
## $ tfidf_title_blues <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_bones <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_bourne <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_boy <dbl> 0.0000000, 0.0000000, 0.7217889, 0.0000000,…
## $ tfidf_title_bride <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_bridget <dbl> 0.0000000, 0.0000000, 0.7217889, 0.0000000,…
## $ tfidf_title_career <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_catch <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_chances <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_china <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_choice <dbl> 2.165367, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_coma <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_corner <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000,…
## $ tfidf_title_countdown <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_court <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_courtship <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_creation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_darkest <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_darkness <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_date <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_daughter <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_dead <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000,…
## $ tfidf_title_death <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_demons <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_die <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_dog <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_dreams <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_dwell <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_evening <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_evil <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_eye <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000,…
## $ tfidf_title_few <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_first <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_fisherman <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_for <dbl> 0.000000, 1.491577, 0.000000, 0.000000, 0.0…
## $ tfidf_title_four <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_from <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000,…
## $ tfidf_title_frozen <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_fundamentalist <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_gathering <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_girl <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_glory <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_god <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_goddess <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_golden <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_goodbar <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_hard <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_heaven <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_his <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000,…
## $ tfidf_title_homeland <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_hound <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_hours <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_identity <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_in <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000,…
## $ tfidf_title_is <dbl> 0.000000, 1.825329, 0.000000, 0.000000, 0.0…
## $ tfidf_title_jones <dbl> 0.0000000, 0.0000000, 0.7217889, 0.0000000,…
## $ tfidf_title_king <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_knows <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_last <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000,…
## $ tfidf_title_lawyer <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_leaving <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_legacy <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_leia <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_light <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_line <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_looking <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_lord <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_love <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_mad <dbl> 0.0000000, 0.0000000, 0.7217889, 0.0000000,…
## $ tfidf_title_man <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_matarese <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_me <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_midnight <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_mischief <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_moon <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 1.2…
## $ tfidf_title_more <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_mountains <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_mr <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_naked <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000,…
## $ tfidf_title_nature <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_title_new <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_of <dbl> 0.0000000, 0.0000000, 0.0000000, 0.5972532,…
## $ tfidf_title_shadow <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_the <dbl> 0.6645680, 0.0000000, 0.2215227, 0.0000000,…
## $ tfidf_title_time <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_title_to <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_a <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_allison <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_amanda <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_and <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000,…
## $ tfidf_author_andrews <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_ann <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_anne <dbl> 0.000000, 0.000000, 0.000000, 4.330733, 0.0…
## $ tfidf_author_aubrey <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_bach <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_berry <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_bradda <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_brandon <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_burcell <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000,…
## $ tfidf_author_by <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_caldwell <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_carl <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_carré <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_child <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_christopher <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_cleeves <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_clive <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000,…
## $ tfidf_author_cook <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_crombie <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_cussler <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000,…
## $ tfidf_author_d <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_danielle <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_dave <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_dean <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_deborah <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_denis <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_dorst <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_doug <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_dustin <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_edward <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_elizabeth <dbl> 2.165367, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_erica <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_evanovich <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_field <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_fielding <dbl> 0.000000, 0.000000, 2.165367, 0.000000, 0.0…
## $ tfidf_author_fred <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_galbraith <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_gallico <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_garwood <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_george <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 1.4…
## $ tfidf_author_gipson <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_godden <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_goodkind <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_grafton <dbl> 0.000000, 1.825329, 0.000000, 0.000000, 0.0…
## $ tfidf_author_grisham <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_hamid <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_harold <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_harrison <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_harry <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_hayden <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_helen <dbl> 0.000000, 0.000000, 2.165367, 0.000000, 0.0…
## $ tfidf_author_heller <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_hiaasen <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_hitrec <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 1.4…
## $ tfidf_author_hunter <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_ian <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000,…
## $ tfidf_author_illustrated <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_j <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_j.d <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_jacqueline <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_jakes <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_jan <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_jance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_janet <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_janeway <dbl> 2.165367, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_jayne <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_jeff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_jodi <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_john <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_johnson <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_jong <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_jordan <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_joseph <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 1.2…
## $ tfidf_author_judith <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_julie <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_karin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_karon <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_kathy <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_kay <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_kaye <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_kim <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_kinsella <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_koontz <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_krentz <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_l <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_le <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_lee <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_ludlum <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_mary <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_morris <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_richard <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ tfidf_author_robb <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.0…
## $ tfidf_author_robert <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000,…
## $ tfidf_author_robin <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000,…
## $ tfidf_author_sue <dbl> 0.000000, 1.825329, 0.000000, 0.000000, 0.0…
## $ tfidf_author_west <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
# Specify model
# Boosted-tree regression model fitted with the xgboost engine. Each argument
# set to tune() is left unspecified here and filled in during tuning.
xgboost_spec <-
  boost_tree(
    trees = tune(),
    min_n = tune(),
    tree_depth = tune(),
    learn_rate = tune(),
    loss_reduction = tune(),
    sample_size = tune()
  ) %>%
  set_mode("regression") %>%
  set_engine("xgboost")
# Combine recipe and models using workflow
# Bundle the preprocessing recipe and the model specification into a single
# workflow object so they are prepped, fitted and tuned together.
xgboost_workflow <- workflow(
  preprocessor = xgboost_recipe,
  spec = xgboost_spec
)
# Tune hyperparameters
# Evaluate 5 candidate hyperparameter combinations on the cross-validation
# folds in `data_cv`. The seed fixes the random draw used during tuning.
set.seed(6804)
xgboost_tune <- xgboost_workflow %>%
  tune_grid(resamples = data_cv, grid = 5)
## → A | warning: A correlation computation is required, but `estimate` is constant and has 0
## standard deviation, resulting in a divide by 0 error. `NA` will be returned.
## There were issues with some computations A: x3
## There were issues with some computations A: x6
## There were issues with some computations A: x9
## There were issues with some computations A: x12
## There were issues with some computations A: x15
## There were issues with some computations A: x18
## There were issues with some computations A: x21
## There were issues with some computations A: x24
## There were issues with some computations A: x27
## There were issues with some computations A: x30
## There were issues with some computations A: x30