Build Models
data <- sample_n(data, 100)
# Split into train and test dataset
set.seed(1234)
data_split <- rsample::initial_split(data)
data_train <- training(data_split)
data_test <- testing(data_split)
# Further spit training dataset for cross-validation
set.seed(2345)
data_cv <- rsample::vfold_cv(data_train)
data_cv
## # 10-fold cross-validation
## # A tibble: 10 × 2
## splits id
## <list> <chr>
## 1 <split [67/8]> Fold01
## 2 <split [67/8]> Fold02
## 3 <split [67/8]> Fold03
## 4 <split [67/8]> Fold04
## 5 <split [67/8]> Fold05
## 6 <split [68/7]> Fold06
## 7 <split [68/7]> Fold07
## 8 <split [68/7]> Fold08
## 9 <split [68/7]> Fold09
## 10 <split [68/7]> Fold10
usemodels::use_xgboost(total_weeks ~ ., data = data_train)
## xgboost_recipe <-
## recipe(formula = total_weeks ~ ., data = data_train) %>%
## step_zv(all_predictors())
##
## xgboost_spec <-
## boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(),
## loss_reduction = tune(), sample_size = tune()) %>%
## set_mode("classification") %>%
## set_engine("xgboost")
##
## xgboost_workflow <-
## workflow() %>%
## add_recipe(xgboost_recipe) %>%
## add_model(xgboost_spec)
##
## set.seed(6804)
## xgboost_tune <-
## tune_grid(xgboost_workflow, resamples = stop("add your rsample object"), grid = stop("add number of candidate points"))
# Specify recipe
xgboost_recipe <-
recipe(formula = total_weeks ~ ., data = data_train) %>%
recipes::update_role(id, new_role = "id variable") %>%
step_tokenize(title) %>%
step_tokenfilter(title, max_tokens = 100) %>%
step_tfidf(title) %>%
step_tokenize(author) %>%
step_tokenfilter(author, max_tokens = 100) %>%
step_tfidf(author) %>%
step_zv(all_predictors()) %>%
step_dummy(all_nominal_predictors(), one_hot = TRUE)
xgboost_recipe %>% prep() %>% juice() %>% glimpse()
## Rows: 75
## Columns: 203
## $ id <dbl> 88, 466, 6832, 671, 3125, 4482, 1278, 622, 1…
## $ year <dbl> 1938, 1986, 1938, 1940, 1989, 1968, 2016, 20…
## $ total_weeks <dbl> 1.6094379, 1.7917595, 2.7080502, 1.6094379, …
## $ tfidf_title_a <dbl> 0.9126646, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_accident <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_after <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_against <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_all <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_and <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_angels <dbl> 0.000000, 2.165367, 0.000000, 0.000000, 0.00…
## $ tfidf_title_appointment <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_at <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_battle <dbl> 1.082683, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_beauchamp <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_beautiful <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_berlin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_bethel <dbl> 0.000000, 0.000000, 0.000000, 2.165367, 0.00…
## $ tfidf_title_big <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_black <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_blood <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_bloom <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_box <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_bridge <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_bug <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_cancer <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_center <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_children <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_clawback <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_confessional <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_conviction <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_cruise <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_dark <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_darkness <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_daughters <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_day <dbl> 0.9126646, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_death <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_decision <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_destiny <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ `tfidf_title_devil's` <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_die <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_dollmaker <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_done <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_double <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_durable <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_edge <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_egyptian <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_elm <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_enigma <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_eternal <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_evening <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_fate <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_file <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_fire <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_flaminia <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_friends <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_funeral <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_generation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_girl <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_gold <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_golden <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_gone <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_hall <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_happy <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_harbor <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_harbours <dbl> 0.000000, 0.000000, 2.165367, 0.000000, 0.00…
## $ tfidf_title_heat <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_hello <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_hit <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_hollywood <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_home <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_in <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_innocent <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_isle <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_jumping <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_jupiter <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_justice <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_kahuna <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_kennedy <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_killer <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_liang <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_life <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_lived <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_lovers <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_madame <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_man <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_match <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_matter <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_meet <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_men <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_merriday <dbl> 0.000000, 0.000000, 0.000000, 2.165367, 0.00…
## $ tfidf_title_mister <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_moment <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_morning <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_murder <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_new <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_night <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_normal <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_odds <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_odessa <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_of <dbl> 0.5350165, 1.0700331, 0.0000000, 0.0000000, …
## $ tfidf_title_on <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_the <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_three <dbl> 0.000000, 0.000000, 1.825329, 0.000000, 0.00…
## $ tfidf_author_a <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_aaron <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_adams <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_alfred <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_alice <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_allston <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_and <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_author_andrew <dbl> 0.000000, 2.165367, 0.000000, 0.000000, 0.00…
## $ tfidf_author_angela <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_anne <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_arnow <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_baldacci <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_blake <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_bock <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_brace <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_brad <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_brown <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_buck <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_caldwell <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_carey <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_carol <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_catherine <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_charles <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_chevalier <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_christopher <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_chuck <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_clark <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_collins <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_connelly <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_coulter <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_crais <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_crouch <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_d <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_danielle <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_davenport <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_david <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_davidson <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_davis <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_deaver <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_debbie <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_deighton <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_del <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_diane <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_donaldson <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_elizabeth <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_ernestine <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_evan <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_evanovich <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_evans <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_f <dbl> 0.000000, 0.000000, 4.330733, 0.000000, 0.00…
## $ tfidf_author_fairstein <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_feuchtwanger <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_flanagan <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_forsyth <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_forsythe <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_frank <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_frankau <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_frederick <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_french <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_g <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_garwood <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_gerald <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_gilbreth <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_greeley <dbl> 0.000000, 2.165367, 0.000000, 0.000000, 0.00…
## $ tfidf_author_green <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_guillermo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_h <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_halley <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_harold <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_harriette <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_haruf <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_hayes <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_heggen <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_helen <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_henry <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_higgins <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_hogan <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_howard <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_hunter <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_ian <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_isherwood <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_j <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_jack <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_jackie <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_jance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_jane <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_janet <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_jeffery <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_jodi <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_john <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_julie <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_kent <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_lagercrantz <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_lawrence <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_len <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_lenoir <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_linda <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_steel <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_author_stuart <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_author_thomas <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
``` r
# Specify model
xgboost_spec <-
boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(),
loss_reduction = tune(), sample_size = tune()) %>%
set_mode("regression") %>%
set_engine("xgboost")
# Combine recipe and models using workflow
xgboost_workflow <-
workflow() %>%
add_recipe(xgboost_recipe) %>%
add_model(xgboost_spec)
# Tune hyperparameters
set.seed(6804)
xgboost_tune <-
tune_grid(xgboost_workflow,
resamples = data_cv,
grid = 5)