Click here for the data
nyt_titles <- read_tsv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-05-10/nyt_titles.tsv')
## Rows: 7431 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): title, author
## dbl (5): id, year, total_weeks, debut_rank, best_rank
## date (1): first_week
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
skimr::skim(nyt_titles)
| Name | nyt_titles |
| Number of rows | 7431 |
| Number of columns | 8 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| Date | 1 |
| numeric | 5 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| title | 0 | 1 | 1 | 74 | 0 | 7172 | 0 |
| author | 4 | 1 | 4 | 73 | 0 | 2205 | 0 |
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| first_week | 0 | 1 | 1931-10-12 | 2020-12-06 | 2000-06-25 | 3348 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| id | 0 | 1 | 3715.00 | 2145.29 | 0 | 1857.5 | 3715 | 5572.5 | 7430 | ▇▇▇▇▇ |
| year | 0 | 1 | 1989.61 | 26.23 | 1931 | 1968.0 | 2000 | 2011.0 | 2020 | ▂▂▂▃▇ |
| total_weeks | 0 | 1 | 8.13 | 11.21 | 1 | 2.0 | 4 | 10.0 | 178 | ▇▁▁▁▁ |
| debut_rank | 0 | 1 | 7.90 | 4.57 | 1 | 4.0 | 8 | 12.0 | 17 | ▇▆▅▅▅ |
| best_rank | 0 | 1 | 6.91 | 4.57 | 1 | 3.0 | 6 | 10.0 | 17 | ▇▅▃▃▂ |
data <- nyt_titles
Identify Good Predictors
first week
data %>%
ggplot(aes(first_week, best_rank)) +
geom_point()
author
data %>%
ggplot(aes(author, debut_rank)) +
geom_boxplot()
year
data %>%
ggplot(aes(year, debut_rank)) +
geom_line()
data %>%
# tokenize title
unnest_tokens(output = word, input = title) %>%
# calculate avg rent per word
group_by(word) %>%
summarise(debut_rank = mean(debut_rank),
n = n()) %>%
ungroup() %>%
filter(n > 10, !str_detect(word, "\\d")) %>%
slice_max(order_by = debut_rank, n = 30)
## # A tibble: 31 × 3
## word debut_rank n
## <chr> <dbl> <int>
## 1 fatal 11.7 11
## 2 room 11.1 15
## 3 gate 11 13
## 4 ice 10.9 11
## 5 beyond 10.5 12
## 6 never 10.4 12
## 7 water 10.1 17
## 8 tell 10 11
## 9 devil's 9.93 14
## 10 silver 9.92 13
## # ℹ 21 more rows
data <- sample_n(data, 100)
# Split into training and test dataset
set.seed(1234)
data_split <- rsample::initial_split(data)
data_train <- training(data_split)
data_test <- testing(data_split)
# further split training dataset for cross-validation
set.seed(2345)
data_cv <- rsample::vfold_cv(data_train)
data_cv
## # 10-fold cross-validation
## # A tibble: 10 × 2
## splits id
## <list> <chr>
## 1 <split [67/8]> Fold01
## 2 <split [67/8]> Fold02
## 3 <split [67/8]> Fold03
## 4 <split [67/8]> Fold04
## 5 <split [67/8]> Fold05
## 6 <split [68/7]> Fold06
## 7 <split [68/7]> Fold07
## 8 <split [68/7]> Fold08
## 9 <split [68/7]> Fold09
## 10 <split [68/7]> Fold10
library(usemodels)
usemodels::use_xgboost(debut_rank ~ ., data = data_train)
## xgboost_recipe <-
## recipe(formula = debut_rank ~ ., data = data_train) %>%
## step_zv(all_predictors())
##
## xgboost_spec <-
## boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(),
## loss_reduction = tune(), sample_size = tune()) %>%
## set_mode("classification") %>%
## set_engine("xgboost")
##
## xgboost_workflow <-
## workflow() %>%
## add_recipe(xgboost_recipe) %>%
## add_model(xgboost_spec)
##
## set.seed(6804)
## xgboost_tune <-
## tune_grid(xgboost_workflow, resamples = stop("add your rsample object"), grid = stop("add number of candidate points"))
# specify recipe
xgboost_recipe <-
recipe(formula = debut_rank ~ ., data = data_train) %>%
recipes::update_role(title, new_role = "id variable") %>%
step_tokenize(title) %>%
step_tokenfilter(max_tokens = 100) %>%
step_tfidf(title) %>%
step_other(author) %>%
step_log(total_weeks, debut_rank, best_rank)
xgboost_recipe %>% prep() %>% juice() %>% glimpse()
## Rows: 75
## Columns: 166
## $ id <dbl> 6362, 1789, 135, 7388, 4260, 7125, 2170, 4247…
## $ author <fct> other, other, other, other, other, other, oth…
## $ year <dbl> 1937, 2010, 2015, 1974, 1946, 1958, 2016, 197…
## $ total_weeks <dbl> 0.0000000, 1.0986123, 2.3025851, 1.9459101, 2…
## $ first_week <date> 1937-02-08, 2010-05-30, 2015-10-25, 1974-05-…
## $ best_rank <dbl> 1.9459101, 1.0986123, 0.6931472, 1.9459101, 1…
## $ debut_rank <dbl> 1.945910, 1.386294, 2.639057, 2.079442, 1.945…
## $ tfidf_title_a <dbl> 0.0000000, 0.0000000, 0.5430161, 0.0000000, 0…
## $ tfidf_title_alive <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_all <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_am <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_and <dbl> 0.0000000, 0.0000000, 0.0000000, 0.8145241, 0…
## $ tfidf_title_babe <dbl> 0.000000, 0.000000, 0.000000, 1.082683, 0.000…
## $ tfidf_title_babylon <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_be <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_belgarath <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_belongs <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_bleu <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_bones <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_box <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_bright <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_burgoyne <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_camillo <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_catcher <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_chance <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_christmas <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_clown <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_code <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_cold <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_contenders <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_d.c <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_da <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_danger <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_dead <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_deceiver <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_demon <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_destiny <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_dirty <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_don <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_doomsday <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_dragons <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_dream <dbl> 0.000000, 2.165367, 0.000000, 0.000000, 0.000…
## $ tfidf_title_dress <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_eagle <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_earth <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_edge <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_empire <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_esquire <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_europe <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_fall <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_fancy <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_feet <dbl> 0.8661467, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_fever <dbl> 0.000000, 2.165367, 0.000000, 0.000000, 0.000…
## $ tfidf_title_fire <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_flames <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_flight <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_flock <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_flown <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_for <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_gemini <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_gold <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_gondolin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_goodbye <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_greengage <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_guns <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_h <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_halo <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_has <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_have <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_heart <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_here <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_his <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_hornet <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_house <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_i <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ `tfidf_title_i'll` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_ice <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_in <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_inherent <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_job <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_karanda <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_key <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_kin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_kingdoms <dbl> 0.0000000, 0.0000000, 0.7217889, 0.0000000, 0…
## $ tfidf_title_kiss <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_kissing <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_knight <dbl> 0.0000000, 0.0000000, 0.7217889, 0.0000000, 0…
## $ tfidf_title_knives <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_labyrinth <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_last <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_lies <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_life <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_lightship <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_like <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_lord <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_m <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_me <dbl> 0.0000000, 0.0000000, 0.0000000, 0.9126646, 0…
## $ tfidf_title_midnight <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_month <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_my <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_mystery <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_nature <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_nine <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_not <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_obsession <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_of <dbl> 0.3962003, 0.0000000, 0.3301669, 0.0000000, 0…
## $ tfidf_title_old <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_or <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_our <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_party <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_phasma <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_pillars <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_pink <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_pot <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_pretty <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_pulham <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_rising <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_running <dbl> 0.8661467, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_rye <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_sacré <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_second <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_secret <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_seeing <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_services <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_seven <dbl> 0.0000000, 0.0000000, 0.7217889, 0.0000000, 0…
## $ tfidf_title_shalimar <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_silentium <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_skin <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_soldier <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_someone <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_song <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_sorcerer <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_sound <dbl> 0.8661467, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_special <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_sphinx <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_spider <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_spirits <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_spoonhandle <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 4.330…
## $ tfidf_title_star <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_summer <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_sundays <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_superstition <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_suspect <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_suspicion <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_temptation <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_tenderloin <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_terrorist <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_the <dbl> 0.2967337, 0.0000000, 0.2472781, 0.0000000, 0…
## $ tfidf_title_this <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_to <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
## $ tfidf_title_tomorrow <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_toys <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_twenty <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_vice <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_victorine <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_vinci <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_wars <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_washington <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_white <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_wishes <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_with <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000…
## $ tfidf_title_woman <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_wrecker <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_years <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_you <dbl> 0.0000000, 0.0000000, 0.0000000, 0.8145241, 0…
## $ tfidf_title_your <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0…
# Specify model
xgboost_spec <-
boost_tree(trees = tune(), min_n = tune(), mtry = tune(), learn_rate = tune()) %>%
set_mode("regression") %>%
set_engine("xgboost")
# Combine recipe and model using workflow
xgboost_workflow <-
workflow() %>%
add_recipe(xgboost_recipe) %>%
add_model(xgboost_spec)
# tune hyperparameters
set.seed(6804)
xgboost_tune <-
tune_grid(xgboost_workflow, resamples = data_cv,
grid = 10)
## i Creating pre-processing data to finalize unknown parameter: mtry
## → A | error: Some columns are non-numeric. The data cannot be converted to numeric matrix: 'author', 'first_week'.
##
There were issues with some computations A: x1
There were issues with some computations A: x2
There were issues with some computations A: x7
There were issues with some computations A: x11
There were issues with some computations A: x16
There were issues with some computations A: x21
There were issues with some computations A: x26
There were issues with some computations A: x31
There were issues with some computations A: x34
There were issues with some computations A: x40
There were issues with some computations A: x43
There were issues with some computations A: x49
There were issues with some computations A: x53
There were issues with some computations A: x58
There were issues with some computations A: x62
There were issues with some computations A: x68
There were issues with some computations A: x72
There were issues with some computations A: x77
There were issues with some computations A: x81
There were issues with some computations A: x86
There were issues with some computations A: x91
There were issues with some computations A: x96
There were issues with some computations A: x100
## Warning: All models failed. Run `show_notes(.Last.tune.result)` for more
## information.