Goal: Compare and Contrast all the different Superbowl commercials after they had aired. What did these companies do well or poorly based on numbers.
Click [here for the Data](youtube <- readr::read_csv(‘https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2021/2021-03-02/youtube.csv’)
#Import Data
youtube <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2021/2021-03-02/youtube.csv')
skimr::skim(youtube)
Name | youtube |
Number of rows | 247 |
Number of columns | 25 |
_______________________ | |
Column type frequency: | |
character | 10 |
logical | 7 |
numeric | 7 |
POSIXct | 1 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
brand | 0 | 1.00 | 3 | 9 | 0 | 10 | 0 |
superbowl_ads_dot_com_url | 0 | 1.00 | 34 | 120 | 0 | 244 | 0 |
youtube_url | 11 | 0.96 | 43 | 43 | 0 | 233 | 0 |
id | 11 | 0.96 | 11 | 11 | 0 | 233 | 0 |
kind | 16 | 0.94 | 13 | 13 | 0 | 1 | 0 |
etag | 16 | 0.94 | 27 | 27 | 0 | 228 | 0 |
title | 16 | 0.94 | 6 | 99 | 0 | 228 | 0 |
description | 50 | 0.80 | 3 | 3527 | 0 | 194 | 0 |
thumbnail | 129 | 0.48 | 48 | 48 | 0 | 118 | 0 |
channel_title | 16 | 0.94 | 3 | 37 | 0 | 185 | 0 |
Variable type: logical
skim_variable | n_missing | complete_rate | mean | count |
---|---|---|---|---|
funny | 0 | 1 | 0.69 | TRU: 171, FAL: 76 |
show_product_quickly | 0 | 1 | 0.68 | TRU: 169, FAL: 78 |
patriotic | 0 | 1 | 0.17 | FAL: 206, TRU: 41 |
celebrity | 0 | 1 | 0.29 | FAL: 176, TRU: 71 |
danger | 0 | 1 | 0.30 | FAL: 172, TRU: 75 |
animals | 0 | 1 | 0.37 | FAL: 155, TRU: 92 |
use_sex | 0 | 1 | 0.27 | FAL: 181, TRU: 66 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
year | 0 | 1.00 | 2010.19 | 5.86 | 2000 | 2005 | 2010 | 2015.00 | 2020 | ▇▇▇▇▆ |
view_count | 16 | 0.94 | 1407556.46 | 11971111.01 | 10 | 6431 | 41379 | 170015.50 | 176373378 | ▇▁▁▁▁ |
like_count | 22 | 0.91 | 4146.03 | 23920.40 | 0 | 19 | 130 | 527.00 | 275362 | ▇▁▁▁▁ |
dislike_count | 22 | 0.91 | 833.54 | 6948.52 | 0 | 1 | 7 | 24.00 | 92990 | ▇▁▁▁▁ |
favorite_count | 16 | 0.94 | 0.00 | 0.00 | 0 | 0 | 0 | 0.00 | 0 | ▁▁▇▁▁ |
comment_count | 25 | 0.90 | 188.64 | 986.46 | 0 | 1 | 10 | 50.75 | 9190 | ▇▁▁▁▁ |
category_id | 16 | 0.94 | 19.32 | 8.00 | 1 | 17 | 23 | 24.00 | 29 | ▃▁▂▆▇ |
Variable type: POSIXct
skim_variable | n_missing | complete_rate | min | max | median | n_unique |
---|---|---|---|---|---|---|
published_at | 16 | 0.94 | 2006-02-06 10:02:36 | 2021-01-27 13:11:29 | 2013-01-31 09:13:55 | 227 |
data <- youtube %>%
#Treat Missing Values
select(-thumbnail, -description, -etag, -comment_count, -youtube_url, -published_at, -superbowl_ads_dot_com_url) %>%
na.omit() %>%
# log transform variables with pos-skewed distribution
mutate(like_count = log(like_count +1)) %>%
mutate(category_id = as.factor(category_id)) %>%
mutate(across(where(is.character), as.factor)) %>%
mutate(across(where(is.logical), as.factor)) %>%
mutate(title = as.character(title))
Identify predictors
Like count
data %>%
ggplot(aes(view_count, like_count)) +
scale_y_log10() +
geom_point()
## Warning in scale_y_log10(): log-10 transformation introduced infinite values.
Dislike count
data %>%
ggplot(aes(view_count, dislike_count)) +
geom_point()
Favorite count
data %>%
#Tokenize
unnest_tokens(output = brand, input = title) %>%
#Calculate
group_by(brand) %>%
summarise(view_count = mean(view_count),
n =n()) %>%
ungroup() %>%
filter(n > 10) %>%
slice_max(order_by = brand, n = 20) %>%
#Plot
ggplot(aes(view_count, fct_reorder(brand, view_count))) +
geom_point() +
labs(y = "Brands") +
labs(x = "View Count")
EDA Shortcut
#Step 1: Prepare Data
data_binarized_tbl <- data %>%
select(-id, -title) %>%
binarize()
data_binarized_tbl %>% glimpse()
## Rows: 225
## Columns: 62
## $ `year__-Inf_2005` <dbl> 0, 0, 0, 0, 1, 0, 0, 0, …
## $ year__2005_2010 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, …
## $ year__2010_2015 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ year__2015_Inf <dbl> 1, 1, 0, 1, 0, 1, 1, 1, …
## $ brand__Bud_Light <dbl> 0, 1, 1, 0, 1, 0, 0, 0, …
## $ brand__Budweiser <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `brand__Coca-Cola` <dbl> 0, 0, 0, 0, 0, 0, 1, 0, …
## $ brand__Doritos <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `brand__E-Trade` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ brand__Hynudai <dbl> 0, 0, 0, 1, 0, 0, 0, 0, …
## $ brand__Kia <dbl> 0, 0, 0, 0, 0, 0, 0, 1, …
## $ brand__NFL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ brand__Pepsi <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ brand__Toyota <dbl> 1, 0, 0, 0, 0, 1, 0, 0, …
## $ funny__FALSE <dbl> 1, 0, 0, 1, 0, 0, 0, 1, …
## $ funny__TRUE <dbl> 0, 1, 1, 0, 1, 1, 1, 0, …
## $ show_product_quickly__FALSE <dbl> 1, 0, 1, 0, 0, 0, 1, 1, …
## $ show_product_quickly__TRUE <dbl> 0, 1, 0, 1, 1, 1, 0, 0, …
## $ patriotic__FALSE <dbl> 1, 1, 1, 1, 1, 1, 1, 1, …
## $ patriotic__TRUE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ celebrity__FALSE <dbl> 1, 0, 1, 1, 1, 0, 0, 0, …
## $ celebrity__TRUE <dbl> 0, 1, 0, 0, 0, 1, 1, 1, …
## $ danger__FALSE <dbl> 1, 0, 0, 1, 0, 0, 1, 1, …
## $ danger__TRUE <dbl> 0, 1, 1, 0, 1, 1, 0, 0, …
## $ animals__FALSE <dbl> 1, 1, 0, 1, 0, 0, 0, 1, …
## $ animals__TRUE <dbl> 0, 0, 1, 0, 1, 1, 1, 0, …
## $ use_sex__FALSE <dbl> 1, 1, 1, 1, 0, 1, 1, 1, …
## $ use_sex__TRUE <dbl> 0, 0, 0, 0, 1, 0, 0, 0, …
## $ `view_count__-Inf_6641` <dbl> 0, 0, 0, 1, 0, 0, 0, 0, …
## $ view_count__6641_43983 <dbl> 0, 0, 0, 0, 1, 1, 0, 1, …
## $ view_count__43983_175482 <dbl> 1, 1, 1, 0, 0, 0, 0, 0, …
## $ view_count__175482_Inf <dbl> 0, 0, 0, 0, 0, 0, 1, 0, …
## $ `like_count__-Inf_2.99573227355399` <dbl> 0, 0, 0, 1, 0, 0, 0, 0, …
## $ like_count__2.99573227355399_4.87519732320115 <dbl> 0, 0, 1, 0, 1, 1, 0, 1, …
## $ like_count__4.87519732320115_6.26909628370626 <dbl> 0, 1, 0, 0, 0, 0, 0, 0, …
## $ like_count__6.26909628370626_Inf <dbl> 1, 0, 0, 0, 0, 0, 1, 0, …
## $ `dislike_count__-Inf_1` <dbl> 0, 0, 0, 1, 0, 0, 0, 0, …
## $ dislike_count__1_7 <dbl> 0, 0, 0, 0, 1, 0, 0, 1, …
## $ dislike_count__7_24 <dbl> 0, 1, 1, 0, 0, 1, 0, 0, …
## $ dislike_count__24_Inf <dbl> 1, 0, 0, 0, 0, 0, 1, 0, …
## $ channel_title__BudBowlXLII <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `channel_title__Coca-Cola` <dbl> 0, 0, 0, 0, 0, 0, 1, 0, …
## $ channel_title__Funny_Commercials <dbl> 1, 0, 0, 0, 0, 1, 0, 0, …
## $ channel_title__John_Keehler <dbl> 0, 0, 1, 0, 0, 0, 0, 0, …
## $ channel_title__NFL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ channel_title__omon007 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ channel_title__reggiep08v2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ channel_title__The_Hall_of_Advertising <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ channel_title__USA_TODAY <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ channel_title__World_Hyundai_Matteson <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `channel_title__-OTHER` <dbl> 0, 1, 0, 1, 1, 0, 0, 1, …
## $ category_id__1 <dbl> 1, 0, 0, 0, 0, 1, 0, 0, …
## $ category_id__2 <dbl> 0, 0, 0, 0, 0, 0, 0, 1, …
## $ category_id__10 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ category_id__15 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ category_id__17 <dbl> 0, 0, 1, 0, 0, 0, 0, 0, …
## $ category_id__22 <dbl> 0, 0, 0, 1, 0, 0, 0, 0, …
## $ category_id__23 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ category_id__24 <dbl> 0, 0, 0, 0, 1, 0, 1, 0, …
## $ category_id__25 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ category_id__27 <dbl> 0, 1, 0, 0, 0, 0, 0, 0, …
## $ `category_id__-OTHER` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
# Step 2: Correlate
data_corr_tbl <- data_binarized_tbl %>%
correlate(like_count__6.26909628370626_Inf)
data_corr_tbl
## # A tibble: 62 × 3
## feature bin correlation
## <fct> <chr> <dbl>
## 1 like_count 6.26909628370626_Inf 1
## 2 view_count 175482_Inf 0.715
## 3 dislike_count 24_Inf 0.715
## 4 dislike_count -Inf_1 -0.355
## 5 like_count -Inf_2.99573227355399 -0.339
## 6 dislike_count 1_7 -0.339
## 7 view_count -Inf_6641 -0.335
## 8 like_count 4.87519732320115_6.26909628370626 -0.331
## 9 like_count 2.99573227355399_4.87519732320115 -0.327
## 10 view_count 6641_43983 -0.308
## # ℹ 52 more rows
#Step 3: Plot
data_corr_tbl %>%
plot_correlation_funnel()
## Warning: ggrepel: 22 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
Split Data
data <- sample_n(data, 100)
# Split into train and test dataset
set.seed(1234)
data_split <- rsample::initial_split(data)
data_train <- training(data_split)
data_test <- testing(data_split)
# Further split training dataset for cross-validation
set.seed(2345)
data_cv <- rsample::vfold_cv(data_train)
data_cv
## # 10-fold cross-validation
## # A tibble: 10 × 2
## splits id
## <list> <chr>
## 1 <split [67/8]> Fold01
## 2 <split [67/8]> Fold02
## 3 <split [67/8]> Fold03
## 4 <split [67/8]> Fold04
## 5 <split [67/8]> Fold05
## 6 <split [68/7]> Fold06
## 7 <split [68/7]> Fold07
## 8 <split [68/7]> Fold08
## 9 <split [68/7]> Fold09
## 10 <split [68/7]> Fold10
library(usemodels)
## Warning: package 'usemodels' was built under R version 4.4.2
usemodels::use_xgboost(like_count ~ ., data = data_train)
## xgboost_recipe <-
## recipe(formula = like_count ~ ., data = data_train) %>%
## step_zv(all_predictors())
##
## xgboost_spec <-
## boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(),
## loss_reduction = tune(), sample_size = tune()) %>%
## set_mode("classification") %>%
## set_engine("xgboost")
##
## xgboost_workflow <-
## workflow() %>%
## add_recipe(xgboost_recipe) %>%
## add_model(xgboost_spec)
##
## set.seed(6804)
## xgboost_tune <-
## tune_grid(xgboost_workflow, resamples = stop("add your rsample object"), grid = stop("add number of candidate points"))
# Specify Recipe
xgboost_recipe <-
recipe(formula = like_count ~ ., data = data_train) %>%
recipes::update_role(id, new_role = "brand variable") %>%
step_tokenize(title) %>%
step_tokenfilter(title, max_tokens = 100) %>%
step_tfidf(title) %>%
step_other(channel_title)
xgboost_recipe %>% prep() %>% juice() %>% glimpse()
## Rows: 75
## Columns: 117
## $ year <dbl> 2015, 2011, 2003, 2011, 2001, 2008, 2010, 20…
## $ brand <fct> Budweiser, Coca-Cola, Bud Light, Bud Light, …
## $ funny <fct> FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, T…
## $ show_product_quickly <fct> TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, …
## $ patriotic <fct> TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALS…
## $ celebrity <fct> FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALS…
## $ danger <fct> TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, FALSE,…
## $ animals <fct> TRUE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE…
## $ use_sex <fct> FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FAL…
## $ id <fct> otCxSnu_HXA, nAJvHsVoLq0, _cEQ5g6bzdM, JJn1v…
## $ kind <fct> youtube#video, youtube#video, youtube#video,…
## $ view_count <dbl> 491630, 87396, 3754, 350, 1475, 729583, 1160…
## $ dislike_count <dbl> 37, 2, 1, 1, 0, 180, 0, 3, 1, 15, 0, 1, 1, 4…
## $ favorite_count <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ channel_title <fct> other, alohawarriorchief, other, other, othe…
## $ category_id <fct> 23, 24, 23, 23, 24, 23, 23, 27, 23, 24, 24, …
## $ like_count <dbl> 7.7527648, 3.9889840, 2.9444390, 0.6931472, …
## $ tfidf_title_03 <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_2000 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_2001 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_2002 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_2005 <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_2007 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_2008 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_2008genesis <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_2009 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_2010 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_2012 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_2013 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_2014 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_2015 <dbl> 0.3608944, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_2016 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_2018 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_2019 <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_2020 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_42 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_44 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_720p <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_90 <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_a <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_ad <dbl> 0.1949499, 0.4678798, 0.0000000, 0.0000000, …
## $ tfidf_title_ads <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_advertisement <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_allowed <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_another <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_anthem <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_apes <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_babies <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_baby <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_bad <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_battle <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_beautiful <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_best <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_bestbuds <dbl> 0.3608944, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_better <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_big <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_bmw <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_bot <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_bowl <dbl> 0.09708228, 0.23299747, 0.00000000, 0.194164…
## $ tfidf_title_britney <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_brosnan <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_brotherhood <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_brown <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_bubbly <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_buble <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_bubly <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_bud <dbl> 0.0000000, 0.0000000, 0.7994279, 0.2664760, …
## $ tfidf_title_budlight <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_budweiser <dbl> 0.1783388, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_by <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_cards <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_carlos <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_casket <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_casting <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_cedric <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_checkout <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_chessmaster <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_cindy <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_clydesdale <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_clydesdales <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_coca <dbl> 0.0000000, 0.4678798, 0.0000000, 0.0000000, …
## $ tfidf_title_coke <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_cola <dbl> 0.0000000, 0.4678798, 0.0000000, 0.0000000, …
## $ tfidf_title_collar <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_commercial <dbl> 0.08173577, 0.00000000, 0.00000000, 0.163471…
## $ tfidf_title_commercials <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_cool <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_date <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_dog <dbl> 0.2485961, 0.0000000, 0.0000000, 0.4971922, …
## $ tfidf_title_doritos <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_e <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_elantra <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_extended <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_game <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_hd <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_hyundai <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_it <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_kia <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_light <dbl> 0.0000000, 0.0000000, 0.8211139, 0.2737046, …
## $ tfidf_title_love <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_monkey <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_nfl <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_of <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_official <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_pepsi <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_puppy <dbl> 0.3042215, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_sorento <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_spot <dbl> 0.3042215, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_super <dbl> 0.09708228, 0.23299747, 0.00000000, 0.194164…
## $ tfidf_title_superbowl <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_the <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_toyota <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_trade <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_tv <dbl> 0.3042215, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_vs <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_xli <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_xlix <dbl> 0.3042215, 0.0000000, 0.0000000, 0.0000000, …
# Specify Model
xgboost_spec <-
boost_tree(trees = tune(), min_n = tune(), mtry = tune(), learn_rate = tune()) %>%
set_mode("regression") %>%
set_engine("xgboost")
xgboost_workflow <-
workflow() %>%
add_recipe(xgboost_recipe) %>%
add_model(xgboost_spec)
set.seed(127)
xgboost_tune <-
tune_grid(xgboost_workflow,
resamples = data_cv,
grid = 5)
## Warning: All models failed. Run `show_notes(.Last.tune.result)` for more
## information.