Goal: Compare and contrast Super Bowl commercials after they aired. Based on the numbers, what did these companies do well or poorly?

Click [here for the data](https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2021/2021-03-02/youtube.csv).

# Import data ----

# TidyTuesday 2021-03-02 data set: Super Bowl ads with their YouTube statistics
youtube <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2021/2021-03-02/youtube.csv"
)

# Column-by-column overview: types, missingness and distributions
skimr::skim(data = youtube)
Data summary
Name youtube
Number of rows 247
Number of columns 25
_______________________
Column type frequency:
character 10
logical 7
numeric 7
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
brand 0 1.00 3 9 0 10 0
superbowl_ads_dot_com_url 0 1.00 34 120 0 244 0
youtube_url 11 0.96 43 43 0 233 0
id 11 0.96 11 11 0 233 0
kind 16 0.94 13 13 0 1 0
etag 16 0.94 27 27 0 228 0
title 16 0.94 6 99 0 228 0
description 50 0.80 3 3527 0 194 0
thumbnail 129 0.48 48 48 0 118 0
channel_title 16 0.94 3 37 0 185 0

Variable type: logical

skim_variable n_missing complete_rate mean count
funny 0 1 0.69 TRU: 171, FAL: 76
show_product_quickly 0 1 0.68 TRU: 169, FAL: 78
patriotic 0 1 0.17 FAL: 206, TRU: 41
celebrity 0 1 0.29 FAL: 176, TRU: 71
danger 0 1 0.30 FAL: 172, TRU: 75
animals 0 1 0.37 FAL: 155, TRU: 92
use_sex 0 1 0.27 FAL: 181, TRU: 66

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1.00 2010.19 5.86 2000 2005 2010 2015.00 2020 ▇▇▇▇▆
view_count 16 0.94 1407556.46 11971111.01 10 6431 41379 170015.50 176373378 ▇▁▁▁▁
like_count 22 0.91 4146.03 23920.40 0 19 130 527.00 275362 ▇▁▁▁▁
dislike_count 22 0.91 833.54 6948.52 0 1 7 24.00 92990 ▇▁▁▁▁
favorite_count 16 0.94 0.00 0.00 0 0 0 0.00 0 ▁▁▇▁▁
comment_count 25 0.90 188.64 986.46 0 1 10 50.75 9190 ▇▁▁▁▁
category_id 16 0.94 19.32 8.00 1 17 23 24.00 29 ▃▁▂▆▇

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
published_at 16 0.94 2006-02-06 10:02:36 2021-01-27 13:11:29 2013-01-31 09:13:55 227
# Build the analysis table used by every later step
data <- youtube %>%
  # Treat missing values: drop the listed columns, then keep complete rows only
  select(
    -c(
      thumbnail, description, etag, comment_count,
      youtube_url, published_at, superbowl_ads_dot_com_url
    )
  ) %>%
  na.omit() %>%
  mutate(
    # like_count is positively skewed, so it is stored as log(count + 1)
    like_count = log(like_count + 1),
    category_id = as.factor(category_id),
    # Encode every character and logical column as a factor ...
    across(where(is.character), as.factor),
    across(where(is.logical), as.factor),
    # ... except title, which goes back to plain text for later tokenization
    title = as.character(title)
  )

Explore Data

Identify predictors

Like count

# Likes against views.
# like_count was already log-transformed when `data` was prepared, so it is
# plotted as-is; logging it a second time turned the zero values into -Inf.
# The raw, right-skewed view_count is the axis that needs a log scale.
data %>%
  ggplot(aes(view_count, like_count)) +
  scale_x_log10() +
  geom_point()

Dislike count

# Scatter plot of dislikes against views, both on their stored scales
ggplot(data, aes(x = view_count, y = dislike_count)) +
  geom_point()

Mean view count by title word

# Mean view count for the words that occur most often in video titles
data %>%
  # Tokenize: one row per title word. Tokens get their own `word` column so
  # the existing `brand` column is not overwritten.
  unnest_tokens(output = word, input = title) %>%
  # Calculate: mean views and number of occurrences per word
  group_by(word) %>%
  summarise(
    view_count = mean(view_count),
    n = n()
  ) %>%
  ungroup() %>%
  # Keep words seen more than 10 times, then the (up to) 20 words with the
  # highest mean view count. Ranking by the token itself would only sort
  # alphabetically.
  filter(n > 10) %>%
  slice_max(order_by = view_count, n = 20) %>%
  # Plot
  ggplot(aes(view_count, fct_reorder(word, view_count))) +
  geom_point() +
  labs(x = "View Count", y = "Title Words")

EDA Shortcut

# Step 1: Prepare data
# Drop the per-video id and title columns, then binarize the remaining
# features into 0/1 indicator columns.
data_binarized_tbl <- binarize(select(data, -id, -title))

glimpse(data_binarized_tbl)
## Rows: 225
## Columns: 62
## $ `year__-Inf_2005`                             <dbl> 0, 0, 0, 0, 1, 0, 0, 0, …
## $ year__2005_2010                               <dbl> 0, 0, 1, 0, 0, 0, 0, 0, …
## $ year__2010_2015                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ year__2015_Inf                                <dbl> 1, 1, 0, 1, 0, 1, 1, 1, …
## $ brand__Bud_Light                              <dbl> 0, 1, 1, 0, 1, 0, 0, 0, …
## $ brand__Budweiser                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `brand__Coca-Cola`                            <dbl> 0, 0, 0, 0, 0, 0, 1, 0, …
## $ brand__Doritos                                <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `brand__E-Trade`                              <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ brand__Hynudai                                <dbl> 0, 0, 0, 1, 0, 0, 0, 0, …
## $ brand__Kia                                    <dbl> 0, 0, 0, 0, 0, 0, 0, 1, …
## $ brand__NFL                                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ brand__Pepsi                                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ brand__Toyota                                 <dbl> 1, 0, 0, 0, 0, 1, 0, 0, …
## $ funny__FALSE                                  <dbl> 1, 0, 0, 1, 0, 0, 0, 1, …
## $ funny__TRUE                                   <dbl> 0, 1, 1, 0, 1, 1, 1, 0, …
## $ show_product_quickly__FALSE                   <dbl> 1, 0, 1, 0, 0, 0, 1, 1, …
## $ show_product_quickly__TRUE                    <dbl> 0, 1, 0, 1, 1, 1, 0, 0, …
## $ patriotic__FALSE                              <dbl> 1, 1, 1, 1, 1, 1, 1, 1, …
## $ patriotic__TRUE                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ celebrity__FALSE                              <dbl> 1, 0, 1, 1, 1, 0, 0, 0, …
## $ celebrity__TRUE                               <dbl> 0, 1, 0, 0, 0, 1, 1, 1, …
## $ danger__FALSE                                 <dbl> 1, 0, 0, 1, 0, 0, 1, 1, …
## $ danger__TRUE                                  <dbl> 0, 1, 1, 0, 1, 1, 0, 0, …
## $ animals__FALSE                                <dbl> 1, 1, 0, 1, 0, 0, 0, 1, …
## $ animals__TRUE                                 <dbl> 0, 0, 1, 0, 1, 1, 1, 0, …
## $ use_sex__FALSE                                <dbl> 1, 1, 1, 1, 0, 1, 1, 1, …
## $ use_sex__TRUE                                 <dbl> 0, 0, 0, 0, 1, 0, 0, 0, …
## $ `view_count__-Inf_6641`                       <dbl> 0, 0, 0, 1, 0, 0, 0, 0, …
## $ view_count__6641_43983                        <dbl> 0, 0, 0, 0, 1, 1, 0, 1, …
## $ view_count__43983_175482                      <dbl> 1, 1, 1, 0, 0, 0, 0, 0, …
## $ view_count__175482_Inf                        <dbl> 0, 0, 0, 0, 0, 0, 1, 0, …
## $ `like_count__-Inf_2.99573227355399`           <dbl> 0, 0, 0, 1, 0, 0, 0, 0, …
## $ like_count__2.99573227355399_4.87519732320115 <dbl> 0, 0, 1, 0, 1, 1, 0, 1, …
## $ like_count__4.87519732320115_6.26909628370626 <dbl> 0, 1, 0, 0, 0, 0, 0, 0, …
## $ like_count__6.26909628370626_Inf              <dbl> 1, 0, 0, 0, 0, 0, 1, 0, …
## $ `dislike_count__-Inf_1`                       <dbl> 0, 0, 0, 1, 0, 0, 0, 0, …
## $ dislike_count__1_7                            <dbl> 0, 0, 0, 0, 1, 0, 0, 1, …
## $ dislike_count__7_24                           <dbl> 0, 1, 1, 0, 0, 1, 0, 0, …
## $ dislike_count__24_Inf                         <dbl> 1, 0, 0, 0, 0, 0, 1, 0, …
## $ channel_title__BudBowlXLII                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `channel_title__Coca-Cola`                    <dbl> 0, 0, 0, 0, 0, 0, 1, 0, …
## $ channel_title__Funny_Commercials              <dbl> 1, 0, 0, 0, 0, 1, 0, 0, …
## $ channel_title__John_Keehler                   <dbl> 0, 0, 1, 0, 0, 0, 0, 0, …
## $ channel_title__NFL                            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ channel_title__omon007                        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ channel_title__reggiep08v2                    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ channel_title__The_Hall_of_Advertising        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ channel_title__USA_TODAY                      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ channel_title__World_Hyundai_Matteson         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `channel_title__-OTHER`                       <dbl> 0, 1, 0, 1, 1, 0, 0, 1, …
## $ category_id__1                                <dbl> 1, 0, 0, 0, 0, 1, 0, 0, …
## $ category_id__2                                <dbl> 0, 0, 0, 0, 0, 0, 0, 1, …
## $ category_id__10                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ category_id__15                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ category_id__17                               <dbl> 0, 0, 1, 0, 0, 0, 0, 0, …
## $ category_id__22                               <dbl> 0, 0, 0, 1, 0, 0, 0, 0, …
## $ category_id__23                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ category_id__24                               <dbl> 0, 0, 0, 0, 1, 0, 1, 0, …
## $ category_id__25                               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
## $ category_id__27                               <dbl> 0, 1, 0, 0, 0, 0, 0, 0, …
## $ `category_id__-OTHER`                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, …
# Step 2: Correlate
# Correlate every indicator column with the top like_count bin (the target).

data_corr_tbl <- correlate(
  data_binarized_tbl,
  target = like_count__6.26909628370626_Inf
)

data_corr_tbl
## # A tibble: 62 × 3
##    feature       bin                               correlation
##    <fct>         <chr>                                   <dbl>
##  1 like_count    6.26909628370626_Inf                    1    
##  2 view_count    175482_Inf                              0.715
##  3 dislike_count 24_Inf                                  0.715
##  4 dislike_count -Inf_1                                 -0.355
##  5 like_count    -Inf_2.99573227355399                  -0.339
##  6 dislike_count 1_7                                    -0.339
##  7 view_count    -Inf_6641                              -0.335
##  8 like_count    4.87519732320115_6.26909628370626      -0.331
##  9 like_count    2.99573227355399_4.87519732320115      -0.327
## 10 view_count    6641_43983                             -0.308
## # ℹ 52 more rows
# Step 3: Plot
# Draw the correlation funnel from the correlations computed in Step 2.

plot_correlation_funnel(data_corr_tbl)
## Warning: ggrepel: 22 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Build Models

Split Data

# Work with a random subset of 100 rows. The seed is set BEFORE sampling so
# the subset, and every split and model derived from it, is reproducible.
set.seed(123)
data <- slice_sample(data, n = 100)

# Split into train and test dataset (rsample's default proportion).
# Re-seeding here keeps the split reproducible on its own.
set.seed(1234)
data_split <- rsample::initial_split(data)
data_train <- rsample::training(data_split)
data_test <- rsample::testing(data_split)

# Further split training dataset for cross-validation
set.seed(2345)
data_cv <- rsample::vfold_cv(data_train)
data_cv
## #  10-fold cross-validation 
## # A tibble: 10 × 2
##    splits         id    
##    <list>         <chr> 
##  1 <split [67/8]> Fold01
##  2 <split [67/8]> Fold02
##  3 <split [67/8]> Fold03
##  4 <split [67/8]> Fold04
##  5 <split [67/8]> Fold05
##  6 <split [68/7]> Fold06
##  7 <split [68/7]> Fold07
##  8 <split [68/7]> Fold08
##  9 <split [68/7]> Fold09
## 10 <split [68/7]> Fold10
library(usemodels)
## Warning: package 'usemodels' was built under R version 4.4.2
# Print a starter xgboost recipe / spec / workflow template for this formula
usemodels::use_xgboost(formula = like_count ~ ., data = data_train)
## xgboost_recipe <- 
##   recipe(formula = like_count ~ ., data = data_train) %>% 
##   step_zv(all_predictors()) 
## 
## xgboost_spec <- 
##   boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(), 
##     loss_reduction = tune(), sample_size = tune()) %>% 
##   set_mode("classification") %>% 
##   set_engine("xgboost") 
## 
## xgboost_workflow <- 
##   workflow() %>% 
##   add_recipe(xgboost_recipe) %>% 
##   add_model(xgboost_spec) 
## 
## set.seed(6804)
## xgboost_tune <-
##   tune_grid(xgboost_workflow, resamples = stop("add your rsample object"), grid = stop("add number of candidate points"))
# Specify Recipe
xgboost_recipe <-
  recipe(formula = like_count ~ ., data = data_train) %>%
  # Keep the video id for bookkeeping only; it is not a predictor
  recipes::update_role(id, new_role = "id variable") %>%
  # Turn the title text into at most 100 tf-idf features
  step_tokenize(title) %>%
  step_tokenfilter(title, max_tokens = 100) %>%
  step_tfidf(title) %>%
  # Pool infrequent channels into a single "other" level
  step_other(channel_title) %>%
  # Drop predictors that hold a single value (they carry no information and a
  # one-level factor cannot be dummy-encoded)
  step_zv(all_predictors()) %>%
  # The xgboost engine only accepts numeric predictors, and a recipe does not
  # convert factors automatically, so encode them as indicator columns here
  step_dummy(all_nominal_predictors(), one_hot = TRUE)

# Inspect the preprocessed training data
xgboost_recipe %>%
  prep() %>%
  bake(new_data = NULL) %>%
  glimpse()
## Rows: 75
## Columns: 117
## $ year                      <dbl> 2015, 2011, 2003, 2011, 2001, 2008, 2010, 20…
## $ brand                     <fct> Budweiser, Coca-Cola, Bud Light, Bud Light, …
## $ funny                     <fct> FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, T…
## $ show_product_quickly      <fct> TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, …
## $ patriotic                 <fct> TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALS…
## $ celebrity                 <fct> FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALS…
## $ danger                    <fct> TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, FALSE,…
## $ animals                   <fct> TRUE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE…
## $ use_sex                   <fct> FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FAL…
## $ id                        <fct> otCxSnu_HXA, nAJvHsVoLq0, _cEQ5g6bzdM, JJn1v…
## $ kind                      <fct> youtube#video, youtube#video, youtube#video,…
## $ view_count                <dbl> 491630, 87396, 3754, 350, 1475, 729583, 1160…
## $ dislike_count             <dbl> 37, 2, 1, 1, 0, 180, 0, 3, 1, 15, 0, 1, 1, 4…
## $ favorite_count            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ channel_title             <fct> other, alohawarriorchief, other, other, othe…
## $ category_id               <fct> 23, 24, 23, 23, 24, 23, 23, 27, 23, 24, 24, …
## $ like_count                <dbl> 7.7527648, 3.9889840, 2.9444390, 0.6931472, …
## $ tfidf_title_03            <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_2000          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_2001          <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_2002          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_2005          <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_2007          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_2008          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_2008genesis   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_2009          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_2010          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_2012          <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_2013          <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_2014          <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_2015          <dbl> 0.3608944, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_2016          <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_2018          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_2019          <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_2020          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_42            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_44            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_720p          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_90            <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_a             <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_ad            <dbl> 0.1949499, 0.4678798, 0.0000000, 0.0000000, …
## $ tfidf_title_ads           <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_advertisement <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_allowed       <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_another       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_anthem        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_apes          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_babies        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_baby          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_bad           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_battle        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_beautiful     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_best          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_bestbuds      <dbl> 0.3608944, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_better        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_big           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_bmw           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_bot           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_bowl          <dbl> 0.09708228, 0.23299747, 0.00000000, 0.194164…
## $ tfidf_title_britney       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_brosnan       <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_brotherhood   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_brown         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_bubbly        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_buble         <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_bubly         <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_bud           <dbl> 0.0000000, 0.0000000, 0.7994279, 0.2664760, …
## $ tfidf_title_budlight      <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_budweiser     <dbl> 0.1783388, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_by            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_cards         <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_carlos        <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_casket        <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_casting       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_cedric        <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_checkout      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_chessmaster   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_cindy         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_clydesdale    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_clydesdales   <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_coca          <dbl> 0.0000000, 0.4678798, 0.0000000, 0.0000000, …
## $ tfidf_title_coke          <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_cola          <dbl> 0.0000000, 0.4678798, 0.0000000, 0.0000000, …
## $ tfidf_title_collar        <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_commercial    <dbl> 0.08173577, 0.00000000, 0.00000000, 0.163471…
## $ tfidf_title_commercials   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_cool          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_date          <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_dog           <dbl> 0.2485961, 0.0000000, 0.0000000, 0.4971922, …
## $ tfidf_title_doritos       <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_e             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_elantra       <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.00…
## $ tfidf_title_extended      <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_game          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_hd            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_hyundai       <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_it            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_kia           <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_light         <dbl> 0.0000000, 0.0000000, 0.8211139, 0.2737046, …
## $ tfidf_title_love          <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_monkey        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_nfl           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_of            <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_official      <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_pepsi         <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_puppy         <dbl> 0.3042215, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_sorento       <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_spot          <dbl> 0.3042215, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_super         <dbl> 0.09708228, 0.23299747, 0.00000000, 0.194164…
## $ tfidf_title_superbowl     <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_the           <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_toyota        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_trade         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ tfidf_title_tv            <dbl> 0.3042215, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_vs            <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_xli           <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ tfidf_title_xlix          <dbl> 0.3042215, 0.0000000, 0.0000000, 0.0000000, …
# Specify Model

# Boosted-tree regression on the xgboost engine; the four listed
# hyperparameters are left open for tuning.
xgboost_spec <-
  boost_tree(
    trees = tune(),
    min_n = tune(),
    mtry = tune(),
    learn_rate = tune()
  ) %>%
  set_engine("xgboost") %>%
  set_mode("regression")

# Bundle the preprocessing recipe and the model specification
xgboost_workflow <-
  workflow() %>%
  add_model(xgboost_spec) %>%
  add_recipe(xgboost_recipe)

# Tune over the cross-validation folds with a grid of 5 candidates
set.seed(127)
xgboost_tune <- xgboost_workflow %>%
  tune_grid(resamples = data_cv, grid = 5)
## Warning: All models failed. Run `show_notes(.Last.tune.result)` for more
## information.

Evaluate Models