Apply 1: Super Bowl Commercials

Goal: Which factors lead to the most YouTube likes?

Import Data

# Download the TidyTuesday (2021-03-02) Super Bowl ads data set.
likes <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2021/2021-03-02/youtube.csv"
)

# Per-column summary: types, missingness, and distributions.
skimr::skim(likes)

Data summary
Name	likes
Number of rows	247
Number of columns	25
_______________________
Column type frequency:
character	10
logical	7
numeric	7
POSIXct	1
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	n_unique
brand	0	1.00	3	9	10
superbowl_ads_dot_com_url	0	1.00	34	120	244
youtube_url	11	0.96	43	43	233
id	11	0.96	11	11	233
kind	16	0.94	13	13	1
etag	16	0.94	27	27	228
title	16	0.94	6	99	228
description	50	0.80	3	3527	194
thumbnail	129	0.48	48	48	118
channel_title	16	0.94	3	37	185

Variable type: logical

skim_variable	complete_rate	mean	count
funny	1	0.69	TRU: 171, FAL: 76
show_product_quickly	1	0.68	TRU: 169, FAL: 78
patriotic	1	0.17	FAL: 206, TRU: 41
celebrity	1	0.29	FAL: 176, TRU: 71
danger	1	0.30	FAL: 172, TRU: 75
animals	1	0.37	FAL: 155, TRU: 92
use_sex	1	0.27	FAL: 181, TRU: 66

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
year	0	1.00	2010.19	5.86	2000	2005	2010	2015.00	2020	▇▇▇▇▆
view_count	16	0.94	1407556.46	11971111.01	10	6431	41379	170015.50	176373378	▇▁▁▁▁
like_count	22	0.91	4146.03	23920.40	0	19	130	527.00	275362	▇▁▁▁▁
dislike_count	22	0.91	833.54	6948.52	0	1	7	24.00	92990	▇▁▁▁▁
favorite_count	16	0.94	0.00	0.00	0	0	0	0.00	0	▁▁▇▁▁
comment_count	25	0.90	188.64	986.46	0	1	10	50.75	9190	▇▁▁▁▁
category_id	16	0.94	19.32	8.00	1	17	23	24.00	29	▃▁▂▆▇

Variable type: POSIXct

skim_variable	n_missing	complete_rate	min	max	median	n_unique
published_at	16	0.94	2006-02-06 10:02:36	2021-01-27 13:11:29	2013-01-31 09:13:55	227

# Treat missing values and transform the data.
#
# Result: one row per ad with no missing values, where
#   * view_count / like_count / comment_count are on the natural-log scale,
#   * logical flags, category_id and all character columns except brand are
#     factors,
#   * brand stays a character column.
Youtube <- likes %>%
  # Drop the sparse thumbnail column, the URL columns, and dislike_count.
  select(
    -thumbnail, -superbowl_ads_dot_com_url, -youtube_url, -dislike_count
  ) %>%
  # Keep only complete rows.
  na.omit() %>%
  mutate(
    # The raw counts contain zeros, and log(0) is -Inf. A non-finite outcome
    # breaks the later plots and model fitting, so floor counts at 1 before
    # taking the log (0 and 1 both map to 0). Flooring, rather than log1p(),
    # leaves every positive count on exactly the same scale as plain log().
    across(
      c(view_count, like_count, comment_count),
      function(x) log(pmax(x, 1))
    ),
    across(where(is.logical), as.factor),
    # brand is deliberately left as character.
    across(where(is.character) & !brand, as.factor),
    category_id = as.factor(category_id)
  )

Explore Data

Identify Good Predictors

funny

# Likes vs. the funny flag.
# like_count was already log-transformed when Youtube was built, so no log
# axis is applied here: logging it a second time produces NaN/Inf values.
Youtube %>%
  # Drop any non-finite log counts explicitly instead of relying on ggplot2
  # to discard them with a warning.
  filter(is.finite(like_count)) %>%
  # as.numeric() on the factor gives its level codes (1 = FALSE, 2 = TRUE).
  ggplot(aes(like_count, as.numeric(funny))) +
  geom_point() +
  labs(x = "log(like count)", y = "funny (1 = FALSE, 2 = TRUE)")

## Warning in transformation$transform(x): NaNs produced

## Warning in scale_x_log10(): log-10 transformation introduced infinite values.

## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_point()`).

brand

# Distribution of views per brand.
# view_count was already log-transformed when Youtube was built, so the axis
# is left linear: scale_y_log10() would take the log a second time.
Youtube %>%
  ggplot(aes(x = brand, y = view_count)) +
  geom_boxplot() +
  labs(x = "Brand", y = "log(view count)")

title

# Average (log) like count per brand, for brands with more than 10 ads.
Youtube %>%
  # A single -Inf (log of a zero count) would make a brand's mean -Inf and
  # silently drop that brand from the plot, so keep finite values only.
  filter(is.finite(like_count)) %>%

  # Group by brand
  group_by(brand) %>%

  # Calculate average like count per brand (like_count is on the log scale)
  summarize(
    like_count = mean(like_count, na.rm = TRUE),
    n = n()
  ) %>%

  # Keep well-represented brands, ranked by average like count
  filter(n > 10) %>%
  slice_max(order_by = like_count, n = 20) %>%

  # Plot
  ggplot(aes(x = like_count, y = fct_reorder(brand, like_count))) +
  geom_point() +
  labs(
    # No hard-coded brand count: how many brands appear depends on the filter.
    title = "Brands with Highest Avg Like Count (log scale)",
    x = "Average log(like count)",
    y = "Brand"
  )

EDA Shortcut

# Step 1: drop identifier / free-text columns, then binarize what is left.
# category_id is kept simply by not excluding it.
data_binarized_table <- Youtube %>%
  select(
    -id, -kind, -etag, -published_at,
    -description, -channel_title, -title
  ) %>%
  binarize()


glimpse(data_binarized_table)

## Rows: 190
## Columns: 51
## $ `year__-Inf_2006`                                 <dbl> 0, 1, 0, 1, 0, 0, 0,…
## $ year__2006_2010                                   <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ year__2010_2014.75                                <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ year__2014.75_Inf                                 <dbl> 1, 0, 1, 0, 1, 1, 1,…
## $ brand__Bud_Light                                  <dbl> 1, 1, 0, 1, 0, 0, 0,…
## $ brand__Budweiser                                  <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ `brand__Coca-Cola`                                <dbl> 0, 0, 0, 0, 0, 1, 0,…
## $ brand__Doritos                                    <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ `brand__E-Trade`                                  <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ brand__Hynudai                                    <dbl> 0, 0, 1, 0, 0, 0, 0,…
## $ brand__Kia                                        <dbl> 0, 0, 0, 0, 0, 0, 1,…
## $ brand__NFL                                        <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ brand__Pepsi                                      <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ brand__Toyota                                     <dbl> 0, 0, 0, 0, 1, 0, 0,…
## $ funny__FALSE                                      <dbl> 0, 0, 1, 0, 0, 0, 1,…
## $ funny__TRUE                                       <dbl> 1, 1, 0, 1, 1, 1, 0,…
## $ show_product_quickly__FALSE                       <dbl> 0, 1, 0, 0, 0, 1, 1,…
## $ show_product_quickly__TRUE                        <dbl> 1, 0, 1, 1, 1, 0, 0,…
## $ patriotic__FALSE                                  <dbl> 1, 1, 1, 1, 1, 1, 1,…
## $ patriotic__TRUE                                   <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ celebrity__FALSE                                  <dbl> 0, 1, 1, 1, 0, 0, 0,…
## $ celebrity__TRUE                                   <dbl> 1, 0, 0, 0, 1, 1, 1,…
## $ danger__FALSE                                     <dbl> 0, 0, 1, 0, 0, 1, 1,…
## $ danger__TRUE                                      <dbl> 1, 1, 0, 1, 1, 0, 0,…
## $ animals__FALSE                                    <dbl> 1, 0, 1, 0, 0, 0, 1,…
## $ animals__TRUE                                     <dbl> 0, 1, 0, 1, 1, 1, 0,…
## $ use_sex__FALSE                                    <dbl> 1, 1, 1, 0, 1, 1, 1,…
## $ use_sex__TRUE                                     <dbl> 0, 0, 0, 1, 0, 0, 0,…
## $ `view_count__-Inf_9.25738853430794`               <dbl> 0, 0, 1, 0, 0, 0, 0,…
## $ view_count__9.25738853430794_10.9763015508417     <dbl> 1, 0, 0, 1, 1, 0, 1,…
## $ view_count__10.9763015508417_12.2976472113071     <dbl> 0, 1, 0, 0, 0, 0, 0,…
## $ view_count__12.2976472113071_Inf                  <dbl> 0, 0, 0, 0, 0, 1, 0,…
## $ `like_count__-Inf_3.46573590279973`               <dbl> 0, 0, 1, 1, 0, 0, 0,…
## $ like_count__3.46573590279973_5.10587200661176     <dbl> 0, 1, 0, 0, 1, 0, 1,…
## $ like_count__5.10587200661176_6.37800137471335     <dbl> 1, 0, 0, 0, 0, 0, 0,…
## $ like_count__6.37800137471335_Inf                  <dbl> 0, 0, 0, 0, 0, 1, 0,…
## $ `comment_count__-Inf_0.693147180559945`           <dbl> 0, 0, 1, 1, 0, 0, 0,…
## $ comment_count__0.693147180559945_2.70805020110221 <dbl> 1, 1, 0, 0, 1, 0, 1,…
## $ comment_count__2.70805020110221_4.17438726989564  <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ comment_count__4.17438726989564_Inf               <dbl> 0, 0, 0, 0, 0, 1, 0,…
## $ category_id__1                                    <dbl> 0, 0, 0, 0, 1, 0, 0,…
## $ category_id__2                                    <dbl> 0, 0, 0, 0, 0, 0, 1,…
## $ category_id__10                                   <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ category_id__15                                   <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ category_id__17                                   <dbl> 0, 1, 0, 0, 0, 0, 0,…
## $ category_id__22                                   <dbl> 0, 0, 1, 0, 0, 0, 0,…
## $ category_id__23                                   <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ category_id__24                                   <dbl> 0, 0, 0, 1, 0, 1, 0,…
## $ category_id__25                                   <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ category_id__27                                   <dbl> 1, 0, 0, 0, 0, 0, 0,…
## $ `category_id__-OTHER`                             <dbl> 0, 0, 0, 0, 0, 0, 0,…

# Step 2: correlate every binarized feature with the highest like_count bin
data_corr_table <- correlate(
  data_binarized_table,
  like_count__6.37800137471335_Inf
)

data_corr_table

## # A tibble: 51 × 3
##    feature       bin                                correlation
##    <fct>         <chr>                                    <dbl>
##  1 like_count    6.37800137471335_Inf                     1    
##  2 view_count    12.2976472113071_Inf                     0.777
##  3 comment_count 4.17438726989564_Inf                     0.761
##  4 comment_count -Inf_0.693147180559945                  -0.362
##  5 like_count    -Inf_3.46573590279973                   -0.343
##  6 view_count    -Inf_9.25738853430794                   -0.338
##  7 view_count    9.25738853430794_10.9763015508417       -0.333
##  8 like_count    5.10587200661176_6.37800137471335       -0.333
##  9 like_count    3.46573590279973_5.10587200661176       -0.329
## 10 comment_count 0.693147180559945_2.70805020110221      -0.329
## # ℹ 41 more rows

# Step 3: draw the correlation funnel
plot_correlation_funnel(data_corr_table)

## Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Split Data

# Hold out a test set; the seed makes the split reproducible.
set.seed(123)
youtube_split <- Youtube %>% initial_split()
youtube_train <- youtube_split %>% training()
youtube_test <- youtube_split %>% testing()

# Resample the training set for cross-validation (used when tuning).
set.seed(234)
youtube_cv <- youtube_train %>% vfold_cv()
youtube_cv

## #  10-fold cross-validation 
## # A tibble: 10 × 2
##    splits           id    
##    <list>           <chr> 
##  1 <split [127/15]> Fold01
##  2 <split [127/15]> Fold02
##  3 <split [128/14]> Fold03
##  4 <split [128/14]> Fold04
##  5 <split [128/14]> Fold05
##  6 <split [128/14]> Fold06
##  7 <split [128/14]> Fold07
##  8 <split [128/14]> Fold08
##  9 <split [128/14]> Fold09
## 10 <split [128/14]> Fold10

# Print boilerplate recipe / spec / workflow code for an xgboost model.
library(usemodels)
use_xgboost(like_count ~ ., data = youtube_train)

## xgboost_recipe <- 
##   recipe(formula = like_count ~ ., data = youtube_train) %>% 
##   step_zv(all_predictors()) 
## 
## xgboost_spec <- 
##   boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(), 
##     loss_reduction = tune(), sample_size = tune()) %>% 
##   set_mode("classification") %>% 
##   set_engine("xgboost") 
## 
## xgboost_workflow <- 
##   workflow() %>% 
##   add_recipe(xgboost_recipe) %>% 
##   add_model(xgboost_spec) 
## 
## set.seed(36499)
## xgboost_tune <-
##   tune_grid(xgboost_workflow, resamples = stop("add your rsample object"), grid = stop("add number of candidate points"))

# Preprocessing recipe: predict like_count from title text, category, brand
# and the funny flag.
xgboost_youtube_recipe1 <- recipe(
  formula = like_count ~ title + category_id + brand + funny,
  data = youtube_train
) %>%
  # Pool infrequent category_id levels (below 5%) into "other"
  step_other(category_id, threshold = 0.05) %>%
  # Title text -> tokens -> at most 100 tokens -> tf-idf features
  step_tokenize(title) %>%
  step_tokenfilter(title, max_tokens = 100) %>%
  step_tfidf(title) %>%
  # One-hot encode the remaining categorical predictors
  step_dummy(category_id, brand, funny, one_hot = TRUE) %>%
  # Remove zero-variance columns, then centre and scale
  step_zv(all_numeric_predictors()) %>%
  step_normalize(all_numeric_predictors())

# Preview the processed training data
glimpse(juice(prep(xgboost_youtube_recipe1)))

## Rows: 142
## Columns: 120
## $ like_count              <dbl> 7.686621, 10.120211, 12.074990, 6.637258, 5.76…
## $ tfidf_title_2000        <dbl> -0.1456353, -0.1456353, -0.1456353, -0.1456353…
## $ tfidf_title_2001        <dbl> -0.1129088, -0.1129088, -0.1129088, -0.1129088…
## $ tfidf_title_2002        <dbl> -0.1082934, -0.1082934, -0.1082934, -0.1082934…
## $ tfidf_title_2005        <dbl> -0.1227573, -0.1227573, -0.1227573, -0.1227573…
## $ tfidf_title_2007        <dbl> -0.1657765, -0.1657765, -0.1657765, -0.1657765…
## $ tfidf_title_2009        <dbl> -0.1678792, -0.1678792, -0.1678792, -0.1678792…
## $ tfidf_title_2010        <dbl> -0.1566958, -0.1566958, -0.1566958, -0.1566958…
## $ tfidf_title_2012        <dbl> -0.2180307, -0.2180307, -0.2180307, -0.2180307…
## $ tfidf_title_2013        <dbl> -0.1639145, -0.1639145, -0.1639145, -0.1639145…
## $ tfidf_title_2014        <dbl> -0.2069703, -0.2069703, -0.2069703, -0.2069703…
## $ tfidf_title_2015        <dbl> -0.1667307, -0.1667307, -0.1667307, -0.1667307…
## $ tfidf_title_2016        <dbl> -0.1675157, -0.1675157, -0.1675157, -0.1675157…
## $ tfidf_title_2017        <dbl> -0.1129088, -0.1129088, -0.1129088, -0.1129088…
## $ tfidf_title_2018        <dbl> -0.1878505, -0.1878505, -0.1878505, -0.1878505…
## $ tfidf_title_2019        <dbl> -0.1881418, -0.1881418, -0.1881418, -0.1881418…
## $ tfidf_title_2020        <dbl> -0.1669423, -0.1669423, -0.1669423, -0.1669423…
## $ tfidf_title_44          <dbl> -0.1129088, -0.1129088, -0.1129088, -0.1129088…
## $ tfidf_title_a           <dbl> -0.1670646, -0.1670646, -0.1670646, -0.1670646…
## $ tfidf_title_ad          <dbl> -0.3395087, -0.3395087, -0.3395087, -0.3395087…
## $ tfidf_title_ads         <dbl> -0.1191013, -0.1191013, -0.1191013, -0.1191013…
## $ tfidf_title_baby        <dbl> -0.1257805, -0.1257805, -0.1257805, 4.2041172,…
## $ tfidf_title_best        <dbl> -0.1461105, -0.1461105, -0.1461105, -0.1461105…
## $ tfidf_title_big         <dbl> -0.1434314, -0.1434314, -0.1434314, -0.1434314…
## $ tfidf_title_bowl        <dbl> -0.7864152, -0.7864152, 1.8550289, -0.7864152,…
## $ tfidf_title_bud         <dbl> -0.4911356, -0.4911356, -0.4911356, -0.4911356…
## $ tfidf_title_budweiser   <dbl> 5.119667, 2.376938, -0.365792, -0.365792, -0.3…
## $ tfidf_title_camry       <dbl> -0.1191013, -0.1191013, -0.1191013, -0.1191013…
## $ tfidf_title_cedric      <dbl> -0.1623136, -0.1623136, -0.1623136, -0.1623136…
## $ tfidf_title_clydesdale  <dbl> -0.110474, -0.110474, -0.110474, -0.110474, -0…
## $ tfidf_title_coca        <dbl> -0.2346209, -0.2346209, -0.2346209, -0.2346209…
## $ tfidf_title_coke        <dbl> -0.1645665, -0.1645665, -0.1645665, -0.1645665…
## $ tfidf_title_cola        <dbl> -0.2482818, -0.2482818, -0.2482818, -0.2482818…
## $ tfidf_title_commercial  <dbl> -0.93672021, 2.50526931, 0.78427455, 0.4400756…
## $ tfidf_title_commercials <dbl> -0.1186052, -0.1186052, -0.1186052, -0.1186052…
## $ tfidf_title_cool        <dbl> -0.1461105, -0.1461105, -0.1461105, -0.1461105…
## $ tfidf_title_crash       <dbl> -0.1897352, -0.1897352, -0.1897352, -0.1897352…
## $ tfidf_title_date        <dbl> -0.1167563, -0.1167563, -0.1167563, -0.1167563…
## $ tfidf_title_diet        <dbl> -0.1129088, -0.1129088, -0.1129088, -0.1129088…
## $ tfidf_title_dilly       <dbl> -0.08391814, -0.08391814, -0.08391814, -0.0839…
## $ tfidf_title_dog         <dbl> -0.1543472, -0.1543472, -0.1543472, -0.1543472…
## $ tfidf_title_dogs        <dbl> -0.1002749, -0.1002749, -0.1002749, -0.1002749…
## $ tfidf_title_doritos     <dbl> -0.2600431, -0.2600431, -0.2600431, -0.2600431…
## $ tfidf_title_e           <dbl> -0.1645665, -0.1645665, -0.1645665, -0.1645665…
## $ tfidf_title_elantra     <dbl> -0.1160093, -0.1160093, -0.1160093, -0.1160093…
## $ tfidf_title_epic        <dbl> -0.1191013, -0.1191013, -0.1191013, -0.1191013…
## $ tfidf_title_etrade      <dbl> -0.1508209, -0.1508209, -0.1508209, 3.8647862,…
## $ tfidf_title_extended    <dbl> -0.1462371, -0.1462371, -0.1462371, -0.1462371…
## $ tfidf_title_factory     <dbl> -0.1187455, -0.1187455, -0.1187455, -0.1187455…
## $ tfidf_title_fantasy     <dbl> -0.1191013, -0.1191013, -0.1191013, -0.1191013…
## $ tfidf_title_featuring   <dbl> -0.1129088, -0.1129088, -0.1129088, -0.1129088…
## $ tfidf_title_fly         <dbl> -0.1191013, -0.1191013, -0.1191013, -0.1191013…
## $ tfidf_title_ft          <dbl> -0.1129088, -0.1129088, -0.1129088, -0.1129088…
## $ tfidf_title_full        <dbl> -0.1183625, -0.1183625, -0.1183625, -0.1183625…
## $ tfidf_title_funny       <dbl> -0.1370193, -0.1370193, -0.1370193, -0.1370193…
## $ tfidf_title_game        <dbl> -0.1645354, -0.1645354, -0.1645354, -0.1645354…
## $ tfidf_title_girlfriend  <dbl> -0.1129088, -0.1129088, -0.1129088, -0.1129088…
## $ tfidf_title_happiness   <dbl> -0.1191013, -0.1191013, -0.1191013, -0.1191013…
## $ tfidf_title_hd          <dbl> -0.1634768, -0.1634768, -0.1634768, -0.1634768…
## $ tfidf_title_hyundai     <dbl> -0.2217037, -0.2217037, -0.2217037, -0.2217037…
## $ tfidf_title_inside      <dbl> -0.1063756, -0.1063756, -0.1063756, -0.1063756…
## $ tfidf_title_island      <dbl> -0.1191013, -0.1191013, -0.1191013, -0.1191013…
## $ tfidf_title_jackie      <dbl> -0.1183625, -0.1183625, -0.1183625, -0.1183625…
## $ tfidf_title_kia         <dbl> -0.212374, -0.212374, -0.212374, -0.212374, -0…
## $ tfidf_title_king        <dbl> -0.1178874, -0.1178874, -0.1178874, -0.1178874…
## $ tfidf_title_legends     <dbl> -0.1178874, -0.1178874, -0.1178874, -0.1178874…
## $ tfidf_title_light       <dbl> -0.4811747, -0.4811747, -0.4811747, -0.4811747…
## $ tfidf_title_lighta      <dbl> -0.1191013, -0.1191013, -0.1191013, -0.1191013…
## $ tfidf_title_love        <dbl> -0.1114199, -0.1114199, -0.1114199, -0.1114199…
## $ tfidf_title_meter       <dbl> -0.1456353, -0.1456353, -0.1456353, -0.1456353…
## $ tfidf_title_new         <dbl> -0.182815, -0.182815, -0.182815, -0.182815, -0…
## $ tfidf_title_nfl         <dbl> -0.1659398, -0.1659398, 6.8232194, -0.1659398,…
## $ tfidf_title_of          <dbl> -0.1380102, -0.1380102, -0.1380102, -0.1380102…
## $ tfidf_title_official    <dbl> -0.1683342, -0.1683342, -0.1683342, -0.1683342…
## $ tfidf_title_on          <dbl> -0.146020, -0.146020, -0.146020, -0.146020, -0…
## $ tfidf_title_one         <dbl> -0.1186052, -0.1186052, -0.1186052, -0.1186052…
## $ tfidf_title_optima      <dbl> -0.1129088, -0.1129088, -0.1129088, -0.1129088…
## $ tfidf_title_party       <dbl> -0.1110605, -0.1110605, -0.1110605, -0.1110605…
## $ tfidf_title_pepsi       <dbl> -0.2727937, -0.2727937, -0.2727937, -0.2727937…
## $ tfidf_title_puppy       <dbl> -0.114315, -0.114315, -0.114315, -0.114315, -0…
## $ tfidf_title_ride        <dbl> -0.1183625, -0.1183625, -0.1183625, -0.1183625…
## $ tfidf_title_spot        <dbl> -0.1183625, -0.1183625, -0.1183625, -0.1183625…
## $ tfidf_title_starring    <dbl> -0.1392022, -0.1392022, -0.1392022, -0.1392022…
## $ tfidf_title_super       <dbl> -0.7864152, -0.7864152, 1.8550289, -0.7864152,…
## $ tfidf_title_superbowl   <dbl> -0.2784949, -0.2784949, -0.2784949, 3.2090502,…
## $ tfidf_title_the         <dbl> -0.3175183, -0.3175183, -0.3175183, -0.3175183…
## $ tfidf_title_to          <dbl> -0.1160093, -0.1160093, -0.1160093, -0.1160093…
## $ tfidf_title_toyota      <dbl> -0.1353587, -0.1353587, -0.1353587, -0.1353587…
## $ tfidf_title_trade       <dbl> -0.1645665, -0.1645665, -0.1645665, -0.1645665…
## $ tfidf_title_tv          <dbl> -0.2030553, -0.2030553, -0.2030553, -0.2030553…
## $ tfidf_title_up          <dbl> -0.1178874, -0.1178874, -0.1178874, -0.1178874…
## $ tfidf_title_usa         <dbl> -0.1411492, -0.1411492, -0.1411492, -0.1411492…
## $ tfidf_title_version     <dbl> -0.1680323, -0.1680323, -0.1680323, -0.1680323…
## $ tfidf_title_vs          <dbl> -0.14436, -0.14436, -0.14436, -0.14436, -0.144…
## $ tfidf_title_winner      <dbl> -0.1897352, -0.1897352, -0.1897352, -0.1897352…
## $ tfidf_title_x           <dbl> -0.1337423, -0.1337423, -0.1337423, -0.1337423…
## $ tfidf_title_xli         <dbl> -0.1187455, -0.1187455, -0.1187455, -0.1187455…
## $ tfidf_title_xliii       <dbl> -0.2059473, -0.2059473, -0.2059473, 5.7783139,…
## $ tfidf_title_xliv        <dbl> -0.1151266, -0.1151266, -0.1151266, -0.1151266…
## $ tfidf_title_xxxvi       <dbl> -0.1160093, -0.1160093, -0.1160093, -0.1160093…
## $ tfidf_title_zero        <dbl> -0.1191013, -0.1191013, -0.1191013, -0.1191013…
## $ category_id_X1          <dbl> -0.2742701, -0.2742701, -0.2742701, -0.2742701…
## $ category_id_X2          <dbl> -0.2742701, -0.2742701, -0.2742701, -0.2742701…
## $ category_id_X17         <dbl> -0.2887527, -0.2887527, 3.4387824, -0.2887527,…
## $ category_id_X22         <dbl> -0.3550914, -0.3550914, -0.3550914, -0.3550914…
## $ category_id_X23         <dbl> -0.4828346, 2.0565176, -0.4828346, -0.4828346,…
## $ category_id_X24         <dbl> 1.2720666, -0.7805863, -0.7805863, 1.2720666, …
## $ category_id_other       <dbl> -0.3295524, -0.3295524, -0.3295524, -0.3295524…
## $ brand_Bud.Light         <dbl> -0.5699116, -0.5699116, -0.5699116, -0.5699116…
## $ brand_Budweiser         <dbl> 2.2095344, 2.2095344, -0.4493968, -0.4493968, …
## $ brand_Coca.Cola         <dbl> -0.3027501, -0.3027501, -0.3027501, -0.3027501…
## $ brand_Doritos           <dbl> -0.3550914, -0.3550914, -0.3550914, -0.3550914…
## $ brand_E.Trade           <dbl> -0.243477, -0.243477, -0.243477, 4.078240, -0.…
## $ brand_Hynudai           <dbl> -0.3163313, -0.3163313, -0.3163313, -0.3163313…
## $ brand_Kia               <dbl> -0.2269068, -0.2269068, -0.2269068, -0.2269068…
## $ brand_NFL               <dbl> -0.2093011, -0.2093011, 4.7441588, -0.2093011,…
## $ brand_Pepsi             <dbl> -0.3550914, -0.3550914, -0.3550914, -0.3550914…
## $ brand_Toyota            <dbl> -0.1903663, -0.1903663, -0.1903663, -0.1903663…
## $ funny_FALSE.            <dbl> -0.6567227, -0.6567227, 1.5119894, -0.6567227,…
## $ funny_TRUE.             <dbl> 0.6567227, 0.6567227, -1.5119894, 0.6567227, -…

# Model specification: xgboost regression with two tunable hyperparameters
# (number of trees and minimum node size).
xgboost_spec_youtube1 <-
  boost_tree(min_n = tune(), trees = tune()) %>%
  set_engine("xgboost") %>%
  set_mode("regression")

# Bundle the model specification and the preprocessing recipe in a workflow
xgboost_workflow_youtube1 <-
  workflow() %>%
  add_model(xgboost_spec_youtube1) %>%
  add_recipe(xgboost_youtube_recipe1)

# Tune over 5 candidate parameter sets using the cross-validation folds
set.seed(678)
tuned_youtube1 <- xgboost_workflow_youtube1 %>%
  tune_grid(resamples = youtube_cv, grid = 5)

## Warning: All models failed. Run `show_notes(.Last.tune.result)` for more
## information.

Apply 1: Super Bowl Commercials

Shea Clark

2025-02-07

Import Data

Explore Data

Split Data