Goal: What factors lead to the most YouTube likes?
# Read the TidyTuesday 2021-03-02 youtube.csv file straight from GitHub,
# then print a skimr overview of every column.
likes <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2021/2021-03-02/youtube.csv"
)
skimr::skim(data = likes)
Name | likes |
Number of rows | 247 |
Number of columns | 25 |
_______________________ | |
Column type frequency: | |
character | 10 |
logical | 7 |
numeric | 7 |
POSIXct | 1 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
brand | 0 | 1.00 | 3 | 9 | 0 | 10 | 0 |
superbowl_ads_dot_com_url | 0 | 1.00 | 34 | 120 | 0 | 244 | 0 |
youtube_url | 11 | 0.96 | 43 | 43 | 0 | 233 | 0 |
id | 11 | 0.96 | 11 | 11 | 0 | 233 | 0 |
kind | 16 | 0.94 | 13 | 13 | 0 | 1 | 0 |
etag | 16 | 0.94 | 27 | 27 | 0 | 228 | 0 |
title | 16 | 0.94 | 6 | 99 | 0 | 228 | 0 |
description | 50 | 0.80 | 3 | 3527 | 0 | 194 | 0 |
thumbnail | 129 | 0.48 | 48 | 48 | 0 | 118 | 0 |
channel_title | 16 | 0.94 | 3 | 37 | 0 | 185 | 0 |
Variable type: logical
skim_variable | n_missing | complete_rate | mean | count |
---|---|---|---|---|
funny | 0 | 1 | 0.69 | TRU: 171, FAL: 76 |
show_product_quickly | 0 | 1 | 0.68 | TRU: 169, FAL: 78 |
patriotic | 0 | 1 | 0.17 | FAL: 206, TRU: 41 |
celebrity | 0 | 1 | 0.29 | FAL: 176, TRU: 71 |
danger | 0 | 1 | 0.30 | FAL: 172, TRU: 75 |
animals | 0 | 1 | 0.37 | FAL: 155, TRU: 92 |
use_sex | 0 | 1 | 0.27 | FAL: 181, TRU: 66 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
year | 0 | 1.00 | 2010.19 | 5.86 | 2000 | 2005 | 2010 | 2015.00 | 2020 | ▇▇▇▇▆ |
view_count | 16 | 0.94 | 1407556.46 | 11971111.01 | 10 | 6431 | 41379 | 170015.50 | 176373378 | ▇▁▁▁▁ |
like_count | 22 | 0.91 | 4146.03 | 23920.40 | 0 | 19 | 130 | 527.00 | 275362 | ▇▁▁▁▁ |
dislike_count | 22 | 0.91 | 833.54 | 6948.52 | 0 | 1 | 7 | 24.00 | 92990 | ▇▁▁▁▁ |
favorite_count | 16 | 0.94 | 0.00 | 0.00 | 0 | 0 | 0 | 0.00 | 0 | ▁▁▇▁▁ |
comment_count | 25 | 0.90 | 188.64 | 986.46 | 0 | 1 | 10 | 50.75 | 9190 | ▇▁▁▁▁ |
category_id | 16 | 0.94 | 19.32 | 8.00 | 1 | 17 | 23 | 24.00 | 29 | ▃▁▂▆▇ |
Variable type: POSIXct
skim_variable | n_missing | complete_rate | min | max | median | n_unique |
---|---|---|---|---|---|---|
published_at | 16 | 0.94 | 2006-02-06 10:02:36 | 2021-01-27 13:11:29 | 2013-01-31 09:13:55 | 227 |
# Treat missing values and transform the data.
Youtube <- likes %>%
  # Drop the URL/thumbnail columns and dislike_count, then keep complete rows.
  select(
    -thumbnail,
    -superbowl_ads_dot_com_url,
    -youtube_url,
    -dislike_count
  ) %>%
  na.omit() %>%
  # Log-transform the heavily skewed counts. like_count and comment_count
  # contain zeros and log(0) is -Inf, which poisons plots, means and model
  # fitting, so counts are floored at 1 first (0 and 1 both map to 0; every
  # larger count is transformed exactly as before).
  mutate(
    across(
      c(view_count, like_count, comment_count),
      function(x) log(pmax(x, 1))
    ),
    across(where(is.logical), as.factor)
  ) %>%
  mutate(across(where(is.character), as.factor)) %>%
  mutate(category_id = as.factor(category_id)) %>%
  # brand is converted back to plain text after the blanket factor conversion.
  mutate(brand = as.character(brand))
Identify good predictors
funny
# like_count was already log-transformed when Youtube was built, so no
# additional log axis is applied (a second log yields NaN/-Inf and drops rows).
# funny is plotted as the factor itself so the axis reads FALSE/TRUE rather
# than the integer codes 1/2.
Youtube %>%
  ggplot(aes(x = like_count, y = funny)) +
  geom_point() +
  labs(x = "log(like_count)", y = "funny")
## Warning in transformation$transform(x): NaNs produced
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_point()`).
brand
# view_count was already log-transformed when Youtube was built, so the
# boxplot uses a linear axis; adding scale_y_log10() would log the values twice.
Youtube %>%
  ggplot(aes(x = brand, y = view_count)) +
  geom_boxplot() +
  labs(x = "brand", y = "log(view_count)")
title
Youtube %>%
  # One row per brand: mean of the (log-transformed) like count and ad count.
  group_by(brand) %>%
  summarize(
    like_count = mean(like_count, na.rm = TRUE),
    n = n()
  ) %>%
  # Keep only brands with more than 10 ads so the averages are not driven by
  # a handful of videos.
  filter(n > 10) %>%
  # Rank by average; n = 20 exceeds the 10 brands in the data, so every brand
  # that passed the filter is kept.
  slice_max(order_by = like_count, n = 20) %>%
  # Plot, ordering brands by their average.
  ggplot(aes(x = like_count, y = fct_reorder(brand, like_count))) +
  geom_point() +
  labs(
    # The title no longer hard-codes a brand count, and the axis label states
    # that the values are on the log scale.
    title = "Brands with More Than 10 Ads, Ranked by Avg Log Like Count",
    x = "Average log(like count)",
    y = "Brand"
  )
EDA Shortcut
# Step 1: Binarize
# Drop identifier, free-text and timestamp columns, then turn every remaining
# column into 0/1 indicator columns. category_id is kept as a feature.
data_binarized_table <- Youtube %>%
  select(
    -c(id, kind, etag, published_at, description, channel_title, title)
  ) %>%
  binarize()

glimpse(data_binarized_table)
## Rows: 190
## Columns: 51
## $ `year__-Inf_2006` <dbl> 0, 1, 0, 1, 0, 0, 0,…
## $ year__2006_2010 <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ year__2010_2014.75 <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ year__2014.75_Inf <dbl> 1, 0, 1, 0, 1, 1, 1,…
## $ brand__Bud_Light <dbl> 1, 1, 0, 1, 0, 0, 0,…
## $ brand__Budweiser <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ `brand__Coca-Cola` <dbl> 0, 0, 0, 0, 0, 1, 0,…
## $ brand__Doritos <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ `brand__E-Trade` <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ brand__Hynudai <dbl> 0, 0, 1, 0, 0, 0, 0,…
## $ brand__Kia <dbl> 0, 0, 0, 0, 0, 0, 1,…
## $ brand__NFL <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ brand__Pepsi <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ brand__Toyota <dbl> 0, 0, 0, 0, 1, 0, 0,…
## $ funny__FALSE <dbl> 0, 0, 1, 0, 0, 0, 1,…
## $ funny__TRUE <dbl> 1, 1, 0, 1, 1, 1, 0,…
## $ show_product_quickly__FALSE <dbl> 0, 1, 0, 0, 0, 1, 1,…
## $ show_product_quickly__TRUE <dbl> 1, 0, 1, 1, 1, 0, 0,…
## $ patriotic__FALSE <dbl> 1, 1, 1, 1, 1, 1, 1,…
## $ patriotic__TRUE <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ celebrity__FALSE <dbl> 0, 1, 1, 1, 0, 0, 0,…
## $ celebrity__TRUE <dbl> 1, 0, 0, 0, 1, 1, 1,…
## $ danger__FALSE <dbl> 0, 0, 1, 0, 0, 1, 1,…
## $ danger__TRUE <dbl> 1, 1, 0, 1, 1, 0, 0,…
## $ animals__FALSE <dbl> 1, 0, 1, 0, 0, 0, 1,…
## $ animals__TRUE <dbl> 0, 1, 0, 1, 1, 1, 0,…
## $ use_sex__FALSE <dbl> 1, 1, 1, 0, 1, 1, 1,…
## $ use_sex__TRUE <dbl> 0, 0, 0, 1, 0, 0, 0,…
## $ `view_count__-Inf_9.25738853430794` <dbl> 0, 0, 1, 0, 0, 0, 0,…
## $ view_count__9.25738853430794_10.9763015508417 <dbl> 1, 0, 0, 1, 1, 0, 1,…
## $ view_count__10.9763015508417_12.2976472113071 <dbl> 0, 1, 0, 0, 0, 0, 0,…
## $ view_count__12.2976472113071_Inf <dbl> 0, 0, 0, 0, 0, 1, 0,…
## $ `like_count__-Inf_3.46573590279973` <dbl> 0, 0, 1, 1, 0, 0, 0,…
## $ like_count__3.46573590279973_5.10587200661176 <dbl> 0, 1, 0, 0, 1, 0, 1,…
## $ like_count__5.10587200661176_6.37800137471335 <dbl> 1, 0, 0, 0, 0, 0, 0,…
## $ like_count__6.37800137471335_Inf <dbl> 0, 0, 0, 0, 0, 1, 0,…
## $ `comment_count__-Inf_0.693147180559945` <dbl> 0, 0, 1, 1, 0, 0, 0,…
## $ comment_count__0.693147180559945_2.70805020110221 <dbl> 1, 1, 0, 0, 1, 0, 1,…
## $ comment_count__2.70805020110221_4.17438726989564 <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ comment_count__4.17438726989564_Inf <dbl> 0, 0, 0, 0, 0, 1, 0,…
## $ category_id__1 <dbl> 0, 0, 0, 0, 1, 0, 0,…
## $ category_id__2 <dbl> 0, 0, 0, 0, 0, 0, 1,…
## $ category_id__10 <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ category_id__15 <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ category_id__17 <dbl> 0, 1, 0, 0, 0, 0, 0,…
## $ category_id__22 <dbl> 0, 0, 1, 0, 0, 0, 0,…
## $ category_id__23 <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ category_id__24 <dbl> 0, 0, 0, 1, 0, 1, 0,…
## $ category_id__25 <dbl> 0, 0, 0, 0, 0, 0, 0,…
## $ category_id__27 <dbl> 1, 0, 0, 0, 0, 0, 0,…
## $ `category_id__-OTHER` <dbl> 0, 0, 0, 0, 0, 0, 0,…
# Step 2: Correlate
# The target is the top like_count bin. Its column name embeds the quantile
# value computed by binarize(), so it is looked up by pattern instead of being
# hard-coded; the top bin is the only like_count column ending in "_Inf".
target_col <- grep(
  "^like_count__.+_Inf$",
  names(data_binarized_table),
  value = TRUE
)
stopifnot(length(target_col) == 1L)

data_corr_table <- data_binarized_table %>%
  correlate(target = !!as.name(target_col))
data_corr_table
## # A tibble: 51 × 3
## feature bin correlation
## <fct> <chr> <dbl>
## 1 like_count 6.37800137471335_Inf 1
## 2 view_count 12.2976472113071_Inf 0.777
## 3 comment_count 4.17438726989564_Inf 0.761
## 4 comment_count -Inf_0.693147180559945 -0.362
## 5 like_count -Inf_3.46573590279973 -0.343
## 6 view_count -Inf_9.25738853430794 -0.338
## 7 view_count 9.25738853430794_10.9763015508417 -0.333
## 8 like_count 5.10587200661176_6.37800137471335 -0.333
## 9 like_count 3.46573590279973_5.10587200661176 -0.329
## 10 comment_count 0.693147180559945_2.70805020110221 -0.329
## # ℹ 41 more rows
# Step 3: Plot
# Draw the correlation funnel from the correlation table.
plot_correlation_funnel(data_corr_table)
## Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Hold out a test set (rsample's default 3/4 training proportion, written out).
set.seed(123)
youtube_split <- Youtube %>% initial_split(prop = 3 / 4)
youtube_train <- youtube_split %>% training()
youtube_test <- youtube_split %>% testing()

# Resample the training set with 10-fold cross-validation for tuning.
set.seed(234)
youtube_cv <- youtube_train %>% vfold_cv(v = 10)
youtube_cv
## # 10-fold cross-validation
## # A tibble: 10 × 2
## splits id
## <list> <chr>
## 1 <split [127/15]> Fold01
## 2 <split [127/15]> Fold02
## 3 <split [128/14]> Fold03
## 4 <split [128/14]> Fold04
## 5 <split [128/14]> Fold05
## 6 <split [128/14]> Fold06
## 7 <split [128/14]> Fold07
## 8 <split [128/14]> Fold08
## 9 <split [128/14]> Fold09
## 10 <split [128/14]> Fold10
library(usemodels)
# Print boilerplate xgboost recipe/spec/workflow code for this formula.
usemodels::use_xgboost(formula = like_count ~ ., data = youtube_train)
## xgboost_recipe <-
## recipe(formula = like_count ~ ., data = youtube_train) %>%
## step_zv(all_predictors())
##
## xgboost_spec <-
## boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(),
## loss_reduction = tune(), sample_size = tune()) %>%
## set_mode("classification") %>%
## set_engine("xgboost")
##
## xgboost_workflow <-
## workflow() %>%
## add_recipe(xgboost_recipe) %>%
## add_model(xgboost_spec)
##
## set.seed(36499)
## xgboost_tune <-
## tune_grid(xgboost_workflow, resamples = stop("add your rsample object"), grid = stop("add number of candidate points"))
# Preprocessing recipe: predict like_count from the title text, category,
# brand and the funny flag.
xgboost_youtube_recipe1 <-
  recipe(
    like_count ~ title + category_id + brand + funny,
    data = youtube_train
  ) %>%
  # Pool category_id levels below the 5% threshold into "other".
  step_other(category_id, threshold = 0.05) %>%
  # Title text -> tokens -> at most 100 tokens kept -> tf-idf columns.
  step_tokenize(title) %>%
  step_tokenfilter(title, max_tokens = 100) %>%
  step_tfidf(title) %>%
  # One-hot encode the remaining categorical predictors.
  step_dummy(category_id, brand, funny, one_hot = TRUE) %>%
  # Drop zero-variance columns, then centre and scale what is left.
  step_zv(all_numeric_predictors()) %>%
  step_normalize(all_numeric_predictors())

# Inspect the processed training data.
xgboost_youtube_recipe1 %>%
  prep() %>%
  bake(new_data = NULL) %>%
  glimpse()
## Rows: 142
## Columns: 120
## $ like_count <dbl> 7.686621, 10.120211, 12.074990, 6.637258, 5.76…
## $ tfidf_title_2000 <dbl> -0.1456353, -0.1456353, -0.1456353, -0.1456353…
## $ tfidf_title_2001 <dbl> -0.1129088, -0.1129088, -0.1129088, -0.1129088…
## $ tfidf_title_2002 <dbl> -0.1082934, -0.1082934, -0.1082934, -0.1082934…
## $ tfidf_title_2005 <dbl> -0.1227573, -0.1227573, -0.1227573, -0.1227573…
## $ tfidf_title_2007 <dbl> -0.1657765, -0.1657765, -0.1657765, -0.1657765…
## $ tfidf_title_2009 <dbl> -0.1678792, -0.1678792, -0.1678792, -0.1678792…
## $ tfidf_title_2010 <dbl> -0.1566958, -0.1566958, -0.1566958, -0.1566958…
## $ tfidf_title_2012 <dbl> -0.2180307, -0.2180307, -0.2180307, -0.2180307…
## $ tfidf_title_2013 <dbl> -0.1639145, -0.1639145, -0.1639145, -0.1639145…
## $ tfidf_title_2014 <dbl> -0.2069703, -0.2069703, -0.2069703, -0.2069703…
## $ tfidf_title_2015 <dbl> -0.1667307, -0.1667307, -0.1667307, -0.1667307…
## $ tfidf_title_2016 <dbl> -0.1675157, -0.1675157, -0.1675157, -0.1675157…
## $ tfidf_title_2017 <dbl> -0.1129088, -0.1129088, -0.1129088, -0.1129088…
## $ tfidf_title_2018 <dbl> -0.1878505, -0.1878505, -0.1878505, -0.1878505…
## $ tfidf_title_2019 <dbl> -0.1881418, -0.1881418, -0.1881418, -0.1881418…
## $ tfidf_title_2020 <dbl> -0.1669423, -0.1669423, -0.1669423, -0.1669423…
## $ tfidf_title_44 <dbl> -0.1129088, -0.1129088, -0.1129088, -0.1129088…
## $ tfidf_title_a <dbl> -0.1670646, -0.1670646, -0.1670646, -0.1670646…
## $ tfidf_title_ad <dbl> -0.3395087, -0.3395087, -0.3395087, -0.3395087…
## $ tfidf_title_ads <dbl> -0.1191013, -0.1191013, -0.1191013, -0.1191013…
## $ tfidf_title_baby <dbl> -0.1257805, -0.1257805, -0.1257805, 4.2041172,…
## $ tfidf_title_best <dbl> -0.1461105, -0.1461105, -0.1461105, -0.1461105…
## $ tfidf_title_big <dbl> -0.1434314, -0.1434314, -0.1434314, -0.1434314…
## $ tfidf_title_bowl <dbl> -0.7864152, -0.7864152, 1.8550289, -0.7864152,…
## $ tfidf_title_bud <dbl> -0.4911356, -0.4911356, -0.4911356, -0.4911356…
## $ tfidf_title_budweiser <dbl> 5.119667, 2.376938, -0.365792, -0.365792, -0.3…
## $ tfidf_title_camry <dbl> -0.1191013, -0.1191013, -0.1191013, -0.1191013…
## $ tfidf_title_cedric <dbl> -0.1623136, -0.1623136, -0.1623136, -0.1623136…
## $ tfidf_title_clydesdale <dbl> -0.110474, -0.110474, -0.110474, -0.110474, -0…
## $ tfidf_title_coca <dbl> -0.2346209, -0.2346209, -0.2346209, -0.2346209…
## $ tfidf_title_coke <dbl> -0.1645665, -0.1645665, -0.1645665, -0.1645665…
## $ tfidf_title_cola <dbl> -0.2482818, -0.2482818, -0.2482818, -0.2482818…
## $ tfidf_title_commercial <dbl> -0.93672021, 2.50526931, 0.78427455, 0.4400756…
## $ tfidf_title_commercials <dbl> -0.1186052, -0.1186052, -0.1186052, -0.1186052…
## $ tfidf_title_cool <dbl> -0.1461105, -0.1461105, -0.1461105, -0.1461105…
## $ tfidf_title_crash <dbl> -0.1897352, -0.1897352, -0.1897352, -0.1897352…
## $ tfidf_title_date <dbl> -0.1167563, -0.1167563, -0.1167563, -0.1167563…
## $ tfidf_title_diet <dbl> -0.1129088, -0.1129088, -0.1129088, -0.1129088…
## $ tfidf_title_dilly <dbl> -0.08391814, -0.08391814, -0.08391814, -0.0839…
## $ tfidf_title_dog <dbl> -0.1543472, -0.1543472, -0.1543472, -0.1543472…
## $ tfidf_title_dogs <dbl> -0.1002749, -0.1002749, -0.1002749, -0.1002749…
## $ tfidf_title_doritos <dbl> -0.2600431, -0.2600431, -0.2600431, -0.2600431…
## $ tfidf_title_e <dbl> -0.1645665, -0.1645665, -0.1645665, -0.1645665…
## $ tfidf_title_elantra <dbl> -0.1160093, -0.1160093, -0.1160093, -0.1160093…
## $ tfidf_title_epic <dbl> -0.1191013, -0.1191013, -0.1191013, -0.1191013…
## $ tfidf_title_etrade <dbl> -0.1508209, -0.1508209, -0.1508209, 3.8647862,…
## $ tfidf_title_extended <dbl> -0.1462371, -0.1462371, -0.1462371, -0.1462371…
## $ tfidf_title_factory <dbl> -0.1187455, -0.1187455, -0.1187455, -0.1187455…
## $ tfidf_title_fantasy <dbl> -0.1191013, -0.1191013, -0.1191013, -0.1191013…
## $ tfidf_title_featuring <dbl> -0.1129088, -0.1129088, -0.1129088, -0.1129088…
## $ tfidf_title_fly <dbl> -0.1191013, -0.1191013, -0.1191013, -0.1191013…
## $ tfidf_title_ft <dbl> -0.1129088, -0.1129088, -0.1129088, -0.1129088…
## $ tfidf_title_full <dbl> -0.1183625, -0.1183625, -0.1183625, -0.1183625…
## $ tfidf_title_funny <dbl> -0.1370193, -0.1370193, -0.1370193, -0.1370193…
## $ tfidf_title_game <dbl> -0.1645354, -0.1645354, -0.1645354, -0.1645354…
## $ tfidf_title_girlfriend <dbl> -0.1129088, -0.1129088, -0.1129088, -0.1129088…
## $ tfidf_title_happiness <dbl> -0.1191013, -0.1191013, -0.1191013, -0.1191013…
## $ tfidf_title_hd <dbl> -0.1634768, -0.1634768, -0.1634768, -0.1634768…
## $ tfidf_title_hyundai <dbl> -0.2217037, -0.2217037, -0.2217037, -0.2217037…
## $ tfidf_title_inside <dbl> -0.1063756, -0.1063756, -0.1063756, -0.1063756…
## $ tfidf_title_island <dbl> -0.1191013, -0.1191013, -0.1191013, -0.1191013…
## $ tfidf_title_jackie <dbl> -0.1183625, -0.1183625, -0.1183625, -0.1183625…
## $ tfidf_title_kia <dbl> -0.212374, -0.212374, -0.212374, -0.212374, -0…
## $ tfidf_title_king <dbl> -0.1178874, -0.1178874, -0.1178874, -0.1178874…
## $ tfidf_title_legends <dbl> -0.1178874, -0.1178874, -0.1178874, -0.1178874…
## $ tfidf_title_light <dbl> -0.4811747, -0.4811747, -0.4811747, -0.4811747…
## $ tfidf_title_lighta <dbl> -0.1191013, -0.1191013, -0.1191013, -0.1191013…
## $ tfidf_title_love <dbl> -0.1114199, -0.1114199, -0.1114199, -0.1114199…
## $ tfidf_title_meter <dbl> -0.1456353, -0.1456353, -0.1456353, -0.1456353…
## $ tfidf_title_new <dbl> -0.182815, -0.182815, -0.182815, -0.182815, -0…
## $ tfidf_title_nfl <dbl> -0.1659398, -0.1659398, 6.8232194, -0.1659398,…
## $ tfidf_title_of <dbl> -0.1380102, -0.1380102, -0.1380102, -0.1380102…
## $ tfidf_title_official <dbl> -0.1683342, -0.1683342, -0.1683342, -0.1683342…
## $ tfidf_title_on <dbl> -0.146020, -0.146020, -0.146020, -0.146020, -0…
## $ tfidf_title_one <dbl> -0.1186052, -0.1186052, -0.1186052, -0.1186052…
## $ tfidf_title_optima <dbl> -0.1129088, -0.1129088, -0.1129088, -0.1129088…
## $ tfidf_title_party <dbl> -0.1110605, -0.1110605, -0.1110605, -0.1110605…
## $ tfidf_title_pepsi <dbl> -0.2727937, -0.2727937, -0.2727937, -0.2727937…
## $ tfidf_title_puppy <dbl> -0.114315, -0.114315, -0.114315, -0.114315, -0…
## $ tfidf_title_ride <dbl> -0.1183625, -0.1183625, -0.1183625, -0.1183625…
## $ tfidf_title_spot <dbl> -0.1183625, -0.1183625, -0.1183625, -0.1183625…
## $ tfidf_title_starring <dbl> -0.1392022, -0.1392022, -0.1392022, -0.1392022…
## $ tfidf_title_super <dbl> -0.7864152, -0.7864152, 1.8550289, -0.7864152,…
## $ tfidf_title_superbowl <dbl> -0.2784949, -0.2784949, -0.2784949, 3.2090502,…
## $ tfidf_title_the <dbl> -0.3175183, -0.3175183, -0.3175183, -0.3175183…
## $ tfidf_title_to <dbl> -0.1160093, -0.1160093, -0.1160093, -0.1160093…
## $ tfidf_title_toyota <dbl> -0.1353587, -0.1353587, -0.1353587, -0.1353587…
## $ tfidf_title_trade <dbl> -0.1645665, -0.1645665, -0.1645665, -0.1645665…
## $ tfidf_title_tv <dbl> -0.2030553, -0.2030553, -0.2030553, -0.2030553…
## $ tfidf_title_up <dbl> -0.1178874, -0.1178874, -0.1178874, -0.1178874…
## $ tfidf_title_usa <dbl> -0.1411492, -0.1411492, -0.1411492, -0.1411492…
## $ tfidf_title_version <dbl> -0.1680323, -0.1680323, -0.1680323, -0.1680323…
## $ tfidf_title_vs <dbl> -0.14436, -0.14436, -0.14436, -0.14436, -0.144…
## $ tfidf_title_winner <dbl> -0.1897352, -0.1897352, -0.1897352, -0.1897352…
## $ tfidf_title_x <dbl> -0.1337423, -0.1337423, -0.1337423, -0.1337423…
## $ tfidf_title_xli <dbl> -0.1187455, -0.1187455, -0.1187455, -0.1187455…
## $ tfidf_title_xliii <dbl> -0.2059473, -0.2059473, -0.2059473, 5.7783139,…
## $ tfidf_title_xliv <dbl> -0.1151266, -0.1151266, -0.1151266, -0.1151266…
## $ tfidf_title_xxxvi <dbl> -0.1160093, -0.1160093, -0.1160093, -0.1160093…
## $ tfidf_title_zero <dbl> -0.1191013, -0.1191013, -0.1191013, -0.1191013…
## $ category_id_X1 <dbl> -0.2742701, -0.2742701, -0.2742701, -0.2742701…
## $ category_id_X2 <dbl> -0.2742701, -0.2742701, -0.2742701, -0.2742701…
## $ category_id_X17 <dbl> -0.2887527, -0.2887527, 3.4387824, -0.2887527,…
## $ category_id_X22 <dbl> -0.3550914, -0.3550914, -0.3550914, -0.3550914…
## $ category_id_X23 <dbl> -0.4828346, 2.0565176, -0.4828346, -0.4828346,…
## $ category_id_X24 <dbl> 1.2720666, -0.7805863, -0.7805863, 1.2720666, …
## $ category_id_other <dbl> -0.3295524, -0.3295524, -0.3295524, -0.3295524…
## $ brand_Bud.Light <dbl> -0.5699116, -0.5699116, -0.5699116, -0.5699116…
## $ brand_Budweiser <dbl> 2.2095344, 2.2095344, -0.4493968, -0.4493968, …
## $ brand_Coca.Cola <dbl> -0.3027501, -0.3027501, -0.3027501, -0.3027501…
## $ brand_Doritos <dbl> -0.3550914, -0.3550914, -0.3550914, -0.3550914…
## $ brand_E.Trade <dbl> -0.243477, -0.243477, -0.243477, 4.078240, -0.…
## $ brand_Hynudai <dbl> -0.3163313, -0.3163313, -0.3163313, -0.3163313…
## $ brand_Kia <dbl> -0.2269068, -0.2269068, -0.2269068, -0.2269068…
## $ brand_NFL <dbl> -0.2093011, -0.2093011, 4.7441588, -0.2093011,…
## $ brand_Pepsi <dbl> -0.3550914, -0.3550914, -0.3550914, -0.3550914…
## $ brand_Toyota <dbl> -0.1903663, -0.1903663, -0.1903663, -0.1903663…
## $ funny_FALSE. <dbl> -0.6567227, -0.6567227, 1.5119894, -0.6567227,…
## $ funny_TRUE. <dbl> 0.6567227, 0.6567227, -1.5119894, 0.6567227, -…
# Model specification: xgboost regression with the number of trees and the
# minimum node size left as tuning placeholders.
xgboost_spec_youtube1 <-
  boost_tree(
    mode = "regression",
    trees = tune(),
    min_n = tune()
  ) %>%
  set_engine("xgboost")
# Bundle the model specification and the preprocessing recipe in one workflow.
xgboost_workflow_youtube1 <-
  workflow() %>%
  add_model(xgboost_spec_youtube1) %>%
  add_recipe(xgboost_youtube_recipe1)
# Tune hyperparameters: evaluate 5 candidate parameter sets on the CV folds.
set.seed(678)
tuned_youtube1 <-
  tune_grid(
    xgboost_workflow_youtube1,
    resamples = youtube_cv,
    grid = 5
  )

# tune_grid() only warns when fits fail and stores the underlying errors on
# the result; print them so the cause is visible instead of being carried
# forward silently.
show_notes(tuned_youtube1)
## Warning: All models failed. Run `show_notes(.Last.tune.result)` for more
## information.