#Import Data:

# Download the TidyTuesday (2022-11-01) horror movies data and bind it to both
# `horror_movies` and `horror`.
horror_movies <- readr::read_csv(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-01/horror_movies.csv"
)
horror <- horror_movies
## Rows: 32540 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (10): original_title, title, original_language, overview, tagline, post...
## dbl   (8): id, popularity, vote_count, vote_average, budget, revenue, runtim...
## lgl   (1): adult
## date  (1): release_date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Per-column overview of the raw import: type, missingness and distribution.
skimr::skim(data = horror)
Data summary
Name horror
Number of rows 32540
Number of columns 20
_______________________
Column type frequency:
character 10
Date 1
logical 1
numeric 8
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
original_title 0 1.00 1 191 0 30296 0
title 0 1.00 1 191 0 29563 0
original_language 0 1.00 2 2 0 97 0
overview 1286 0.96 1 1000 0 31020 0
tagline 19835 0.39 1 237 0 12513 0
poster_path 4474 0.86 30 32 0 28048 0
status 0 1.00 7 15 0 4 0
backdrop_path 18995 0.42 29 32 0 13536 0
genre_names 0 1.00 6 144 0 772 0
collection_name 30234 0.07 4 56 0 815 0

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
release_date 0 1 1950-01-01 2022-12-31 2012-12-09 10999

Variable type: logical

skim_variable n_missing complete_rate mean count
adult 0 1 0 FAL: 32540

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
id 0 1.00 445910.83 305744.67 17 146494.8 426521.00 707534.00 1033095.00 ▇▆▆▅▅
popularity 0 1.00 4.01 37.51 0 0.6 0.84 2.24 5088.58 ▇▁▁▁▁
vote_count 0 1.00 62.69 420.89 0 0.0 2.00 11.00 16900.00 ▇▁▁▁▁
vote_average 0 1.00 3.34 2.88 0 0.0 4.00 5.70 10.00 ▇▂▆▃▁
budget 0 1.00 543126.59 4542667.81 0 0.0 0.00 0.00 200000000.00 ▇▁▁▁▁
revenue 0 1.00 1349746.73 14430479.15 0 0.0 0.00 0.00 701842551.00 ▇▁▁▁▁
runtime 0 1.00 62.14 41.00 0 14.0 80.00 91.00 683.00 ▇▁▁▁▁
collection 30234 0.07 481534.88 324498.16 656 155421.0 471259.00 759067.25 1033032.00 ▇▅▅▅▅
# Build the analysis table from the raw import.
# NOTE(review): `collection` / `collection_name` are ~93% missing in the skim
# summary, so na.omit() discards most rows; log() returns -Inf for any rating
# that is exactly 0 -- confirm both are intended.
data <- horror %>%
    # Columns left out of the analysis
    select(-c(tagline, release_date)) %>%
    # A budget or revenue of 0 is treated as missing: drop those rows
    filter(budget != 0, revenue != 0) %>%
    # Keep complete rows only
    na.omit() %>%
    # Put the rating on the natural-log scale
    mutate(vote_average = log(vote_average))

Goal: Build a regression model to predict the average movie rating (vote_average) using the horror_movies dataset.

#Explore Data:

Identify Good Predictors.

Budget

# Log-scale rating (x) against budget, with budget drawn on a log10 y axis
ggplot(data, aes(x = vote_average, y = budget)) +
    geom_point() +
    scale_y_log10()

# Distribution of the log-scale rating for each distinct runtime value
ggplot(data, aes(x = vote_average, y = as.factor(runtime))) +
    geom_boxplot()

Correlation Plot

# Step 1: Prepare Data
# Drop the identifier and free-text columns, then turn every remaining column
# into binary (0/1) indicator columns.
data_binarize_tbl <- binarize(
    select(data, -c(id, original_title, title, overview))
)
glimpse(data_binarize_tbl)
## Rows: 390
## Columns: 76
## $ original_language__en                                     <dbl> 1, 1, 1, 1, …
## $ original_language__es                                     <dbl> 0, 0, 0, 0, …
## $ original_language__hi                                     <dbl> 0, 0, 0, 0, …
## $ original_language__ja                                     <dbl> 0, 0, 0, 0, …
## $ original_language__ko                                     <dbl> 0, 0, 0, 0, …
## $ `original_language__-OTHER`                               <dbl> 0, 0, 0, 0, …
## $ `poster_path__/11tOuxWiGFzL60bVwoiF9SSoMLc.jpg`           <dbl> 0, 0, 0, 0, …
## $ `poster_path__-OTHER`                                     <dbl> 1, 1, 1, 1, …
## $ `popularity__-Inf_14.8335`                                <dbl> 0, 0, 0, 0, …
## $ popularity__14.8335_27.89                                 <dbl> 0, 0, 0, 0, …
## $ popularity__27.89_51.0185                                 <dbl> 0, 0, 0, 0, …
## $ popularity__51.0185_Inf                                   <dbl> 1, 1, 1, 1, …
## $ `vote_count__-Inf_424`                                    <dbl> 1, 0, 0, 0, …
## $ vote_count__424_1095.5                                    <dbl> 0, 1, 0, 0, …
## $ vote_count__1095.5_2517.5                                 <dbl> 0, 0, 1, 1, …
## $ vote_count__2517.5_Inf                                    <dbl> 0, 0, 0, 0, …
## $ `vote_average__-Inf_1.7227665977411`                      <dbl> 0, 0, 0, 0, …
## $ vote_average__1.7227665977411_1.80828877117927            <dbl> 1, 0, 0, 0, …
## $ vote_average__1.80828877117927_1.88706964903238           <dbl> 0, 0, 0, 0, …
## $ vote_average__1.88706964903238_Inf                        <dbl> 0, 1, 1, 1, …
## $ `budget__-Inf_2500000`                                    <dbl> 0, 0, 0, 0, …
## $ budget__2500000_8150000                                   <dbl> 0, 0, 0, 0, …
## $ `budget__8150000_1.9e+07`                                 <dbl> 0, 1, 0, 0, …
## $ `budget__1.9e+07_Inf`                                     <dbl> 1, 0, 1, 1, …
## $ `revenue__-Inf_10341767.5`                                <dbl> 1, 0, 0, 0, …
## $ revenue__10341767.5_33900000                              <dbl> 0, 1, 0, 0, …
## $ revenue__33900000_91132596.5                              <dbl> 0, 0, 0, 0, …
## $ revenue__91132596.5_Inf                                   <dbl> 0, 0, 1, 1, …
## $ `runtime__-Inf_89`                                        <dbl> 1, 0, 0, 0, …
## $ runtime__89_96                                            <dbl> 0, 0, 0, 0, …
## $ runtime__96_104.75                                        <dbl> 0, 0, 0, 0, …
## $ runtime__104.75_Inf                                       <dbl> 0, 1, 1, 1, …
## $ `backdrop_path__/114yPZmKgsQVTSt0BDx5WagBOvW.jpg`         <dbl> 0, 0, 0, 0, …
## $ `backdrop_path__-OTHER`                                   <dbl> 1, 1, 1, 1, …
## $ `genre_names__Action,_Adventure,_Horror,_Science_Fiction` <dbl> 0, 0, 0, 0, …
## $ `genre_names__Action,_Fantasy,_Horror`                    <dbl> 0, 0, 0, 0, …
## $ `genre_names__Action,_Horror,_Science_Fiction`            <dbl> 0, 0, 0, 0, …
## $ `genre_names__Action,_Horror,_Thriller`                   <dbl> 0, 0, 0, 0, …
## $ `genre_names__Adventure,_Horror,_Thriller`                <dbl> 0, 0, 0, 0, …
## $ `genre_names__Comedy,_Fantasy,_Horror`                    <dbl> 0, 0, 0, 0, …
## $ `genre_names__Comedy,_Horror`                             <dbl> 0, 0, 0, 0, …
## $ `genre_names__Comedy,_Horror,_Science_Fiction`            <dbl> 0, 0, 0, 0, …
## $ `genre_names__Crime,_Horror,_Thriller`                    <dbl> 0, 0, 0, 0, …
## $ `genre_names__Drama,_Horror,_Thriller`                    <dbl> 0, 0, 0, 0, …
## $ `genre_names__Fantasy,_Horror`                            <dbl> 0, 0, 0, 0, …
## $ genre_names__Horror                                       <dbl> 0, 0, 0, 0, …
## $ `genre_names__Horror,_Mystery`                            <dbl> 0, 0, 0, 0, …
## $ `genre_names__Horror,_Mystery,_Thriller`                  <dbl> 1, 1, 0, 1, …
## $ `genre_names__Horror,_Science_Fiction`                    <dbl> 0, 0, 0, 0, …
## $ `genre_names__Horror,_Science_Fiction,_Thriller`          <dbl> 0, 0, 0, 0, …
## $ `genre_names__Horror,_Thriller`                           <dbl> 0, 0, 1, 0, …
## $ `genre_names__-OTHER`                                     <dbl> 0, 0, 0, 0, …
## $ `collection__-Inf_12263`                                  <dbl> 0, 0, 0, 1, …
## $ collection__12263_111751                                  <dbl> 1, 0, 1, 0, …
## $ collection__111751_355090.5                               <dbl> 0, 0, 0, 0, …
## $ collection__355090.5_Inf                                  <dbl> 0, 1, 0, 0, …
## $ `collection_name__[REC]_Collection`                       <dbl> 0, 0, 0, 0, …
## $ collection_name__A_Nightmare_on_Elm_Street_Collection     <dbl> 0, 0, 0, 0, …
## $ `collection_name__Child's_Play_Collection`                <dbl> 0, 0, 0, 0, …
## $ collection_name__Evil_Dead_Collection                     <dbl> 0, 0, 0, 0, …
## $ collection_name__Final_Destination_Collection             <dbl> 0, 0, 0, 0, …
## $ collection_name__Friday_the_13th_Collection               <dbl> 0, 0, 0, 0, …
## $ collection_name__Halloween_Collection                     <dbl> 0, 0, 1, 0, …
## $ collection_name__Hellraiser_Collection                    <dbl> 0, 0, 0, 0, …
## $ collection_name__Insidious_Collection                     <dbl> 0, 0, 0, 0, …
## $ collection_name__Jaws_Collection                          <dbl> 0, 0, 0, 0, …
## $ collection_name__Jeepers_Creepers_Collection              <dbl> 1, 0, 0, 0, …
## $ collection_name__Living_Dead_Collection                   <dbl> 0, 0, 0, 0, …
## $ collection_name__Paranormal_Activity_Collection           <dbl> 0, 0, 0, 0, …
## $ collection_name__Resident_Evil_Collection                 <dbl> 0, 0, 0, 0, …
## $ collection_name__Saw_Collection                           <dbl> 0, 0, 0, 0, …
## $ collection_name__Scream_Collection                        <dbl> 0, 0, 0, 1, …
## $ collection_name__Texas_Chainsaw_Massacre_Collection       <dbl> 0, 0, 0, 0, …
## $ collection_name__The_Exorcist_Collection                  <dbl> 0, 0, 0, 0, …
## $ collection_name__The_Purge_Collection                     <dbl> 0, 0, 0, 0, …
## $ `collection_name__-OTHER`                                 <dbl> 0, 1, 0, 0, …
# Step 2: Correlate
# Correlate every binary feature with the highest rating bin.
data_corr_tbl <- correlate(
    data_binarize_tbl,
    target = vote_average__1.88706964903238_Inf
)

# Step 3: Plot
plot_correlation_funnel(data_corr_tbl)
## Warning: ggrepel: 38 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Build Models

# data <- sample_n(data, 100)

# Split into train and test dataset.
# The split is taken from the cleaned `data` table (columns dropped, zero
# budget/revenue rows and NAs removed, rating log-transformed), not from the
# raw `horror_movies` import, so the model is trained on the prepared data.
set.seed(123)
data_split <- initial_split(data, strata = vote_average)
data_train <- training(data_split)
data_test <- testing(data_split)

# Further split training dataset for cross-validation
# (10 folds by default, stratified on the outcome).
set.seed(234)
data_cv <- vfold_cv(data_train, strata = vote_average)
data_cv
## #  10-fold cross-validation using stratification 
## # A tibble: 10 × 2
##    splits               id    
##    <list>               <chr> 
##  1 <split [21962/2442]> Fold01
##  2 <split [21962/2442]> Fold02
##  3 <split [21963/2441]> Fold03
##  4 <split [21963/2441]> Fold04
##  5 <split [21964/2440]> Fold05
##  6 <split [21964/2440]> Fold06
##  7 <split [21964/2440]> Fold07
##  8 <split [21964/2440]> Fold08
##  9 <split [21965/2439]> Fold09
## 10 <split [21965/2439]> Fold10
# Print template xgboost recipe / model spec / workflow code for this formula
# and training set.
library(usemodels)
use_xgboost(formula = vote_average ~ ., data = data_train)
## xgboost_recipe <- 
##   recipe(formula = vote_average ~ ., data = data_train) %>% 
##   step_zv(all_predictors()) 
## 
## xgboost_spec <- 
##   boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(), 
##     loss_reduction = tune(), sample_size = tune()) %>% 
##   set_mode("classification") %>% 
##   set_engine("xgboost") 
## 
## xgboost_workflow <- 
##   workflow() %>% 
##   add_recipe(xgboost_recipe) %>% 
##   add_model(xgboost_spec) 
## 
## set.seed(29793)
## xgboost_tune <-
##   tune_grid(xgboost_workflow, resamples = stop("add your rsample object"), grid = stop("add number of candidate points"))
# Specify recipe
# Preprocessing recipe for the xgboost model.
# xgboost needs a purely numeric predictor matrix without infinite values, so
# the recipe (1) turns `title` into tf-idf features, (2) one-hot encodes
# `status`, (3) log-transforms `runtime` safely and (4) drops every predictor
# that is still non-numeric (other text, date and logical columns).
xgboost_recipe <-
    recipe(formula = vote_average ~ ., data = data_train) %>%

    # Title text -> tf-idf features for the 100 most frequent tokens
    step_tokenize(title) %>%
    step_tokenfilter(title, max_tokens = 100) %>%
    step_tfidf(title) %>%

    # Pool rare status levels, then one-hot encode
    step_other(status) %>%
    step_dummy(status, one_hot = TRUE) %>%

    # offset = 1 keeps a runtime of 0 finite: log(0 + 1) = 0 instead of -Inf
    step_log(runtime, offset = 1) %>%

    # Remove the remaining non-numeric predictors, which xgboost cannot use
    step_rm(all_predictors(), -all_numeric_predictors()) %>%

    # Drop predictors that ended up constant
    step_zv(all_predictors())

# Inspect the processed training set
xgboost_recipe %>% prep() %>% bake(new_data = NULL) %>% glimpse()
## Rows: 24,404
## Columns: 120
## $ id                    <dbl> 1014226, 49690, 926762, 666479, 943221, 998340, …
## $ original_title        <fct> "Sonríe", "Smile", "Living with Chucky", "Sonríe…
## $ original_language     <fct> es, it, en, eo, es, en, en, en, en, en, fr, en, …
## $ overview              <fct> NA, "A carefree summer vacation turns into an in…
## $ tagline               <fct> NA, "Point. Shoot. Die.", "It Isn't Child's Play…
## $ release_date          <date> 2022-08-18, 2009-08-28, 2022-08-13, 2012-10-23,…
## $ poster_path           <fct> /wHowKf77gNZbVo9yu5ggiNBFCdJ.jpg, /5Hk4ubepZZNwl…
## $ popularity            <dbl> 939.712, 266.502, 163.646, 138.633, 122.085, 106…
## $ vote_count            <dbl> 1, 16, 2, 1, 4, 0, 2, 0, 0, 1, 13, 25, 0, 605, 1…
## $ budget                <dbl> 0e+00, 0e+00, 0e+00, 0e+00, 0e+00, 0e+00, 0e+00,…
## $ revenue               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16184, 0,…
## $ runtime               <dbl> -Inf, 4.7791235, 4.6539604, -Inf, -Inf, 2.708050…
## $ adult                 <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,…
## $ backdrop_path         <fct> NA, /9t60OgpIzoyLiL4nmQXb0ZNmbQA.jpg, NA, NA, /x…
## $ genre_names           <fct> "Horror, Thriller", "Horror, Thriller", "Documen…
## $ collection            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 4796…
## $ collection_name       <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Rob…
## $ vote_average          <dbl> 1.0, 3.9, 2.0, 2.0, 3.3, 0.0, 4.0, 0.0, 0.0, 2.0…
## $ tfidf_title_1         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_2         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_3         <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000…
## $ tfidf_title_4         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_a         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_alien     <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000…
## $ tfidf_title_all       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_an        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_and       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_at        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_bad       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_beast     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_beyond    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_black     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_blood     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_bloody    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_curse     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_dark      <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000…
## $ tfidf_title_darkness  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_day       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_de        <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 5.443397…
## $ tfidf_title_dead      <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_death     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_demon     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_devil     <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000…
## $ `tfidf_title_devil's` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_die       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_do        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ `tfidf_title_don't`   <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000…
## $ tfidf_title_dracula   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_el        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_end       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_evil      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_eyes      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_fear      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_flesh     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_for       <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_from      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_ghost     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_girl      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_halloween <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_haunted   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_haunting  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_hell      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_home      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_horror    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_house     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_i         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_ii        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_in        <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000…
## $ tfidf_title_is        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_island    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_it        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_kill      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_killer    <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_la        <dbl> 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.0…
## $ tfidf_title_lake      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_last      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_legend    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_little    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_living    <dbl> 0.000000, 0.000000, 2.861874, 0.000000, 0.000000…
## $ tfidf_title_love      <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000…
## $ tfidf_title_man       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_massacre  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_me        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_midnight  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_monster   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_movie     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_murder    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_my        <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000…
## $ tfidf_title_ni        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_night     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_nightmare <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_no        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_not       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_of        <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_on        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_one       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_part      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_red       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_return    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_room      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_school    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_story     <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000…
## $ tfidf_title_tales     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_terror    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_the       <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_to        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_vampire   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_video     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_vs        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_witch     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_with      <dbl> 0.000000, 0.000000, 2.552322, 0.000000, 0.000000…
## $ tfidf_title_woman     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_woods     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_world     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_you       <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.00…
## $ tfidf_title_your      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ tfidf_title_zombie    <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000…
## $ tfidf_title_zombies   <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 0.000000…
## $ status_Released       <dbl> 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ status_other          <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
# Specify model: boosted trees fitted with xgboost in regression mode, with
# four hyperparameters left open for tuning.
xgboost_spec <-
    boost_tree(
        trees = tune(),
        min_n = tune(),
        tree_depth = tune(),
        learn_rate = tune()
    ) %>%
    set_engine("xgboost") %>%
    set_mode("regression")

# Bundle the model and the preprocessing recipe into one workflow
xgboost_workflow <- workflow() %>%
    add_model(xgboost_spec) %>%
    add_recipe(xgboost_recipe)

# Tune hyperparameters: 5 candidate points evaluated on the CV resamples
set.seed(344)
xgboost_tune <- tune_grid(
    object = xgboost_workflow,
    resamples = data_cv,
    grid = 5
)
## → A | error:   Some columns are non-numeric. The data cannot be converted to numeric matrix: 'original_title', 'original_language', 'overview', 'tagline', 'release_date', 'poster_path', 'adult', 'backdrop_path', 'genre_names', 'collection_name'.
## 
There were issues with some computations   A: x1

There were issues with some computations   A: x4

There were issues with some computations   A: x6

There were issues with some computations   A: x8

There were issues with some computations   A: x10

There were issues with some computations   A: x11

There were issues with some computations   A: x12

There were issues with some computations   A: x15

There were issues with some computations   A: x16

There were issues with some computations   A: x19

There were issues with some computations   A: x21

There were issues with some computations   A: x24

There were issues with some computations   A: x26

There were issues with some computations   A: x28

There were issues with some computations   A: x31

There were issues with some computations   A: x32

There were issues with some computations   A: x35

There were issues with some computations   A: x36

There were issues with some computations   A: x39

There were issues with some computations   A: x41

There were issues with some computations   A: x44

There were issues with some computations   A: x46

There were issues with some computations   A: x49

There were issues with some computations   A: x50
## Warning: All models failed. Run `show_notes(.Last.tune.result)` for more
## information.