Notes Mar 10

Harold Nelson

3/9/2021

Super Bowl Ads

This is from Tidy Tuesday of March 2, 2021.

Get Data

# Download the Tidy Tuesday (2021-03-02) Super Bowl ads data as a tibble.
youtube <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-02/youtube.csv"
)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   year = col_double(),
##   funny = col_logical(),
##   show_product_quickly = col_logical(),
##   patriotic = col_logical(),
##   celebrity = col_logical(),
##   danger = col_logical(),
##   animals = col_logical(),
##   use_sex = col_logical(),
##   view_count = col_double(),
##   like_count = col_double(),
##   dislike_count = col_double(),
##   favorite_count = col_double(),
##   comment_count = col_double(),
##   published_at = col_datetime(format = ""),
##   category_id = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.0.6     ✓ dplyr   1.0.4
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(knitr)

Usual Inspection

# Compact overview: row and column counts, then each column's type and first few values.
glimpse(youtube)
## Rows: 247
## Columns: 25
## $ year                      <dbl> 2018, 2020, 2006, 2018, 2003, 2020, 2020, 2…
## $ brand                     <chr> "Toyota", "Bud Light", "Bud Light", "Hynuda…
## $ superbowl_ads_dot_com_url <chr> "https://superbowl-ads.com/good-odds-toyota…
## $ youtube_url               <chr> "https://www.youtube.com/watch?v=zeBZvwYQ-h…
## $ funny                     <lgl> FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE,…
## $ show_product_quickly      <lgl> FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE…
## $ patriotic                 <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F…
## $ celebrity                 <lgl> FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, TRU…
## $ danger                    <lgl> FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE…
## $ animals                   <lgl> FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE…
## $ use_sex                   <lgl> FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FA…
## $ id                        <chr> "zeBZvwYQ-hA", "nbbp0VW7z8w", "yk0MQD5YgV8"…
## $ kind                      <chr> "youtube#video", "youtube#video", "youtube#…
## $ etag                      <chr> "rn-ggKNly38Cl0C3CNjNnUH9xUw", "1roDoK-SYqS…
## $ view_count                <dbl> 173929, 47752, 142310, 198, 13741, 23636, 3…
## $ like_count                <dbl> 1233, 485, 129, 2, 20, 115, 1470, 78, 342, …
## $ dislike_count             <dbl> 38, 14, 15, 0, 3, 11, 384, 6, 7, 0, 14, 0, …
## $ favorite_count            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ comment_count             <dbl> NA, 14, 9, 0, 2, 13, 227, 6, 30, 0, 8, 1, 1…
## $ published_at              <dttm> 2018-02-03 11:29:14, 2020-01-31 21:04:13, …
## $ title                     <chr> "Toyota Super Bowl Commercial 2018 Good Odd…
## $ description               <chr> "Toyota Super Bowl Commercial 2018 Good Odd…
## $ thumbnail                 <chr> "https://i.ytimg.com/vi/zeBZvwYQ-hA/sddefau…
## $ channel_title             <chr> "Funny Commercials", "VCU Brandcenter", "Jo…
## $ category_id               <dbl> 1, 27, 17, 22, 24, 1, 24, 2, 24, 24, 24, 24…
# Structure dump; for this readr tibble it also prints the stored column spec attribute.
str(youtube)
## spec_tbl_df [247 × 25] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ year                     : num [1:247] 2018 2020 2006 2018 2003 ...
##  $ brand                    : chr [1:247] "Toyota" "Bud Light" "Bud Light" "Hynudai" ...
##  $ superbowl_ads_dot_com_url: chr [1:247] "https://superbowl-ads.com/good-odds-toyota/" "https://superbowl-ads.com/2020-bud-light-seltzer-inside-posts-brain/" "https://superbowl-ads.com/2006-bud-light-bear-attack/" "https://superbowl-ads.com/hope-detector-nfl-super-bowl-lii-hyundai/" ...
##  $ youtube_url              : chr [1:247] "https://www.youtube.com/watch?v=zeBZvwYQ-hA" "https://www.youtube.com/watch?v=nbbp0VW7z8w" "https://www.youtube.com/watch?v=yk0MQD5YgV8" "https://www.youtube.com/watch?v=lNPccrGk77A" ...
##  $ funny                    : logi [1:247] FALSE TRUE TRUE FALSE TRUE TRUE ...
##  $ show_product_quickly     : logi [1:247] FALSE TRUE FALSE TRUE TRUE TRUE ...
##  $ patriotic                : logi [1:247] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ celebrity                : logi [1:247] FALSE TRUE FALSE FALSE FALSE TRUE ...
##  $ danger                   : logi [1:247] FALSE TRUE TRUE FALSE TRUE TRUE ...
##  $ animals                  : logi [1:247] FALSE FALSE TRUE FALSE TRUE TRUE ...
##  $ use_sex                  : logi [1:247] FALSE FALSE FALSE FALSE TRUE FALSE ...
##  $ id                       : chr [1:247] "zeBZvwYQ-hA" "nbbp0VW7z8w" "yk0MQD5YgV8" "lNPccrGk77A" ...
##  $ kind                     : chr [1:247] "youtube#video" "youtube#video" "youtube#video" "youtube#video" ...
##  $ etag                     : chr [1:247] "rn-ggKNly38Cl0C3CNjNnUH9xUw" "1roDoK-SYqSpqYwKbYrMH0jEJQ4" "OHiDfHTB3kilXfN8W0VTH0nwUIg" "G9Dhby9Xe1UpnfcIrHmcnZYRCFI" ...
##  $ view_count               : num [1:247] 173929 47752 142310 198 13741 ...
##  $ like_count               : num [1:247] 1233 485 129 2 20 ...
##  $ dislike_count            : num [1:247] 38 14 15 0 3 11 384 6 7 0 ...
##  $ favorite_count           : num [1:247] 0 0 0 0 0 0 0 0 0 0 ...
##  $ comment_count            : num [1:247] NA 14 9 0 2 13 227 6 30 0 ...
##  $ published_at             : POSIXct[1:247], format: "2018-02-03 11:29:14" "2020-01-31 21:04:13" ...
##  $ title                    : chr [1:247] "Toyota Super Bowl Commercial 2018 Good Odds" "Bud Light: Post Malone #PostyStore Inside Post's Brain" "Super Bowl 2006: Bud Light \"Save Yourself\"" "Hyundai / Hope Detector (2018)" ...
##  $ description              : chr [1:247] "Toyota Super Bowl Commercial 2018 Good Odds. You can watch Toyota Super Bowl 2018 commercial. Toyota has aired "| __truncated__ "Bud Light, Post Malone \"#PostyStore Inside Post's Brain\"\n\nGarrick Sheldon (Copywriting, 2014)\nWieden + Ken"| __truncated__ "Bud Light Super Bowl commercial where two guys camping encounter a bear." "TV or Web Commercial,  shot on location in MN with a non-union crew and contract, 2018. This commercial is uplo"| __truncated__ ...
##  $ thumbnail                : chr [1:247] "https://i.ytimg.com/vi/zeBZvwYQ-hA/sddefault.jpg" "https://i.ytimg.com/vi/nbbp0VW7z8w/sddefault.jpg" NA NA ...
##  $ channel_title            : chr [1:247] "Funny Commercials" "VCU Brandcenter" "John Keehler" "IATSE 490" ...
##  $ category_id              : num [1:247] 1 27 17 22 24 1 24 2 24 24 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   year = col_double(),
##   ..   brand = col_character(),
##   ..   superbowl_ads_dot_com_url = col_character(),
##   ..   youtube_url = col_character(),
##   ..   funny = col_logical(),
##   ..   show_product_quickly = col_logical(),
##   ..   patriotic = col_logical(),
##   ..   celebrity = col_logical(),
##   ..   danger = col_logical(),
##   ..   animals = col_logical(),
##   ..   use_sex = col_logical(),
##   ..   id = col_character(),
##   ..   kind = col_character(),
##   ..   etag = col_character(),
##   ..   view_count = col_double(),
##   ..   like_count = col_double(),
##   ..   dislike_count = col_double(),
##   ..   favorite_count = col_double(),
##   ..   comment_count = col_double(),
##   ..   published_at = col_datetime(format = ""),
##   ..   title = col_character(),
##   ..   description = col_character(),
##   ..   thumbnail = col_character(),
##   ..   channel_title = col_character(),
##   ..   category_id = col_double()
##   .. )
# Per-column summary: quantiles and NA counts for numeric/datetime columns,
# TRUE/FALSE counts for logical columns, length and class for character columns.
summary(youtube)
##       year         brand           superbowl_ads_dot_com_url youtube_url       
##  Min.   :2000   Length:247         Length:247                Length:247        
##  1st Qu.:2005   Class :character   Class :character          Class :character  
##  Median :2010   Mode  :character   Mode  :character          Mode  :character  
##  Mean   :2010                                                                  
##  3rd Qu.:2015                                                                  
##  Max.   :2020                                                                  
##                                                                                
##    funny         show_product_quickly patriotic       celebrity      
##  Mode :logical   Mode :logical        Mode :logical   Mode :logical  
##  FALSE:76        FALSE:78             FALSE:206       FALSE:176      
##  TRUE :171       TRUE :169            TRUE :41        TRUE :71       
##                                                                      
##                                                                      
##                                                                      
##                                                                      
##    danger         animals         use_sex             id           
##  Mode :logical   Mode :logical   Mode :logical   Length:247        
##  FALSE:172       FALSE:155       FALSE:181       Class :character  
##  TRUE :75        TRUE :92        TRUE :66        Mode  :character  
##                                                                    
##                                                                    
##                                                                    
##                                                                    
##      kind               etag             view_count          like_count    
##  Length:247         Length:247         Min.   :       10   Min.   :     0  
##  Class :character   Class :character   1st Qu.:     6431   1st Qu.:    19  
##  Mode  :character   Mode  :character   Median :    41379   Median :   130  
##                                        Mean   :  1407556   Mean   :  4146  
##                                        3rd Qu.:   170016   3rd Qu.:   527  
##                                        Max.   :176373378   Max.   :275362  
##                                        NA's   :16          NA's   :22      
##  dislike_count     favorite_count comment_count    
##  Min.   :    0.0   Min.   :0      Min.   :   0.00  
##  1st Qu.:    1.0   1st Qu.:0      1st Qu.:   1.00  
##  Median :    7.0   Median :0      Median :  10.00  
##  Mean   :  833.5   Mean   :0      Mean   : 188.64  
##  3rd Qu.:   24.0   3rd Qu.:0      3rd Qu.:  50.75  
##  Max.   :92990.0   Max.   :0      Max.   :9190.00  
##  NA's   :22        NA's   :16     NA's   :25       
##   published_at                    title           description       
##  Min.   :2006-02-06 10:02:36   Length:247         Length:247        
##  1st Qu.:2009-02-02 03:59:35   Class :character   Class :character  
##  Median :2013-01-31 09:13:55   Mode  :character   Mode  :character  
##  Mean   :2012-12-23 22:41:37                                        
##  3rd Qu.:2016-04-09 11:09:50                                        
##  Max.   :2021-01-27 13:11:29                                        
##  NA's   :16                                                         
##   thumbnail         channel_title       category_id   
##  Length:247         Length:247         Min.   : 1.00  
##  Class :character   Class :character   1st Qu.:17.00  
##  Mode  :character   Mode  :character   Median :23.00  
##                                        Mean   :19.32  
##                                        3rd Qu.:24.00  
##                                        Max.   :29.00  
##                                        NA's   :16

New Inspection Tool

The skimr package is another option for examining a dataframe. Its skim() function returns a dataframe of summary statistics with one row per variable.

Packages

# install.packages("skimr")

library(skimr)

Get the skimr output and display it with kable() (or use View() in an interactive session)

# Summarise every column of `youtube` with skimr; the result has one row per
# variable, with statistics grouped by column type.
ys <- skim(youtube)
# Render the summary as a table in the knitted document.
kable(ys)
skim_type skim_variable n_missing complete_rate character.min character.max character.empty character.n_unique character.whitespace logical.mean logical.count numeric.mean numeric.sd numeric.p0 numeric.p25 numeric.p50 numeric.p75 numeric.p100 numeric.hist POSIXct.min POSIXct.max POSIXct.median POSIXct.n_unique
character brand 0 1.0000000 3 9 0 10 0 NA NA NA NA NA NA NA NA NA NA NA NA NA NA
character superbowl_ads_dot_com_url 0 1.0000000 34 120 0 244 0 NA NA NA NA NA NA NA NA NA NA NA NA NA NA
character youtube_url 11 0.9554656 43 43 0 233 0 NA NA NA NA NA NA NA NA NA NA NA NA NA NA
character id 11 0.9554656 11 11 0 233 0 NA NA NA NA NA NA NA NA NA NA NA NA NA NA
character kind 16 0.9352227 13 13 0 1 0 NA NA NA NA NA NA NA NA NA NA NA NA NA NA
character etag 16 0.9352227 27 27 0 228 0 NA NA NA NA NA NA NA NA NA NA NA NA NA NA
character title 16 0.9352227 6 99 0 228 0 NA NA NA NA NA NA NA NA NA NA NA NA NA NA
character description 50 0.7975709 3 3527 0 194 0 NA NA NA NA NA NA NA NA NA NA NA NA NA NA
character thumbnail 129 0.4777328 48 48 0 118 0 NA NA NA NA NA NA NA NA NA NA NA NA NA NA
character channel_title 16 0.9352227 3 37 0 185 0 NA NA NA NA NA NA NA NA NA NA NA NA NA NA
logical funny 0 1.0000000 NA NA NA NA NA 0.6923077 TRU: 171, FAL: 76 NA NA NA NA NA NA NA NA NA NA NA NA
logical show_product_quickly 0 1.0000000 NA NA NA NA NA 0.6842105 TRU: 169, FAL: 78 NA NA NA NA NA NA NA NA NA NA NA NA
logical patriotic 0 1.0000000 NA NA NA NA NA 0.1659919 FAL: 206, TRU: 41 NA NA NA NA NA NA NA NA NA NA NA NA
logical celebrity 0 1.0000000 NA NA NA NA NA 0.2874494 FAL: 176, TRU: 71 NA NA NA NA NA NA NA NA NA NA NA NA
logical danger 0 1.0000000 NA NA NA NA NA 0.3036437 FAL: 172, TRU: 75 NA NA NA NA NA NA NA NA NA NA NA NA
logical animals 0 1.0000000 NA NA NA NA NA 0.3724696 FAL: 155, TRU: 92 NA NA NA NA NA NA NA NA NA NA NA NA
logical use_sex 0 1.0000000 NA NA NA NA NA 0.2672065 FAL: 181, TRU: 66 NA NA NA NA NA NA NA NA NA NA NA NA
numeric year 0 1.0000000 NA NA NA NA NA NA NA 2.010190e+03 5.860872e+00 2000 2005 2010 2015.00 2020 ▇▇▇▇▆ NA NA NA NA
numeric view_count 16 0.9352227 NA NA NA NA NA NA NA 1.407556e+06 1.197111e+07 10 6431 41379 170015.50 176373378 ▇▁▁▁▁ NA NA NA NA
numeric like_count 22 0.9109312 NA NA NA NA NA NA NA 4.146031e+03 2.392040e+04 0 19 130 527.00 275362 ▇▁▁▁▁ NA NA NA NA
numeric dislike_count 22 0.9109312 NA NA NA NA NA NA NA 8.335378e+02 6.948522e+03 0 1 7 24.00 92990 ▇▁▁▁▁ NA NA NA NA
numeric favorite_count 16 0.9352227 NA NA NA NA NA NA NA 0.000000e+00 0.000000e+00 0 0 0 0.00 0 ▁▁▇▁▁ NA NA NA NA
numeric comment_count 25 0.8987854 NA NA NA NA NA NA NA 1.886396e+02 9.864569e+02 0 1 10 50.75 9190 ▇▁▁▁▁ NA NA NA NA
numeric category_id 16 0.9352227 NA NA NA NA NA NA NA 1.931602e+01 8.004328e+00 1 17 23 24.00 29 ▃▁▂▆▇ NA NA NA NA
POSIXct published_at 16 0.9352227 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA 2006-02-06 10:02:36 2021-01-27 13:11:29 2013-01-31 09:13:55 227
# View(ys)