# Fix the RNG seed so the 10-row sample below is reproducible.
set.seed(1234)

# Load the full ads data set from the shared data folder.
ads <- read.csv("../00_data/myData.csv")

# Keep only the three columns used in the reshaping examples and draw a
# random sample of 10 rows.
data_small <- ads %>%
  select(year, brand, like_count) %>%
  sample_n(10)

# Print the sample.
data_small
## year brand like_count
## 1 2013 Hynudai 22
## 2 2004 Budweiser 3744
## 3 2007 Budweiser 47
## 4 2011 Hynudai 0
## 5 2000 Budweiser 163
## 6 2002 Budweiser 49
## 7 2008 Bud Light 26
## 8 2016 Doritos NA
## 9 2002 Bud Light 270
## 10 2010 Hynudai 306
# Reshape to long form: `year` and `like_count` are stacked into a
# name column (`count`) and a value column (`number`); `brand` is kept
# as-is, so each input row becomes two output rows.
ads_long <- data_small %>%
  pivot_longer(
    cols = c(year, like_count),
    names_to = "count",
    values_to = "number"
  )

# Print the long table.
ads_long
## # A tibble: 20 × 3
## brand count number
## <chr> <chr> <int>
## 1 Hynudai year 2013
## 2 Hynudai like_count 22
## 3 Budweiser year 2004
## 4 Budweiser like_count 3744
## 5 Budweiser year 2007
## 6 Budweiser like_count 47
## 7 Hynudai year 2011
## 8 Hynudai like_count 0
## 9 Budweiser year 2000
## 10 Budweiser like_count 163
## 11 Budweiser year 2002
## 12 Budweiser like_count 49
## 13 Bud Light year 2008
## 14 Bud Light like_count 26
## 15 Doritos year 2016
## 16 Doritos like_count NA
## 17 Bud Light year 2002
## 18 Bud Light like_count 270
## 19 Hynudai year 2010
## 20 Hynudai like_count 306
# Reshape back to wide form (one row per original observation).
#
# `brand` alone does not identify an observation: the same brand occurs
# several times in `ads_long`, so widening on `brand` only makes
# pivot_wider() warn and pack the values into list-columns. Number the
# occurrences within each (brand, count) pair so that brand + obs is a
# unique key, widen, then drop the helper column again.
ads_wide <- ads_long %>%
  group_by(brand, count) %>%
  mutate(obs = row_number()) %>%
  ungroup() %>%
  pivot_wider(names_from = count, values_from = number) %>%
  select(-obs)

# Print the wide table; it should match `data_small` row for row
# (with `brand` as the first column).
ads_wide
## # A tibble: 10 × 3
## brand year like_count
## <chr> <int> <int>
## 1 Hynudai 2013 22
## 2 Budweiser 2004 3744
## 3 Budweiser 2007 47
## 4 Hynudai 2011 0
## 5 Budweiser 2000 163
## 6 Budweiser 2002 49
## 7 Bud Light 2008 26
## 8 Doritos 2016 NA
## 9 Bud Light 2002 270
## 10 Hynudai 2010 306
# Split `published_at` into its first three pieces (year, month, day).
#
# * The new columns get a `pub_` prefix because `ads` already has a `year`
#   column; reusing the name "year" would collide with it.
# * separate() splits on every non-alphanumeric character, and most rows
#   have more than three pieces. `extra = "drop"` states explicitly that
#   the trailing pieces are discarded, instead of relying on the default
#   that emits a warning.
ads_sep <- ads %>%
  separate(
    col = published_at,
    into = c("pub_year", "pub_month", "pub_day"),
    extra = "drop"
  )

# Re-assemble the date parts into a single `published_at` column.
# Only the three retained pieces are joined, so anything dropped by the
# separate() step above is not restored.
ads_unite <- ads_sep %>%
  unite(col = "published_at", c(pub_year, pub_month, pub_day), sep = "-")
# Make implicit missing values explicit: complete() adds a row for every
# year/brand combination that is absent from `ads`, filling the other
# columns with NA. Then keep three columns and show the latest years first.
ads %>%
  complete(year, brand) %>%
  select(
    year,
    brand,
    superbowl_ads_dot_com_url
  ) %>%
  arrange(desc(year))
## # A tibble: 326 × 3
## year brand superbowl_ads_dot_com_url
## <int> <chr> <chr>
## 1 2020 Bud Light https://superbowl-ads.com/2020-bud-light-seltzer-inside-post…
## 2 2020 Budweiser https://superbowl-ads.com/2020-budweiser-typical-american/
## 3 2020 Coca-Cola https://superbowl-ads.com/2020-coca-cola-energy-show-up/
## 4 2020 Doritos https://superbowl-ads.com/2020-doritos-the-cool-ranch-with-l…
## 5 2020 E-Trade <NA>
## 6 2020 Hynudai https://superbowl-ads.com/2020-hyundai-smaht-pahk/
## 7 2020 Kia https://superbowl-ads.com/2020-kia-tough-never-quits/
## 8 2020 NFL https://superbowl-ads.com/2020-nfl-next-100/
## 9 2020 Pepsi https://superbowl-ads.com/2020-pepsi-zero-sugar-done-right/
## 10 2020 Toyota https://superbowl-ads.com/2020-toyota-go-places-with-cobie-s…
## # ℹ 316 more rows