Done with the lovely Prof. Jo Hardin!
library(tidytuesdayR)
library(tidyverse)
library(dplyr)
#install.packages('tuber')
library(tuber)
library(rvest)
library(httr)
#install.packages('ggforce')
library(ggforce)
# Fetch the TidyTuesday bundle for the 2021-03-02 release.
tuesdata <- tidytuesdayR::tt_load("2021-03-02")

# Read the ads table directly from the TidyTuesday repository.
youtube <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-02/youtube.csv"
)

# Keep a local copy of the data in the working directory.
write.csv(youtube, file = "youtube.csv")
Creating a Parallel Sets Diagram
Do different factors of Super Bowl ads go hand in hand? Would we expect patriotic commercials to use celebrity appearances more often? Let’s explore these relationships in a parallel sets diagram:
# Build a contingency table of ads by brand and seven ad attributes.
# Each attribute column is used as the `ifelse()` condition and recoded into a
# readable two-level label; only the eight recoded columns are kept, with Brand
# first, before they are cross-tabulated.
yt_table <- youtube %>%
  transmute(
    Brand = brand,
    Funny = ifelse(funny, "funny", "not funny"),
    Animals = ifelse(animals, "animals", "no animals"),
    Sex = ifelse(use_sex, "uses sex", "no sex"),
    Patriotic = ifelse(patriotic, "patriotic", "not patriotic"),
    Celebrity = ifelse(celebrity, "celebrity", "no celebrity"),
    Danger = ifelse(danger, "dangerous", "not dangerous"),
    Product = ifelse(
      show_product_quickly,
      "quick product view",
      "delayed product view"
    )
  ) %>%
  table()
# Parallel sets diagram of the brand/attribute contingency table.

# Axis order, left to right. This one vector drives both the gathering step
# and the factor ordering so that the two cannot disagree.
axis_order <- c(
  "Brand", "Animals", "Funny", "Sex", "Patriotic",
  "Celebrity", "Danger", "Product"
)

# Long format: one row per cell of `yt_table`, with the cell count in `value`.
data <- reshape2::melt(yt_table)

# Number of category labels on the Brand axis and on the remaining axes,
# derived from the data instead of being hard-coded.
n_brand_labels <- n_distinct(data$Brand)
n_other_labels <- sum(vapply(data[axis_order[-1]], n_distinct, integer(1)))

# Gather every categorical column. The earlier `1:7` skipped the eighth column
# (Product), so that axis never appeared and `fct_relevel()` warned about an
# unknown "Product" level.
data <- gather_set_data(data, match(axis_order, names(data)))

data %>%
  mutate(x = fct_relevel(x, axis_order)) %>%
  ggplot(aes(x, id = id, split = y, value = value)) +
  geom_parallel_sets_axes(
    axis.width = 0.1, color = "lightgrey", fill = "white"
  ) +
  # Brand labels are drawn horizontally and nudged left of the first axis; all
  # other labels are rotated. NOTE(review): this relies on the label layer
  # emitting the Brand labels first (Brand is the first axis) -- confirm when
  # the plot is rendered.
  geom_parallel_sets_labels(
    angle = c(rep(0, n_brand_labels), rep(270, n_other_labels)),
    position = position_nudge(
      c(rep(-.3, n_brand_labels), rep(0.01, n_other_labels))
    )
  ) +
  geom_parallel_sets(
    aes(color = Brand, fill = Brand), alpha = 0.3, axis.width = 0.1
  ) +
  theme(
    legend.position = "none",
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  ) +
  xlab("")