# Location of the FiveThirtyEight Super Bowl ads dataset (CSV).
theUrl <- "https://raw.githubusercontent.com/fivethirtyeight/superbowl-ads/main/superbowl-ads.csv"
# read.csv() uses CSV-appropriate defaults (header = TRUE, sep = ",",
# quote = "\"", comment.char = ""), so a "#" or an apostrophe inside a field
# (e.g. in the URL columns) cannot truncate or merge rows, which
# read.table()'s defaults would allow.
superbowl <- read.csv(file = theUrl)
# Keep only the identifying columns and the three ad-attribute flags
# used in the analysis below.
subset_sb <- select(
  superbowl,
  year,
  brand,
  superbowl_ads_dot_com_url,
  youtube_url,
  funny,
  celebrity,
  use_sex
)
# Cross-tabulate celebrity presence against whether the ad is funny.
# as.logical() normalises the flags (logical or "True"/"False" text) so the
# resulting factor levels are exactly "FALSE"/"TRUE", matching the colour keys
# given to scale_fill_manual() below.
freq_table <- table(
  Celebrities = as.logical(subset_sb$celebrity),
  Funny = as.logical(subset_sb$funny)
)
# Long format: one row per (Celebrities, Funny) combination plus its count.
# A wide conversion (as.data.frame.matrix) yields two *count* columns, and
# labelling those "Celebrities"/"Funny" makes the plot map counts to x and
# fill, so no fill value matches the manual scale and the bars render gray.
freq_table_df <- as.data.frame(freq_table, responseName = "Count")
# position = "fill" rescales every bar to height 1, so the segments show the
# share of funny vs. not-funny ads within each celebrity group.
ggplot(data = freq_table_df, aes(x = Celebrities, y = Count, fill = Funny)) +
  geom_col(position = "fill") +
  labs(x = "Presence of Celebrities", y = "Proportion") +
  scale_fill_manual(values = c("TRUE" = "green", "FALSE" = "red")) +
  theme_minimal()
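#### Since both bars appear to be gray, this means there is no
#### significant difference between the distribution of each category. In
#### other words, the presence or absence of celebrities does not strongly
#### influence whether or not an ad is funny.
#### NOTE(review): gray bars indicate that the fill values did not match the
#### keys given to scale_fill_manual(), not that the groups are alike. Confirm
#### this conclusion by comparing the funny / not-funny proportions directly.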
# Count the ads flagged as using sex appeal.
# as.logical() leaves a logical column untouched and converts "True"/"False"
# text; values it cannot interpret become NA.
subset_sb$use_sex <- as.logical(subset_sb$use_sex)
# na.rm = TRUE keeps a single missing or unparseable flag from turning the
# whole count into NA.
ads_with_sex <- sum(subset_sb$use_sex, na.rm = TRUE)
ads_with_sex
## [1] 63